toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char* const ident_too_long = "Identifier too long";
  97
  98 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  99
 100 #define XENUMMASK  0x3f
 101 #define XFAKEEOF   0x40
 102 #define XFAKEBRACK 0x80
 103
 104 #ifdef USE_UTF8_SCRIPTS
 105 #   define UTF cBOOL(!IN_BYTES)
 106 #else
 107 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 108 #endif
 109
 110 /* The maximum number of characters preceding the unrecognized one to display */
 111 #define UNRECOGNIZED_PRECEDE_COUNT 10
 112
 113 /* In variables named $^X, these are the legal values for X.
 114  * 1999-02-27 mjd-perl-patch@plover.com */
 115 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 116
 117 #define SPACE_OR_TAB(c) isBLANK_A(c)
 118
 119 #define HEXFP_PEEK(s)     \
 120     (((s[0] == '.') && \
 121       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 122      isALPHA_FOLD_EQ(s[0], 'p'))
 123
 124 /* LEX_* are values for PL_lex_state, the state of the lexer.
 125  * They are arranged oddly so that the guard on the switch statement
 126  * can get by with a single comparison (if the compiler is smart enough).
 127  *
 128  * These values refer to the various states within a sublex parse,
 129  * i.e. within a double quotish string
 130  */
 131
 132 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 133
 134 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 135 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 136 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 137 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 138 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 139
 140                                    /* at end of code, eg "$x" followed by:  */
 141 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 142 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 143
 144 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 145                                         string or after \E, $foo, etc       */
 146 #define LEX_INTERPCONST          2 /* NOT USED */
 147 #define LEX_FORMLINE             1 /* expecting a format line               */
 148
 149
 150 #ifdef DEBUGGING
 151 static const char* const lex_state_names[] = {
 152     "KNOWNEXT",
 153     "FORMLINE",
 154     "INTERPCONST",
 155     "INTERPCONCAT",
 156     "INTERPENDMAYBE",
 157     "INTERPEND",
 158     "INTERPSTART",
 159     "INTERPPUSH",
 160     "INTERPCASEMOD",
 161     "INTERPNORMAL",
 162     "NORMAL"
 163 };
 164 #endif
 165
 166 #include "keywords.h"
 167
 168 /* CLINE is a macro that ensures PL_copline has a sane value */
 169
 170 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 171
 172 /*
 173  * Convenience functions to return different tokens and prime the
 174  * lexer for the next token.  They all take an argument.
 175  *
 176  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 177  * OPERATOR     : generic operator
 178  * AOPERATOR    : assignment operator
 179  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 180  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 181  * PREREF       : *EXPR where EXPR is not a simple identifier
 182  * TERM         : expression term
 183  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 184  * LOOPX        : loop exiting command (goto, last, dump, etc)
 185  * FTST         : file test operator
 186  * FUN0         : zero-argument function
 187  * FUN0OP       : zero-argument function, with its op created in this file
 188  * FUN1         : not used, except for not, which isn't a UNIOP
 189  * BOop         : bitwise or or xor
 190  * BAop         : bitwise and
 191  * BCop         : bitwise complement
 192  * SHop         : shift operator
 193  * PWop         : power operator
 194  * PMop         : pattern-matching operator
 195  * Aop          : addition-level operator
 196  * AopNOASSIGN  : addition-level operator that is never part of .=
 197  * Mop          : multiplication-level operator
 198  * Eop          : equality-testing operator
 199  * Rop          : relational operator <= != gt
 200  *
 201  * Also see LOP and lop() below.
 202  */
 203
 204 #ifdef DEBUGGING /* Serve -DT. */
 205 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 206 #else
 207 #   define REPORT(retval) (retval)
 208 #endif
 209
 210 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 211 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 212 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 213 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 214 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 215 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 216 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 217 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 218 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 219                          pl_yylval.ival=f, \
 220                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 221                          REPORT((int)LOOPEX))
 222 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 223 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 224 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 225 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 226 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 227 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 228 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 229                        REPORT('~')
 230 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 231 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 232 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 233 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 234 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 235 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 236 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 237 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 238
 239 /* This bit of chicanery makes a unary function followed by
 240  * a parenthesis into a function with one argument, highest precedence.
 241  * The UNIDOR macro is for unary functions that can be followed by the //
 242  * operator (such as C<shift // 0>).
 243  */
 244 #define UNI3(f,x,have_x) { \
 245         pl_yylval.ival = f; \
 246         if (have_x) PL_expect = x; \
 247         PL_bufptr = s; \
 248         PL_last_uni = PL_oldbufptr; \
 249         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 250         if (*s == '(') \
 251             return REPORT( (int)FUNC1 ); \
 252         s = skipspace(s); \
 253         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 254         }
 255 #define UNI(f)    UNI3(f,XTERM,1)
 256 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 257 #define UNIPROTO(f,optional) { \
 258         if (optional) PL_last_uni = PL_oldbufptr; \
 259         OPERATOR(f); \
 260         }
 261
 262 #define UNIBRACK(f) UNI3(f,0,0)
 263
 264 /* grandfather return to old style */
 265 #define OLDLOP(f) \
 266         do { \
 267             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 268                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 269             pl_yylval.ival = (f); \
 270             PL_expect = XTERM; \
 271             PL_bufptr = s; \
 272             return (int)LSTOP; \
 273         } while(0)
 274
 275 #define COPLINE_INC_WITH_HERELINES                  \
 276     STMT_START {                                     \
 277         CopLINE_inc(PL_curcop);                       \
 278         if (PL_parser->herelines)                      \
 279             CopLINE(PL_curcop) += PL_parser->herelines, \
 280             PL_parser->herelines = 0;                    \
 281     } STMT_END
 282 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 283  * is no sublex_push to follow. */
 284 #define COPLINE_SET_FROM_MULTI_END            \
 285     STMT_START {                               \
 286         CopLINE_set(PL_curcop, PL_multi_end);   \
 287         if (PL_multi_end != PL_multi_start)      \
 288             PL_parser->herelines = 0;             \
 289     } STMT_END
 290
 291
 292 #ifdef DEBUGGING
 293
 294 /* how to interpret the pl_yylval associated with the token */
 295 enum token_type {
 296     TOKENTYPE_NONE,
 297     TOKENTYPE_IVAL,
 298     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 299     TOKENTYPE_PVAL,
 300     TOKENTYPE_OPVAL
 301 };
 302
 303 static struct debug_tokens {
 304     const int token;
 305     enum token_type type;
 306     const char *name;
 307 } const debug_tokens[] =
 308 {
 309     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 310     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 311     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 312     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 313     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 314     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 315     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 316     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 317     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 318     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 319     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 320     { DO,               TOKENTYPE_NONE,         "DO" },
 321     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 322     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 323     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 324     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 325     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 326     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 327     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 328     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 329     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 330     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 331     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 332     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 333     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 334     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 335     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 336     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 337     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 338     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 339     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 340     { IF,               TOKENTYPE_IVAL,         "IF" },
 341     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 342     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 343     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 344     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 345     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 346     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 347     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 348     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 349     { MY,               TOKENTYPE_IVAL,         "MY" },
 350     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 351     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 352     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 353     { OROR,             TOKENTYPE_NONE,         "OROR" },
 354     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 355     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 356     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 357     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 358     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 359     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 360     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 361     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 362     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 363     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 364     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 365     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 366     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 367     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 368     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 369     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 370     { SUB,              TOKENTYPE_NONE,         "SUB" },
 371     { THING,            TOKENTYPE_OPVAL,        "THING" },
 372     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 373     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 374     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 375     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 376     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 377     { USE,              TOKENTYPE_IVAL,         "USE" },
 378     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 379     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 380     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 381     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 382     { 0,                TOKENTYPE_NONE,         NULL }
 383 };
 384
 385 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 386
 387 STATIC int
 388 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 389 {
 390     PERL_ARGS_ASSERT_TOKEREPORT;
 391
 392     if (DEBUG_T_TEST) {
 393         const char *name = NULL;
 394         enum token_type type = TOKENTYPE_NONE;
 395         const struct debug_tokens *p;
 396         SV* const report = newSVpvs("<== ");
 397
 398         for (p = debug_tokens; p->token; p++) {
 399             if (p->token == (int)rv) {
 400                 name = p->name;
 401                 type = p->type;
 402                 break;
 403             }
 404         }
 405         if (name)
 406             Perl_sv_catpv(aTHX_ report, name);
 407         else if (isGRAPH(rv))
 408         {
 409             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 410             if ((char)rv == 'p')
 411                 sv_catpvs(report, " (pending identifier)");
 412         }
 413         else if (!rv)
 414             sv_catpvs(report, "EOF");
 415         else
 416             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 417         switch (type) {
 418         case TOKENTYPE_NONE:
 419             break;
 420         case TOKENTYPE_IVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 422             break;
 423         case TOKENTYPE_OPNUM:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 425                                     PL_op_name[lvalp->ival]);
 426             break;
 427         case TOKENTYPE_PVAL:
 428             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 429             break;
 430         case TOKENTYPE_OPVAL:
 431             if (lvalp->opval) {
 432                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 433                                     PL_op_name[lvalp->opval->op_type]);
 434                 if (lvalp->opval->op_type == OP_CONST) {
 435                     Perl_sv_catpvf(aTHX_ report, " %s",
 436                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 437                 }
 438
 439             }
 440             else
 441                 sv_catpvs(report, "(opval=null)");
 442             break;
 443         }
 444         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 445     };
 446     return (int)rv;
 447 }
 448
 449
 450 /* print the buffer with suitable escapes */
 451
 452 STATIC void
 453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 454 {
 455     SV* const tmp = newSVpvs("");
 456
 457     PERL_ARGS_ASSERT_PRINTBUF;
 458
 459     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 460     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 461     GCC_DIAG_RESTORE;
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine looks for an '=' next to the operator that has just been
 478  * parsed and turns it into an ASSIGNOP if it finds one.
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     if (*PL_bufptr == '=') {
 485         PL_bufptr++;
 486         if (toketype == ANDAND)
 487             pl_yylval.ival = OP_ANDASSIGN;
 488         else if (toketype == OROR)
 489             pl_yylval.ival = OP_ORASSIGN;
 490         else if (toketype == DORDOR)
 491             pl_yylval.ival = OP_DORASSIGN;
 492         toketype = ASSIGNOP;
 493     }
 494     return REPORT(toketype);
 495 }
 496
 497 /*
 498  * S_no_op
 499  * When Perl expects an operator and finds something else, no_op
 500  * prints the warning.  It always prints "<something> found where
 501  * operator expected.  It prints "Missing semicolon on previous line?"
 502  * if the surprise occurs at the start of the line.  "do you need to
 503  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 504  * where the compiler doesn't know if foo is a method call or a function.
 505  * It prints "Missing operator before end of line" if there's nothing
 506  * after the missing operator, or "... before <...>" if there is something
 507  * after the missing operator.
 508  *
 509  * PL_bufptr is expected to point to the start of the thing that was found,
 510  * and s after the next token or partial token.
 511  */
 512
 513 STATIC void
 514 S_no_op(pTHX_ const char *const what, char *s)
 515 {
 516     char * const oldbp = PL_bufptr;
 517     const bool is_first = (PL_oldbufptr == PL_linestart);
 518
 519     PERL_ARGS_ASSERT_NO_OP;
 520
 521     if (!s)
 522         s = oldbp;
 523     else
 524         PL_bufptr = s;
 525     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 526     if (ckWARN_d(WARN_SYNTAX)) {
 527         if (is_first)
 528             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 529                     "\t(Missing semicolon on previous line?)\n");
 530         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 531                                                            PL_bufend,
 532                                                            UTF))
 533         {
 534             const char *t;
 535             for (t = PL_oldoldbufptr;
 536                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 537                  t += UTF ? UTF8SKIP(t) : 1)
 538             {
 539                 NOOP;
 540             }
 541             if (t < PL_bufptr && isSPACE(*t))
 542                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 543                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 544                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 545         }
 546         else {
 547             assert(s >= oldbp);
 548             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 549                     "\t(Missing operator before %" UTF8f "?)\n",
 550                      UTF8fARG(UTF, s - oldbp, oldbp));
 551         }
 552     }
 553     PL_bufptr = oldbp;
 554 }
 555
 556 /*
 557  * S_missingterm
 558  * Complain about missing quote/regexp/heredoc terminator.
 559  * If it's called with NULL then it cauterizes the line buffer.
 560  * If we're in a delimited string and the delimiter is a control
 561  * character, it's reformatted into a two-char sequence like ^C.
 562  * This is fatal.
 563  */
 564
 565 STATIC void
 566 S_missingterm(pTHX_ char *s)
 567 {
 568     char tmpbuf[UTF8_MAXBYTES + 1];
 569     char q;
 570     bool uni = FALSE;
 571     SV *sv;
 572     if (s) {
 573         char * const nl = strrchr(s,'\n');
 574         if (nl)
 575             *nl = '\0';
 576         uni = UTF;
 577     }
 578     else if (PL_multi_close < 32) {
 579         *tmpbuf = '^';
 580         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 581         tmpbuf[2] = '\0';
 582         s = tmpbuf;
 583     }
 584     else {
 585         if (LIKELY(PL_multi_close < 256)) {
 586             *tmpbuf = (char)PL_multi_close;
 587             tmpbuf[1] = '\0';
 588         }
 589         else {
 590             uni = TRUE;
 591             *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
 592         }
 593         s = tmpbuf;
 594     }
 595     q = strchr(s,'"') ? '\'' : '"';
 596     sv = sv_2mortal(newSVpv(s,0));
 597     if (uni)
 598         SvUTF8_on(sv);
 599     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
 600                      "%c anywhere before EOF",q,SVfARG(sv),q);
 601 }
 602
 603 #include "feature.h"
 604
 605 /*
 606  * Check whether the named feature is enabled.
 607  */
 608 bool
 609 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 610 {
 611     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 612
 613     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 614
 615     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 616
 617     if (namelen > MAX_FEATURE_LEN)
 618         return FALSE;
 619     memcpy(&he_name[8], name, namelen);
 620
 621     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 622                                      REFCOUNTED_HE_EXISTS));
 623 }
 624
 625 /*
 626  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 627  * utf16-to-utf8-reversed.
 628  */
 629
 630 #ifdef PERL_CR_FILTER
 631 static void
 632 strip_return(SV *sv)
 633 {
 634     const char *s = SvPVX_const(sv);
 635     const char * const e = s + SvCUR(sv);
 636
 637     PERL_ARGS_ASSERT_STRIP_RETURN;
 638
 639     /* outer loop optimized to do nothing if there are no CR-LFs */
 640     while (s < e) {
 641         if (*s++ == '\r' && *s == '\n') {
 642             /* hit a CR-LF, need to copy the rest */
 643             char *d = s - 1;
 644             *d++ = *s++;
 645             while (s < e) {
 646                 if (*s == '\r' && s[1] == '\n')
 647                     s++;
 648                 *d++ = *s++;
 649             }
 650             SvCUR(sv) -= s - d;
 651             return;
 652         }
 653     }
 654 }
 655
 656 STATIC I32
 657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 658 {
 659     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 660     if (count > 0 && !maxlen)
 661         strip_return(sv);
 662     return count;
 663 }
 664 #endif
 665
 666 /*
 667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 668
 669 Creates and initialises a new lexer/parser state object, supplying
 670 a context in which to lex and parse from a new source of Perl code.
 671 A pointer to the new state object is placed in L</PL_parser>.  An entry
 672 is made on the save stack so that upon unwinding the new state object
 673 will be destroyed and the former value of L</PL_parser> will be restored.
 674 Nothing else need be done to clean up the parsing context.
 675
 676 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 677 non-null, provides a string (in SV form) containing code to be parsed.
 678 A copy of the string is made, so subsequent modification of C<line>
 679 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 680 from which code will be read to be parsed.  If both are non-null, the
 681 code in C<line> comes first and must consist of complete lines of input,
 682 and C<rsfp> supplies the remainder of the source.
 683
 684 The C<flags> parameter is reserved for future use.  Currently it is only
 685 used by perl internally, so extensions should always pass zero.
 686
 687 =cut
 688 */
 689
 690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 691    can share filters with the current parser.
 692    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 693    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 694    destruction. This is used to handle the case of defaulting to reading the
 695    script from the standard input because no filename was given on the command
 696    line (without getting confused by situation where STDIN has been closed, so
 697    the script handle is opened on fd 0)  */
 698
 699 void
 700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 701 {
 702     const char *s = NULL;
 703     yy_parser *parser, *oparser;
 704     if (flags && flags & ~LEX_START_FLAGS)
 705         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 706
 707     /* create and initialise a parser */
 708
 709     Newxz(parser, 1, yy_parser);
 710     parser->old_parser = oparser = PL_parser;
 711     PL_parser = parser;
 712
 713     parser->stack = NULL;
 714     parser->stack_max1 = NULL;
 715     parser->ps = NULL;
 716
 717     /* on scope exit, free this parser and restore any outer one */
 718     SAVEPARSER(parser);
 719     parser->saved_curcop = PL_curcop;
 720
 721     /* initialise lexer state */
 722
 723     parser->nexttoke = 0;
 724     parser->error_count = oparser ? oparser->error_count : 0;
 725     parser->copline = parser->preambling = NOLINE;
 726     parser->lex_state = LEX_NORMAL;
 727     parser->expect = XSTATE;
 728     parser->rsfp = rsfp;
 729     parser->rsfp_filters =
 730       !(flags & LEX_START_SAME_FILTER) || !oparser
 731         ? NULL
 732         : MUTABLE_AV(SvREFCNT_inc(
 733             oparser->rsfp_filters
 734              ? oparser->rsfp_filters
 735              : (oparser->rsfp_filters = newAV())
 736           ));
 737
 738     Newx(parser->lex_brackstack, 120, char);
 739     Newx(parser->lex_casestack, 12, char);
 740     *parser->lex_casestack = '\0';
 741     Newxz(parser->lex_shared, 1, LEXSHARED);
 742
 743     if (line) {
 744         STRLEN len;
 745         s = SvPV_const(line, len);
 746         parser->linestr = flags & LEX_START_COPIED
 747                             ? SvREFCNT_inc_simple_NN(line)
 748                             : newSVpvn_flags(s, len, SvUTF8(line));
 749         if (!rsfp)
 750             sv_catpvs(parser->linestr, "\n;");
 751     } else {
 752         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 753     }
 754     parser->oldoldbufptr =
 755         parser->oldbufptr =
 756         parser->bufptr =
 757         parser->linestart = SvPVX(parser->linestr);
 758     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 759     parser->last_lop = parser->last_uni = NULL;
 760
 761     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 762                                                         |LEX_DONT_CLOSE_RSFP));
 763     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 764                                                         |LEX_DONT_CLOSE_RSFP));
 765
 766     parser->in_pod = parser->filtered = 0;
 767 }
 768
 769
 770 /* delete a parser object */
 771
 772 void
 773 Perl_parser_free(pTHX_  const yy_parser *parser)
 774 {
 775     PERL_ARGS_ASSERT_PARSER_FREE;
 776
 777     PL_curcop = parser->saved_curcop;
 778     SvREFCNT_dec(parser->linestr);
 779
 780     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 781         PerlIO_clearerr(parser->rsfp);
 782     else if (parser->rsfp && (!parser->old_parser
 783           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 784         PerlIO_close(parser->rsfp);
 785     SvREFCNT_dec(parser->rsfp_filters);
 786     SvREFCNT_dec(parser->lex_stuff);
 787     SvREFCNT_dec(parser->lex_sub_repl);
 788
 789     Safefree(parser->lex_brackstack);
 790     Safefree(parser->lex_casestack);
 791     Safefree(parser->lex_shared);
 792     PL_parser = parser->old_parser;
 793     Safefree(parser);
 794 }
 795
 796 void
 797 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 798 {
 799     I32 nexttoke = parser->nexttoke;
 800     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 801     while (nexttoke--) {
 802         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 803          && parser->nextval[nexttoke].opval
 804          && parser->nextval[nexttoke].opval->op_slabbed
 805          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 806             op_free(parser->nextval[nexttoke].opval);
 807             parser->nextval[nexttoke].opval = NULL;
 808         }
 809     }
 810 }
 811
 812
 813 /*
 814 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 815
 816 Buffer scalar containing the chunk currently under consideration of the
 817 text currently being lexed.  This is always a plain string scalar (for
 818 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 819 normal scalar means; instead refer to the buffer directly by the pointer
 820 variables described below.
 821
 822 The lexer maintains various C<char*> pointers to things in the
 823 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 824 reallocated, all of these pointers must be updated.  Don't attempt to
 825 do this manually, but rather use L</lex_grow_linestr> if you need to
 826 reallocate the buffer.
 827
 828 The content of the text chunk in the buffer is commonly exactly one
 829 complete line of input, up to and including a newline terminator,
 830 but there are situations where it is otherwise.  The octets of the
 831 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 832 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 833 flag on this scalar, which may disagree with it.
 834
 835 For direct examination of the buffer, the variable
 836 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 837 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 838 of these pointers is usually preferable to examination of the scalar
 839 through normal scalar means.
 840
 841 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 842
 843 Direct pointer to the end of the chunk of text currently being lexed, the
 844 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 845 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 846 always located at the end of the buffer, and does not count as part of
 847 the buffer's contents.
 848
 849 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 850
 851 Points to the current position of lexing inside the lexer buffer.
 852 Characters around this point may be freely examined, within
 853 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 854 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 855 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 856
 857 Lexing code (whether in the Perl core or not) moves this pointer past
 858 the characters that it consumes.  It is also expected to perform some
 859 bookkeeping whenever a newline character is consumed.  This movement
 860 can be more conveniently performed by the function L</lex_read_to>,
 861 which handles newlines appropriately.
 862
 863 Interpretation of the buffer's octets can be abstracted out by
 864 using the slightly higher-level functions L</lex_peek_unichar> and
 865 L</lex_read_unichar>.
 866
 867 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 868
 869 Points to the start of the current line inside the lexer buffer.
 870 This is useful for indicating at which column an error occurred, and
 871 not much else.  This must be updated by any lexing code that consumes
 872 a newline; the function L</lex_read_to> handles this detail.
 873
 874 =cut
 875 */
 876
 877 /*
 878 =for apidoc Amx|bool|lex_bufutf8
 879
 880 Indicates whether the octets in the lexer buffer
 881 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 882 of Unicode characters.  If not, they should be interpreted as Latin-1
 883 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 884
 885 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 886 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 887 encoding.
 888
 889 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 890 is significant, but not the whole story regarding the input character
 891 encoding.  Normally, when a file is being read, the scalar contains octets
 892 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 893 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 894 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 895 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 896 is in effect.  This logic may change in the future; use this function
 897 instead of implementing the logic yourself.
 898
 899 =cut
 900 */
 901
 902 bool
 903 Perl_lex_bufutf8(pTHX)
 904 {
 905     return UTF;
 906 }
 907
 908 /*
 909 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 910
 911 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 912 at least C<len> octets (including terminating C<NUL>).  Returns a
 913 pointer to the reallocated buffer.  This is necessary before making
 914 any direct modification of the buffer that would increase its length.
 915 L</lex_stuff_pvn> provides a more convenient way to insert text into
 916 the buffer.
 917
 918 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 919 this function updates all of the lexer's variables that point directly
 920 into the buffer.
 921
 922 =cut
 923 */
 924
 925 char *
 926 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 927 {
 928     SV *linestr;
 929     char *buf;
 930     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 931     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 932     bool current;
 933
 934     linestr = PL_parser->linestr;
 935     buf = SvPVX(linestr);
 936     if (len <= SvLEN(linestr))
 937         return buf;
 938
 939     /* Is the lex_shared linestr SV the same as the current linestr SV?
 940      * Only in this case does re_eval_start need adjusting, since it
 941      * points within lex_shared->ls_linestr's buffer */
 942     current = (   !PL_parser->lex_shared->ls_linestr
 943                || linestr == PL_parser->lex_shared->ls_linestr);
 944
 945     bufend_pos = PL_parser->bufend - buf;
 946     bufptr_pos = PL_parser->bufptr - buf;
 947     oldbufptr_pos = PL_parser->oldbufptr - buf;
 948     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 949     linestart_pos = PL_parser->linestart - buf;
 950     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 951     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 952     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
 953                             PL_parser->lex_shared->re_eval_start - buf : 0;
 954
 955     buf = sv_grow(linestr, len);
 956
 957     PL_parser->bufend = buf + bufend_pos;
 958     PL_parser->bufptr = buf + bufptr_pos;
 959     PL_parser->oldbufptr = buf + oldbufptr_pos;
 960     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 961     PL_parser->linestart = buf + linestart_pos;
 962     if (PL_parser->last_uni)
 963         PL_parser->last_uni = buf + last_uni_pos;
 964     if (PL_parser->last_lop)
 965         PL_parser->last_lop = buf + last_lop_pos;
 966     if (current && PL_parser->lex_shared->re_eval_start)
 967         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 968     return buf;
 969 }
 970
 971 /*
 972 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 973
 974 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 975 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 976 reallocating the buffer if necessary.  This means that lexing code that
 977 runs later will see the characters as if they had appeared in the input.
 978 It is not recommended to do this as part of normal parsing, and most
 979 uses of this facility run the risk of the inserted characters being
 980 interpreted in an unintended manner.
 981
 982 The string to be inserted is represented by C<len> octets starting
 983 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 984 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
 985 The characters are recoded for the lexer buffer, according to how the
 986 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 987 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 988 function is more convenient.
 989
 990 =cut
 991 */
 992
 993 void
 994 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 995 {
 996     dVAR;
 997     char *bufptr;
 998     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 999     if (flags & ~(LEX_STUFF_UTF8))
1000         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1001     if (UTF) {
1002         if (flags & LEX_STUFF_UTF8) {
1003             goto plain_copy;
1004         } else {
1005             STRLEN highhalf = 0;    /* Count of variants */
1006             const char *p, *e = pv+len;
1007             for (p = pv; p != e; p++) {
1008                 if (! UTF8_IS_INVARIANT(*p)) {
1009                     highhalf++;
1010                 }
1011             }
1012             if (!highhalf)
1013                 goto plain_copy;
1014             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1015             bufptr = PL_parser->bufptr;
1016             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1017             SvCUR_set(PL_parser->linestr,
1018                 SvCUR(PL_parser->linestr) + len+highhalf);
1019             PL_parser->bufend += len+highhalf;
1020             for (p = pv; p != e; p++) {
1021                 U8 c = (U8)*p;
1022                 if (! UTF8_IS_INVARIANT(c)) {
1023                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1024                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1025                 } else {
1026                     *bufptr++ = (char)c;
1027                 }
1028             }
1029         }
1030     } else {
1031         if (flags & LEX_STUFF_UTF8) {
1032             STRLEN highhalf = 0;
1033             const char *p, *e = pv+len;
1034             for (p = pv; p != e; p++) {
1035                 U8 c = (U8)*p;
1036                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1037                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1038                                 "non-Latin-1 character into Latin-1 input");
1039                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1040                     p++;
1041                     highhalf++;
1042                 } else if (! UTF8_IS_INVARIANT(c)) {
1043                     _force_out_malformed_utf8_message((U8 *) p, (U8 *) e,
1044                                                       0,
1045                                                       1 /* 1 means die */ );
1046                     NOT_REACHED; /* NOTREACHED */
1047                 }
1048             }
1049             if (!highhalf)
1050                 goto plain_copy;
1051             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1052             bufptr = PL_parser->bufptr;
1053             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1054             SvCUR_set(PL_parser->linestr,
1055                 SvCUR(PL_parser->linestr) + len-highhalf);
1056             PL_parser->bufend += len-highhalf;
1057             p = pv;
1058             while (p < e) {
1059                 if (UTF8_IS_INVARIANT(*p)) {
1060                     *bufptr++ = *p;
1061                     p++;
1062                 }
1063                 else {
1064                     assert(p < e -1 );
1065                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1066                     p += 2;
1067                 }
1068             }
1069         } else {
1070           plain_copy:
1071             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1072             bufptr = PL_parser->bufptr;
1073             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1074             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1075             PL_parser->bufend += len;
1076             Copy(pv, bufptr, len, char);
1077         }
1078     }
1079 }
1080
1081 /*
1082 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1083
1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086 reallocating the buffer if necessary.  This means that lexing code that
1087 runs later will see the characters as if they had appeared in the input.
1088 It is not recommended to do this as part of normal parsing, and most
1089 uses of this facility run the risk of the inserted characters being
1090 interpreted in an unintended manner.
1091
1092 The string to be inserted is represented by octets starting at C<pv>
1093 and continuing to the first nul.  These octets are interpreted as either
1094 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1095 in C<flags>.  The characters are recoded for the lexer buffer, according
1096 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1097 If it is not convenient to nul-terminate a string to be inserted, the
1098 L</lex_stuff_pvn> function is more appropriate.
1099
1100 =cut
1101 */
1102
1103 void
1104 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1105 {
1106     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1107     lex_stuff_pvn(pv, strlen(pv), flags);
1108 }
1109
1110 /*
1111 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1112
1113 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1114 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1115 reallocating the buffer if necessary.  This means that lexing code that
1116 runs later will see the characters as if they had appeared in the input.
1117 It is not recommended to do this as part of normal parsing, and most
1118 uses of this facility run the risk of the inserted characters being
1119 interpreted in an unintended manner.
1120
1121 The string to be inserted is the string value of C<sv>.  The characters
1122 are recoded for the lexer buffer, according to how the buffer is currently
1123 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1124 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1125 need to construct a scalar.
1126
1127 =cut
1128 */
1129
1130 void
1131 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1132 {
1133     char *pv;
1134     STRLEN len;
1135     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1136     if (flags)
1137         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1138     pv = SvPV(sv, len);
1139     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1140 }
1141
1142 /*
1143 =for apidoc Amx|void|lex_unstuff|char *ptr
1144
1145 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1146 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1147 This hides the discarded text from any lexing code that runs later,
1148 as if the text had never appeared.
1149
1150 This is not the normal way to consume lexed text.  For that, use
1151 L</lex_read_to>.
1152
1153 =cut
1154 */
1155
1156 void
1157 Perl_lex_unstuff(pTHX_ char *ptr)
1158 {
1159     char *buf, *bufend;
1160     STRLEN unstuff_len;
1161     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1162     buf = PL_parser->bufptr;
1163     if (ptr < buf)
1164         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1165     if (ptr == buf)
1166         return;
1167     bufend = PL_parser->bufend;
1168     if (ptr > bufend)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1170     unstuff_len = ptr - buf;
1171     Move(ptr, buf, bufend+1-ptr, char);
1172     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1173     PL_parser->bufend = bufend - unstuff_len;
1174 }
1175
1176 /*
1177 =for apidoc Amx|void|lex_read_to|char *ptr
1178
1179 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1180 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1181 performing the correct bookkeeping whenever a newline character is passed.
1182 This is the normal way to consume lexed text.
1183
1184 Interpretation of the buffer's octets can be abstracted out by
1185 using the slightly higher-level functions L</lex_peek_unichar> and
1186 L</lex_read_unichar>.
1187
1188 =cut
1189 */
1190
1191 void
1192 Perl_lex_read_to(pTHX_ char *ptr)
1193 {
1194     char *s;
1195     PERL_ARGS_ASSERT_LEX_READ_TO;
1196     s = PL_parser->bufptr;
1197     if (ptr < s || ptr > PL_parser->bufend)
1198         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1199     for (; s != ptr; s++)
1200         if (*s == '\n') {
1201             COPLINE_INC_WITH_HERELINES;
1202             PL_parser->linestart = s+1;
1203         }
1204     PL_parser->bufptr = ptr;
1205 }
1206
1207 /*
1208 =for apidoc Amx|void|lex_discard_to|char *ptr
1209
1210 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1211 up to C<ptr>.  The remaining content of the buffer will be moved, and
1212 all pointers into the buffer updated appropriately.  C<ptr> must not
1213 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1214 it is not permitted to discard text that has yet to be lexed.
1215
1216 Normally it is not necessarily to do this directly, because it suffices to
1217 use the implicit discarding behaviour of L</lex_next_chunk> and things
1218 based on it.  However, if a token stretches across multiple lines,
1219 and the lexing code has kept multiple lines of text in the buffer for
1220 that purpose, then after completion of the token it would be wise to
1221 explicitly discard the now-unneeded earlier lines, to avoid future
1222 multi-line tokens growing the buffer without bound.
1223
1224 =cut
1225 */
1226
1227 void
1228 Perl_lex_discard_to(pTHX_ char *ptr)
1229 {
1230     char *buf;
1231     STRLEN discard_len;
1232     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1233     buf = SvPVX(PL_parser->linestr);
1234     if (ptr < buf)
1235         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1236     if (ptr == buf)
1237         return;
1238     if (ptr > PL_parser->bufptr)
1239         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1240     discard_len = ptr - buf;
1241     if (PL_parser->oldbufptr < ptr)
1242         PL_parser->oldbufptr = ptr;
1243     if (PL_parser->oldoldbufptr < ptr)
1244         PL_parser->oldoldbufptr = ptr;
1245     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1246         PL_parser->last_uni = NULL;
1247     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1248         PL_parser->last_lop = NULL;
1249     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1250     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1251     PL_parser->bufend -= discard_len;
1252     PL_parser->bufptr -= discard_len;
1253     PL_parser->oldbufptr -= discard_len;
1254     PL_parser->oldoldbufptr -= discard_len;
1255     if (PL_parser->last_uni)
1256         PL_parser->last_uni -= discard_len;
1257     if (PL_parser->last_lop)
1258         PL_parser->last_lop -= discard_len;
1259 }
1260
1261 /*
1262 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1263
1264 Reads in the next chunk of text to be lexed, appending it to
1265 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1266 looked to the end of the current chunk and wants to know more.  It is
1267 usual, but not necessary, for lexing to have consumed the entirety of
1268 the current chunk at this time.
1269
1270 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1271 chunk (i.e., the current chunk has been entirely consumed), normally the
1272 current chunk will be discarded at the same time that the new chunk is
1273 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1274 will not be discarded.  If the current chunk has not been entirely
1275 consumed, then it will not be discarded regardless of the flag.
1276
1277 Returns true if some new text was added to the buffer, or false if the
1278 buffer has reached the end of the input text.
1279
1280 =cut
1281 */
1282
1283 #define LEX_FAKE_EOF 0x80000000
1284 #define LEX_NO_TERM  0x40000000 /* here-doc */
1285
1286 bool
1287 Perl_lex_next_chunk(pTHX_ U32 flags)
1288 {
1289     SV *linestr;
1290     char *buf;
1291     STRLEN old_bufend_pos, new_bufend_pos;
1292     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1293     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1294     bool got_some_for_debugger = 0;
1295     bool got_some;
1296     const U8* first_bad_char_loc;
1297
1298     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1299         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1300     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1301         return FALSE;
1302     linestr = PL_parser->linestr;
1303     buf = SvPVX(linestr);
1304     if (!(flags & LEX_KEEP_PREVIOUS)
1305           && PL_parser->bufptr == PL_parser->bufend)
1306     {
1307         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1308         linestart_pos = 0;
1309         if (PL_parser->last_uni != PL_parser->bufend)
1310             PL_parser->last_uni = NULL;
1311         if (PL_parser->last_lop != PL_parser->bufend)
1312             PL_parser->last_lop = NULL;
1313         last_uni_pos = last_lop_pos = 0;
1314         *buf = 0;
1315         SvCUR(linestr) = 0;
1316     } else {
1317         old_bufend_pos = PL_parser->bufend - buf;
1318         bufptr_pos = PL_parser->bufptr - buf;
1319         oldbufptr_pos = PL_parser->oldbufptr - buf;
1320         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1321         linestart_pos = PL_parser->linestart - buf;
1322         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1323         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1324     }
1325     if (flags & LEX_FAKE_EOF) {
1326         goto eof;
1327     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1328         got_some = 0;
1329     } else if (filter_gets(linestr, old_bufend_pos)) {
1330         got_some = 1;
1331         got_some_for_debugger = 1;
1332     } else if (flags & LEX_NO_TERM) {
1333         got_some = 0;
1334     } else {
1335         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1336             SvPVCLEAR(linestr);
1337         eof:
1338         /* End of real input.  Close filehandle (unless it was STDIN),
1339          * then add implicit termination.
1340          */
1341         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1342             PerlIO_clearerr(PL_parser->rsfp);
1343         else if (PL_parser->rsfp)
1344             (void)PerlIO_close(PL_parser->rsfp);
1345         PL_parser->rsfp = NULL;
1346         PL_parser->in_pod = PL_parser->filtered = 0;
1347         if (!PL_in_eval && PL_minus_p) {
1348             sv_catpvs(linestr,
1349                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1350             PL_minus_n = PL_minus_p = 0;
1351         } else if (!PL_in_eval && PL_minus_n) {
1352             sv_catpvs(linestr, /*{*/";}");
1353             PL_minus_n = 0;
1354         } else
1355             sv_catpvs(linestr, ";");
1356         got_some = 1;
1357     }
1358     buf = SvPVX(linestr);
1359     new_bufend_pos = SvCUR(linestr);
1360     PL_parser->bufend = buf + new_bufend_pos;
1361     PL_parser->bufptr = buf + bufptr_pos;
1362
1363     if (UTF && ! is_utf8_string_loc((U8 *) PL_parser->bufptr,
1364                                     PL_parser->bufend - PL_parser->bufptr,
1365                                     &first_bad_char_loc))
1366     {
1367         _force_out_malformed_utf8_message(first_bad_char_loc,
1368                                           (U8 *) PL_parser->bufend,
1369                                           0,
1370                                           1 /* 1 means die */ );
1371         NOT_REACHED; /* NOTREACHED */
1372     }
1373
1374     PL_parser->oldbufptr = buf + oldbufptr_pos;
1375     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1376     PL_parser->linestart = buf + linestart_pos;
1377     if (PL_parser->last_uni)
1378         PL_parser->last_uni = buf + last_uni_pos;
1379     if (PL_parser->last_lop)
1380         PL_parser->last_lop = buf + last_lop_pos;
1381     if (PL_parser->preambling != NOLINE) {
1382         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1383         PL_parser->preambling = NOLINE;
1384     }
1385     if (   got_some_for_debugger
1386         && PERLDB_LINE_OR_SAVESRC
1387         && PL_curstash != PL_debstash)
1388     {
1389         /* debugger active and we're not compiling the debugger code,
1390          * so store the line into the debugger's array of lines
1391          */
1392         update_debugger_info(NULL, buf+old_bufend_pos,
1393             new_bufend_pos-old_bufend_pos);
1394     }
1395     return got_some;
1396 }
1397
1398 /*
1399 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1400
1401 Looks ahead one (Unicode) character in the text currently being lexed.
1402 Returns the codepoint (unsigned integer value) of the next character,
1403 or -1 if lexing has reached the end of the input text.  To consume the
1404 peeked character, use L</lex_read_unichar>.
1405
1406 If the next character is in (or extends into) the next chunk of input
1407 text, the next chunk will be read in.  Normally the current chunk will be
1408 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1409 bit set, then the current chunk will not be discarded.
1410
1411 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1412 is encountered, an exception is generated.
1413
1414 =cut
1415 */
1416
1417 I32
1418 Perl_lex_peek_unichar(pTHX_ U32 flags)
1419 {
1420     dVAR;
1421     char *s, *bufend;
1422     if (flags & ~(LEX_KEEP_PREVIOUS))
1423         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1424     s = PL_parser->bufptr;
1425     bufend = PL_parser->bufend;
1426     if (UTF) {
1427         U8 head;
1428         I32 unichar;
1429         STRLEN len, retlen;
1430         if (s == bufend) {
1431             if (!lex_next_chunk(flags))
1432                 return -1;
1433             s = PL_parser->bufptr;
1434             bufend = PL_parser->bufend;
1435         }
1436         head = (U8)*s;
1437         if (UTF8_IS_INVARIANT(head))
1438             return head;
1439         if (UTF8_IS_START(head)) {
1440             len = UTF8SKIP(&head);
1441             while ((STRLEN)(bufend-s) < len) {
1442                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1443                     break;
1444                 s = PL_parser->bufptr;
1445                 bufend = PL_parser->bufend;
1446             }
1447         }
1448         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1449         if (retlen == (STRLEN)-1) {
1450             _force_out_malformed_utf8_message((U8 *) s,
1451                                               (U8 *) bufend,
1452                                               0,
1453                                               1 /* 1 means die */ );
1454             NOT_REACHED; /* NOTREACHED */
1455         }
1456         return unichar;
1457     } else {
1458         if (s == bufend) {
1459             if (!lex_next_chunk(flags))
1460                 return -1;
1461             s = PL_parser->bufptr;
1462         }
1463         return (U8)*s;
1464     }
1465 }
1466
1467 /*
1468 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1469
1470 Reads the next (Unicode) character in the text currently being lexed.
1471 Returns the codepoint (unsigned integer value) of the character read,
1472 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1473 if lexing has reached the end of the input text.  To non-destructively
1474 examine the next character, use L</lex_peek_unichar> instead.
1475
1476 If the next character is in (or extends into) the next chunk of input
1477 text, the next chunk will be read in.  Normally the current chunk will be
1478 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1479 bit set, then the current chunk will not be discarded.
1480
1481 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1482 is encountered, an exception is generated.
1483
1484 =cut
1485 */
1486
1487 I32
1488 Perl_lex_read_unichar(pTHX_ U32 flags)
1489 {
1490     I32 c;
1491     if (flags & ~(LEX_KEEP_PREVIOUS))
1492         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1493     c = lex_peek_unichar(flags);
1494     if (c != -1) {
1495         if (c == '\n')
1496             COPLINE_INC_WITH_HERELINES;
1497         if (UTF)
1498             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1499         else
1500             ++(PL_parser->bufptr);
1501     }
1502     return c;
1503 }
1504
1505 /*
1506 =for apidoc Amx|void|lex_read_space|U32 flags
1507
1508 Reads optional spaces, in Perl style, in the text currently being
1509 lexed.  The spaces may include ordinary whitespace characters and
1510 Perl-style comments.  C<#line> directives are processed if encountered.
1511 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1512 at a non-space character (or the end of the input text).
1513
1514 If spaces extend into the next chunk of input text, the next chunk will
1515 be read in.  Normally the current chunk will be discarded at the same
1516 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1517 chunk will not be discarded.
1518
1519 =cut
1520 */
1521
1522 #define LEX_NO_INCLINE    0x40000000
1523 #define LEX_NO_NEXT_CHUNK 0x80000000
1524
1525 void
1526 Perl_lex_read_space(pTHX_ U32 flags)
1527 {
1528     char *s, *bufend;
1529     const bool can_incline = !(flags & LEX_NO_INCLINE);
1530     bool need_incline = 0;
1531     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1532         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1533     s = PL_parser->bufptr;
1534     bufend = PL_parser->bufend;
1535     while (1) {
1536         char c = *s;
1537         if (c == '#') {
1538             do {
1539                 c = *++s;
1540             } while (!(c == '\n' || (c == 0 && s == bufend)));
1541         } else if (c == '\n') {
1542             s++;
1543             if (can_incline) {
1544                 PL_parser->linestart = s;
1545                 if (s == bufend)
1546                     need_incline = 1;
1547                 else
1548                     incline(s);
1549             }
1550         } else if (isSPACE(c)) {
1551             s++;
1552         } else if (c == 0 && s == bufend) {
1553             bool got_more;
1554             line_t l;
1555             if (flags & LEX_NO_NEXT_CHUNK)
1556                 break;
1557             PL_parser->bufptr = s;
1558             l = CopLINE(PL_curcop);
1559             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1560             got_more = lex_next_chunk(flags);
1561             CopLINE_set(PL_curcop, l);
1562             s = PL_parser->bufptr;
1563             bufend = PL_parser->bufend;
1564             if (!got_more)
1565                 break;
1566             if (can_incline && need_incline && PL_parser->rsfp) {
1567                 incline(s);
1568                 need_incline = 0;
1569             }
1570         } else if (!c) {
1571             s++;
1572         } else {
1573             break;
1574         }
1575     }
1576     PL_parser->bufptr = s;
1577 }
1578
1579 /*
1580
1581 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1582
1583 This function performs syntax checking on a prototype, C<proto>.
1584 If C<warn> is true, any illegal characters or mismatched brackets
1585 will trigger illegalproto warnings, declaring that they were
1586 detected in the prototype for C<name>.
1587
1588 The return value is C<true> if this is a valid prototype, and
1589 C<false> if it is not, regardless of whether C<warn> was C<true> or
1590 C<false>.
1591
1592 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1593
1594 =cut
1595
1596  */
1597
1598 bool
1599 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1600 {
1601     STRLEN len, origlen;
1602     char *p;
1603     bool bad_proto = FALSE;
1604     bool in_brackets = FALSE;
1605     bool after_slash = FALSE;
1606     char greedy_proto = ' ';
1607     bool proto_after_greedy_proto = FALSE;
1608     bool must_be_last = FALSE;
1609     bool underscore = FALSE;
1610     bool bad_proto_after_underscore = FALSE;
1611
1612     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1613
1614     if (!proto)
1615         return TRUE;
1616
1617     p = SvPV(proto, len);
1618     origlen = len;
1619     for (; len--; p++) {
1620         if (!isSPACE(*p)) {
1621             if (must_be_last)
1622                 proto_after_greedy_proto = TRUE;
1623             if (underscore) {
1624                 if (!strchr(";@%", *p))
1625                     bad_proto_after_underscore = TRUE;
1626                 underscore = FALSE;
1627             }
1628             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1629                 bad_proto = TRUE;
1630             }
1631             else {
1632                 if (*p == '[')
1633                     in_brackets = TRUE;
1634                 else if (*p == ']')
1635                     in_brackets = FALSE;
1636                 else if ((*p == '@' || *p == '%')
1637                          && !after_slash
1638                          && !in_brackets )
1639                 {
1640                     must_be_last = TRUE;
1641                     greedy_proto = *p;
1642                 }
1643                 else if (*p == '_')
1644                     underscore = TRUE;
1645             }
1646             if (*p == '\\')
1647                 after_slash = TRUE;
1648             else
1649                 after_slash = FALSE;
1650         }
1651     }
1652
1653     if (warn) {
1654         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1655         p -= origlen;
1656         p = SvUTF8(proto)
1657             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1658                              origlen, UNI_DISPLAY_ISPRINT)
1659             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1660
1661         if (proto_after_greedy_proto)
1662             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1663                         "Prototype after '%c' for %" SVf " : %s",
1664                         greedy_proto, SVfARG(name), p);
1665         if (in_brackets)
1666             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1667                         "Missing ']' in prototype for %" SVf " : %s",
1668                         SVfARG(name), p);
1669         if (bad_proto)
1670             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1671                         "Illegal character in prototype for %" SVf " : %s",
1672                         SVfARG(name), p);
1673         if (bad_proto_after_underscore)
1674             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1675                         "Illegal character after '_' in prototype for %" SVf " : %s",
1676                         SVfARG(name), p);
1677     }
1678
1679     return (! (proto_after_greedy_proto || bad_proto) );
1680 }
1681
1682 /*
1683  * S_incline
1684  * This subroutine has nothing to do with tilting, whether at windmills
1685  * or pinball tables.  Its name is short for "increment line".  It
1686  * increments the current line number in CopLINE(PL_curcop) and checks
1687  * to see whether the line starts with a comment of the form
1688  *    # line 500 "foo.pm"
1689  * If so, it sets the current line number and file to the values in the comment.
1690  */
1691
1692 STATIC void
1693 S_incline(pTHX_ const char *s)
1694 {
1695     const char *t;
1696     const char *n;
1697     const char *e;
1698     line_t line_num;
1699     UV uv;
1700
1701     PERL_ARGS_ASSERT_INCLINE;
1702
1703     COPLINE_INC_WITH_HERELINES;
1704     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1705      && s+1 == PL_bufend && *s == ';') {
1706         /* fake newline in string eval */
1707         CopLINE_dec(PL_curcop);
1708         return;
1709     }
1710     if (*s++ != '#')
1711         return;
1712     while (SPACE_OR_TAB(*s))
1713         s++;
1714     if (strEQs(s, "line"))
1715         s += 4;
1716     else
1717         return;
1718     if (SPACE_OR_TAB(*s))
1719         s++;
1720     else
1721         return;
1722     while (SPACE_OR_TAB(*s))
1723         s++;
1724     if (!isDIGIT(*s))
1725         return;
1726
1727     n = s;
1728     while (isDIGIT(*s))
1729         s++;
1730     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1731         return;
1732     while (SPACE_OR_TAB(*s))
1733         s++;
1734     if (*s == '"' && (t = strchr(s+1, '"'))) {
1735         s++;
1736         e = t + 1;
1737     }
1738     else {
1739         t = s;
1740         while (*t && !isSPACE(*t))
1741             t++;
1742         e = t;
1743     }
1744     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1745         e++;
1746     if (*e != '\n' && *e != '\0')
1747         return;         /* false alarm */
1748
1749     if (!grok_atoUV(n, &uv, &e))
1750         return;
1751     line_num = ((line_t)uv) - 1;
1752
1753     if (t - s > 0) {
1754         const STRLEN len = t - s;
1755
1756         if (!PL_rsfp && !PL_parser->filtered) {
1757             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1758              * to *{"::_<newfilename"} */
1759             /* However, the long form of evals is only turned on by the
1760                debugger - usually they're "(eval %lu)" */
1761             GV * const cfgv = CopFILEGV(PL_curcop);
1762             if (cfgv) {
1763                 char smallbuf[128];
1764                 STRLEN tmplen2 = len;
1765                 char *tmpbuf2;
1766                 GV *gv2;
1767
1768                 if (tmplen2 + 2 <= sizeof smallbuf)
1769                     tmpbuf2 = smallbuf;
1770                 else
1771                     Newx(tmpbuf2, tmplen2 + 2, char);
1772
1773                 tmpbuf2[0] = '_';
1774                 tmpbuf2[1] = '<';
1775
1776                 memcpy(tmpbuf2 + 2, s, tmplen2);
1777                 tmplen2 += 2;
1778
1779                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1780                 if (!isGV(gv2)) {
1781                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1782                     /* adjust ${"::_<newfilename"} to store the new file name */
1783                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1784                     /* The line number may differ. If that is the case,
1785                        alias the saved lines that are in the array.
1786                        Otherwise alias the whole array. */
1787                     if (CopLINE(PL_curcop) == line_num) {
1788                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1789                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1790                     }
1791                     else if (GvAV(cfgv)) {
1792                         AV * const av = GvAV(cfgv);
1793                         const I32 start = CopLINE(PL_curcop)+1;
1794                         I32 items = AvFILLp(av) - start;
1795                         if (items > 0) {
1796                             AV * const av2 = GvAVn(gv2);
1797                             SV **svp = AvARRAY(av) + start;
1798                             I32 l = (I32)line_num+1;
1799                             while (items--)
1800                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1801                         }
1802                     }
1803                 }
1804
1805                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1806             }
1807         }
1808         CopFILE_free(PL_curcop);
1809         CopFILE_setn(PL_curcop, s, len);
1810     }
1811     CopLINE_set(PL_curcop, line_num);
1812 }
1813
1814 STATIC void
1815 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1816 {
1817     AV *av = CopFILEAVx(PL_curcop);
1818     if (av) {
1819         SV * sv;
1820         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1821         else {
1822             sv = *av_fetch(av, 0, 1);
1823             SvUPGRADE(sv, SVt_PVMG);
1824         }
1825         if (!SvPOK(sv)) SvPVCLEAR(sv);
1826         if (orig_sv)
1827             sv_catsv(sv, orig_sv);
1828         else
1829             sv_catpvn(sv, buf, len);
1830         if (!SvIOK(sv)) {
1831             (void)SvIOK_on(sv);
1832             SvIV_set(sv, 0);
1833         }
1834         if (PL_parser->preambling == NOLINE)
1835             av_store(av, CopLINE(PL_curcop), sv);
1836     }
1837 }
1838
1839 /*
1840  * skipspace
1841  * Called to gobble the appropriate amount and type of whitespace.
1842  * Skips comments as well.
1843  * Returns the next character after the whitespace that is skipped.
1844  *
1845  * peekspace
1846  * Same thing, but look ahead without incrementing line numbers or
1847  * adjusting PL_linestart.
1848  */
1849
1850 #define skipspace(s) skipspace_flags(s, 0)
1851 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1852
1853 STATIC char *
1854 S_skipspace_flags(pTHX_ char *s, U32 flags)
1855 {
1856     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1857     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1858         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1859             s++;
1860     } else {
1861         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1862         PL_bufptr = s;
1863         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1864                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1865                     LEX_NO_NEXT_CHUNK : 0));
1866         s = PL_bufptr;
1867         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1868         if (PL_linestart > PL_bufptr)
1869             PL_bufptr = PL_linestart;
1870         return s;
1871     }
1872     return s;
1873 }
1874
1875 /*
1876  * S_check_uni
1877  * Check the unary operators to ensure there's no ambiguity in how they're
1878  * used.  An ambiguous piece of code would be:
1879  *     rand + 5
1880  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1881  * the +5 is its argument.
1882  */
1883
1884 STATIC void
1885 S_check_uni(pTHX)
1886 {
1887     const char *s;
1888     const char *t;
1889
1890     if (PL_oldoldbufptr != PL_last_uni)
1891         return;
1892     while (isSPACE(*PL_last_uni))
1893         PL_last_uni++;
1894     s = PL_last_uni;
1895     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1896         s += UTF ? UTF8SKIP(s) : 1;
1897     if ((t = strchr(s, '(')) && t < PL_bufptr)
1898         return;
1899
1900     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1901                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1902                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1903 }
1904
1905 /*
1906  * LOP : macro to build a list operator.  Its behaviour has been replaced
1907  * with a subroutine, S_lop() for which LOP is just another name.
1908  */
1909
1910 #define LOP(f,x) return lop(f,x,s)
1911
1912 /*
1913  * S_lop
1914  * Build a list operator (or something that might be one).  The rules:
1915  *  - if we have a next token, then it's a list operator (no parens) for
1916  *    which the next token has already been parsed; e.g.,
1917  *       sort foo @args
1918  *       sort foo (@args)
1919  *  - if the next thing is an opening paren, then it's a function
1920  *  - else it's a list operator
1921  */
1922
1923 STATIC I32
1924 S_lop(pTHX_ I32 f, U8 x, char *s)
1925 {
1926     PERL_ARGS_ASSERT_LOP;
1927
1928     pl_yylval.ival = f;
1929     CLINE;
1930     PL_bufptr = s;
1931     PL_last_lop = PL_oldbufptr;
1932     PL_last_lop_op = (OPCODE)f;
1933     if (PL_nexttoke)
1934         goto lstop;
1935     PL_expect = x;
1936     if (*s == '(')
1937         return REPORT(FUNC);
1938     s = skipspace(s);
1939     if (*s == '(')
1940         return REPORT(FUNC);
1941     else {
1942         lstop:
1943         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1944             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1945         return REPORT(LSTOP);
1946     }
1947 }
1948
1949 /*
1950  * S_force_next
1951  * When the lexer realizes it knows the next token (for instance,
1952  * it is reordering tokens for the parser) then it can call S_force_next
1953  * to know what token to return the next time the lexer is called.  Caller
1954  * will need to set PL_nextval[] and possibly PL_expect to ensure
1955  * the lexer handles the token correctly.
1956  */
1957
1958 STATIC void
1959 S_force_next(pTHX_ I32 type)
1960 {
1961 #ifdef DEBUGGING
1962     if (DEBUG_T_TEST) {
1963         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1964         tokereport(type, &NEXTVAL_NEXTTOKE);
1965     }
1966 #endif
1967     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1968     PL_nexttype[PL_nexttoke] = type;
1969     PL_nexttoke++;
1970 }
1971
1972 /*
1973  * S_postderef
1974  *
1975  * This subroutine handles postfix deref syntax after the arrow has already
1976  * been emitted.  @* $* etc. are emitted as two separate token right here.
1977  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1978  * only the first, leaving yylex to find the next.
1979  */
1980
1981 static int
1982 S_postderef(pTHX_ int const funny, char const next)
1983 {
1984     assert(funny == DOLSHARP || strchr("$@%&*", funny));
1985     if (next == '*') {
1986         PL_expect = XOPERATOR;
1987         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1988             assert('@' == funny || '$' == funny || DOLSHARP == funny);
1989             PL_lex_state = LEX_INTERPEND;
1990             if ('@' == funny)
1991                 force_next(POSTJOIN);
1992         }
1993         force_next(next);
1994         PL_bufptr+=2;
1995     }
1996     else {
1997         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1998          && !PL_lex_brackets)
1999             PL_lex_dojoin = 2;
2000         PL_expect = XOPERATOR;
2001         PL_bufptr++;
2002     }
2003     return funny;
2004 }
2005
2006 void
2007 Perl_yyunlex(pTHX)
2008 {
2009     int yyc = PL_parser->yychar;
2010     if (yyc != YYEMPTY) {
2011         if (yyc) {
2012             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2013             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2014                 PL_lex_allbrackets--;
2015                 PL_lex_brackets--;
2016                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2017             } else if (yyc == '('/*)*/) {
2018                 PL_lex_allbrackets--;
2019                 yyc |= (2<<24);
2020             }
2021             force_next(yyc);
2022         }
2023         PL_parser->yychar = YYEMPTY;
2024     }
2025 }
2026
2027 STATIC SV *
2028 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2029 {
2030     SV * const sv = newSVpvn_utf8(start, len,
2031                           !IN_BYTES
2032                           && UTF
2033                           && !is_utf8_invariant_string((const U8*)start, len)
2034                           && is_utf8_string((const U8*)start, len));
2035     return sv;
2036 }
2037
2038 /*
2039  * S_force_word
2040  * When the lexer knows the next thing is a word (for instance, it has
2041  * just seen -> and it knows that the next char is a word char, then
2042  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2043  * lookahead.
2044  *
2045  * Arguments:
2046  *   char *start : buffer position (must be within PL_linestr)
2047  *   int token   : PL_next* will be this type of bare word
2048  *                 (e.g., METHOD,BAREWORD)
2049  *   int check_keyword : if true, Perl checks to make sure the word isn't
2050  *       a keyword (do this if the word is a label, e.g. goto FOO)
2051  *   int allow_pack : if true, : characters will also be allowed (require,
2052  *       use, etc. do this)
2053  */
2054
2055 STATIC char *
2056 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2057 {
2058     char *s;
2059     STRLEN len;
2060
2061     PERL_ARGS_ASSERT_FORCE_WORD;
2062
2063     start = skipspace(start);
2064     s = start;
2065     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2066         || (allow_pack && *s == ':' && s[1] == ':') )
2067     {
2068         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2069         if (check_keyword) {
2070           char *s2 = PL_tokenbuf;
2071           STRLEN len2 = len;
2072           if (allow_pack && len > 6 && strEQs(s2, "CORE::"))
2073             s2 += 6, len2 -= 6;
2074           if (keyword(s2, len2, 0))
2075             return start;
2076         }
2077         if (token == METHOD) {
2078             s = skipspace(s);
2079             if (*s == '(')
2080                 PL_expect = XTERM;
2081             else {
2082                 PL_expect = XOPERATOR;
2083             }
2084         }
2085         NEXTVAL_NEXTTOKE.opval
2086             = newSVOP(OP_CONST,0,
2087                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2088         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2089         force_next(token);
2090     }
2091     return s;
2092 }
2093
2094 /*
2095  * S_force_ident
2096  * Called when the lexer wants $foo *foo &foo etc, but the program
2097  * text only contains the "foo" portion.  The first argument is a pointer
2098  * to the "foo", and the second argument is the type symbol to prefix.
2099  * Forces the next token to be a "BAREWORD".
2100  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2101  */
2102
2103 STATIC void
2104 S_force_ident(pTHX_ const char *s, int kind)
2105 {
2106     PERL_ARGS_ASSERT_FORCE_IDENT;
2107
2108     if (s[0]) {
2109         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2110         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2111                                                                 UTF ? SVf_UTF8 : 0));
2112         NEXTVAL_NEXTTOKE.opval = o;
2113         force_next(BAREWORD);
2114         if (kind) {
2115             o->op_private = OPpCONST_ENTERED;
2116             /* XXX see note in pp_entereval() for why we forgo typo
2117                warnings if the symbol must be introduced in an eval.
2118                GSAR 96-10-12 */
2119             gv_fetchpvn_flags(s, len,
2120                               (PL_in_eval ? GV_ADDMULTI
2121                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2122                               kind == '$' ? SVt_PV :
2123                               kind == '@' ? SVt_PVAV :
2124                               kind == '%' ? SVt_PVHV :
2125                               SVt_PVGV
2126                               );
2127         }
2128     }
2129 }
2130
2131 static void
2132 S_force_ident_maybe_lex(pTHX_ char pit)
2133 {
2134     NEXTVAL_NEXTTOKE.ival = pit;
2135     force_next('p');
2136 }
2137
2138 NV
2139 Perl_str_to_version(pTHX_ SV *sv)
2140 {
2141     NV retval = 0.0;
2142     NV nshift = 1.0;
2143     STRLEN len;
2144     const char *start = SvPV_const(sv,len);
2145     const char * const end = start + len;
2146     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2147
2148     PERL_ARGS_ASSERT_STR_TO_VERSION;
2149
2150     while (start < end) {
2151         STRLEN skip;
2152         UV n;
2153         if (utf)
2154             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2155         else {
2156             n = *(U8*)start;
2157             skip = 1;
2158         }
2159         retval += ((NV)n)/nshift;
2160         start += skip;
2161         nshift *= 1000;
2162     }
2163     return retval;
2164 }
2165
2166 /*
2167  * S_force_version
2168  * Forces the next token to be a version number.
2169  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2170  * and if "guessing" is TRUE, then no new token is created (and the caller
2171  * must use an alternative parsing method).
2172  */
2173
2174 STATIC char *
2175 S_force_version(pTHX_ char *s, int guessing)
2176 {
2177     OP *version = NULL;
2178     char *d;
2179
2180     PERL_ARGS_ASSERT_FORCE_VERSION;
2181
2182     s = skipspace(s);
2183
2184     d = s;
2185     if (*d == 'v')
2186         d++;
2187     if (isDIGIT(*d)) {
2188         while (isDIGIT(*d) || *d == '_' || *d == '.')
2189             d++;
2190         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2191             SV *ver;
2192             s = scan_num(s, &pl_yylval);
2193             version = pl_yylval.opval;
2194             ver = cSVOPx(version)->op_sv;
2195             if (SvPOK(ver) && !SvNIOK(ver)) {
2196                 SvUPGRADE(ver, SVt_PVNV);
2197                 SvNV_set(ver, str_to_version(ver));
2198                 SvNOK_on(ver);          /* hint that it is a version */
2199             }
2200         }
2201         else if (guessing) {
2202             return s;
2203         }
2204     }
2205
2206     /* NOTE: The parser sees the package name and the VERSION swapped */
2207     NEXTVAL_NEXTTOKE.opval = version;
2208     force_next(BAREWORD);
2209
2210     return s;
2211 }
2212
2213 /*
2214  * S_force_strict_version
2215  * Forces the next token to be a version number using strict syntax rules.
2216  */
2217
2218 STATIC char *
2219 S_force_strict_version(pTHX_ char *s)
2220 {
2221     OP *version = NULL;
2222     const char *errstr = NULL;
2223
2224     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2225
2226     while (isSPACE(*s)) /* leading whitespace */
2227         s++;
2228
2229     if (is_STRICT_VERSION(s,&errstr)) {
2230         SV *ver = newSV(0);
2231         s = (char *)scan_version(s, ver, 0);
2232         version = newSVOP(OP_CONST, 0, ver);
2233     }
2234     else if ((*s != ';' && *s != '{' && *s != '}' )
2235              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2236     {
2237         PL_bufptr = s;
2238         if (errstr)
2239             yyerror(errstr); /* version required */
2240         return s;
2241     }
2242
2243     /* NOTE: The parser sees the package name and the VERSION swapped */
2244     NEXTVAL_NEXTTOKE.opval = version;
2245     force_next(BAREWORD);
2246
2247     return s;
2248 }
2249
2250 /*
2251  * S_tokeq
2252  * Tokenize a quoted string passed in as an SV.  It finds the next
2253  * chunk, up to end of string or a backslash.  It may make a new
2254  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2255  * turns \\ into \.
2256  */
2257
2258 STATIC SV *
2259 S_tokeq(pTHX_ SV *sv)
2260 {
2261     char *s;
2262     char *send;
2263     char *d;
2264     SV *pv = sv;
2265
2266     PERL_ARGS_ASSERT_TOKEQ;
2267
2268     assert (SvPOK(sv));
2269     assert (SvLEN(sv));
2270     assert (!SvIsCOW(sv));
2271     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2272         goto finish;
2273     s = SvPVX(sv);
2274     send = SvEND(sv);
2275     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2276     while (s < send && !(*s == '\\' && s[1] == '\\'))
2277         s++;
2278     if (s == send)
2279         goto finish;
2280     d = s;
2281     if ( PL_hints & HINT_NEW_STRING ) {
2282         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2283                             SVs_TEMP | SvUTF8(sv));
2284     }
2285     while (s < send) {
2286         if (*s == '\\') {
2287             if (s + 1 < send && (s[1] == '\\'))
2288                 s++;            /* all that, just for this */
2289         }
2290         *d++ = *s++;
2291     }
2292     *d = '\0';
2293     SvCUR_set(sv, d - SvPVX_const(sv));
2294   finish:
2295     if ( PL_hints & HINT_NEW_STRING )
2296        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2297     return sv;
2298 }
2299
2300 /*
2301  * Now come three functions related to double-quote context,
2302  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2303  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2304  * interact with PL_lex_state, and create fake ( ... ) argument lists
2305  * to handle functions and concatenation.
2306  * For example,
2307  *   "foo\lbar"
2308  * is tokenised as
2309  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2310  */
2311
2312 /*
2313  * S_sublex_start
2314  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2315  *
2316  * Pattern matching will set PL_lex_op to the pattern-matching op to
2317  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2318  *
2319  * OP_CONST is easy--just make the new op and return.
2320  *
2321  * Everything else becomes a FUNC.
2322  *
2323  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2324  * had an OP_CONST.  This just sets us up for a
2325  * call to S_sublex_push().
2326  */
2327
2328 STATIC I32
2329 S_sublex_start(pTHX)
2330 {
2331     const I32 op_type = pl_yylval.ival;
2332
2333     if (op_type == OP_NULL) {
2334         pl_yylval.opval = PL_lex_op;
2335         PL_lex_op = NULL;
2336         return THING;
2337     }
2338     if (op_type == OP_CONST) {
2339         SV *sv = PL_lex_stuff;
2340         PL_lex_stuff = NULL;
2341         sv = tokeq(sv);
2342
2343         if (SvTYPE(sv) == SVt_PVIV) {
2344             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2345             STRLEN len;
2346             const char * const p = SvPV_const(sv, len);
2347             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2348             SvREFCNT_dec(sv);
2349             sv = nsv;
2350         }
2351         pl_yylval.opval = newSVOP(op_type, 0, sv);
2352         return THING;
2353     }
2354
2355     PL_parser->lex_super_state = PL_lex_state;
2356     PL_parser->lex_sub_inwhat = (U16)op_type;
2357     PL_parser->lex_sub_op = PL_lex_op;
2358     PL_lex_state = LEX_INTERPPUSH;
2359
2360     PL_expect = XTERM;
2361     if (PL_lex_op) {
2362         pl_yylval.opval = PL_lex_op;
2363         PL_lex_op = NULL;
2364         return PMFUNC;
2365     }
2366     else
2367         return FUNC;
2368 }
2369
2370 /*
2371  * S_sublex_push
2372  * Create a new scope to save the lexing state.  The scope will be
2373  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2374  * to the uc, lc, etc. found before.
2375  * Sets PL_lex_state to LEX_INTERPCONCAT.
2376  */
2377
2378 STATIC I32
2379 S_sublex_push(pTHX)
2380 {
2381     LEXSHARED *shared;
2382     const bool is_heredoc = PL_multi_close == '<';
2383     ENTER;
2384
2385     PL_lex_state = PL_parser->lex_super_state;
2386     SAVEI8(PL_lex_dojoin);
2387     SAVEI32(PL_lex_brackets);
2388     SAVEI32(PL_lex_allbrackets);
2389     SAVEI32(PL_lex_formbrack);
2390     SAVEI8(PL_lex_fakeeof);
2391     SAVEI32(PL_lex_casemods);
2392     SAVEI32(PL_lex_starts);
2393     SAVEI8(PL_lex_state);
2394     SAVESPTR(PL_lex_repl);
2395     SAVEVPTR(PL_lex_inpat);
2396     SAVEI16(PL_lex_inwhat);
2397     if (is_heredoc)
2398     {
2399         SAVECOPLINE(PL_curcop);
2400         SAVEI32(PL_multi_end);
2401         SAVEI32(PL_parser->herelines);
2402         PL_parser->herelines = 0;
2403     }
2404     SAVEIV(PL_multi_close);
2405     SAVEPPTR(PL_bufptr);
2406     SAVEPPTR(PL_bufend);
2407     SAVEPPTR(PL_oldbufptr);
2408     SAVEPPTR(PL_oldoldbufptr);
2409     SAVEPPTR(PL_last_lop);
2410     SAVEPPTR(PL_last_uni);
2411     SAVEPPTR(PL_linestart);
2412     SAVESPTR(PL_linestr);
2413     SAVEGENERICPV(PL_lex_brackstack);
2414     SAVEGENERICPV(PL_lex_casestack);
2415     SAVEGENERICPV(PL_parser->lex_shared);
2416     SAVEBOOL(PL_parser->lex_re_reparsing);
2417     SAVEI32(PL_copline);
2418
2419     /* The here-doc parser needs to be able to peek into outer lexing
2420        scopes to find the body of the here-doc.  So we put PL_linestr and
2421        PL_bufptr into lex_shared, to ‘share’ those values.
2422      */
2423     PL_parser->lex_shared->ls_linestr = PL_linestr;
2424     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2425
2426     PL_linestr = PL_lex_stuff;
2427     PL_lex_repl = PL_parser->lex_sub_repl;
2428     PL_lex_stuff = NULL;
2429     PL_parser->lex_sub_repl = NULL;
2430
2431     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2432        set for an inner quote-like operator and then an error causes scope-
2433        popping.  We must not have a PL_lex_stuff value left dangling, as
2434        that breaks assumptions elsewhere.  See bug #123617.  */
2435     SAVEGENERICSV(PL_lex_stuff);
2436     SAVEGENERICSV(PL_parser->lex_sub_repl);
2437
2438     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2439         = SvPVX(PL_linestr);
2440     PL_bufend += SvCUR(PL_linestr);
2441     PL_last_lop = PL_last_uni = NULL;
2442     SAVEFREESV(PL_linestr);
2443     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2444
2445     PL_lex_dojoin = FALSE;
2446     PL_lex_brackets = PL_lex_formbrack = 0;
2447     PL_lex_allbrackets = 0;
2448     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2449     Newx(PL_lex_brackstack, 120, char);
2450     Newx(PL_lex_casestack, 12, char);
2451     PL_lex_casemods = 0;
2452     *PL_lex_casestack = '\0';
2453     PL_lex_starts = 0;
2454     PL_lex_state = LEX_INTERPCONCAT;
2455     if (is_heredoc)
2456         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2457     PL_copline = NOLINE;
2458
2459     Newxz(shared, 1, LEXSHARED);
2460     shared->ls_prev = PL_parser->lex_shared;
2461     PL_parser->lex_shared = shared;
2462
2463     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2464     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2465     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2466         PL_lex_inpat = PL_parser->lex_sub_op;
2467     else
2468         PL_lex_inpat = NULL;
2469
2470     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2471     PL_in_eval &= ~EVAL_RE_REPARSING;
2472
2473     return '(';
2474 }
2475
2476 /*
2477  * S_sublex_done
2478  * Restores lexer state after a S_sublex_push.
2479  */
2480
2481 STATIC I32
2482 S_sublex_done(pTHX)
2483 {
2484     if (!PL_lex_starts++) {
2485         SV * const sv = newSVpvs("");
2486         if (SvUTF8(PL_linestr))
2487             SvUTF8_on(sv);
2488         PL_expect = XOPERATOR;
2489         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2490         return THING;
2491     }
2492
2493     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2494         PL_lex_state = LEX_INTERPCASEMOD;
2495         return yylex();
2496     }
2497
2498     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2499     assert(PL_lex_inwhat != OP_TRANSR);
2500     if (PL_lex_repl) {
2501         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2502         PL_linestr = PL_lex_repl;
2503         PL_lex_inpat = 0;
2504         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2505         PL_bufend += SvCUR(PL_linestr);
2506         PL_last_lop = PL_last_uni = NULL;
2507         PL_lex_dojoin = FALSE;
2508         PL_lex_brackets = 0;
2509         PL_lex_allbrackets = 0;
2510         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2511         PL_lex_casemods = 0;
2512         *PL_lex_casestack = '\0';
2513         PL_lex_starts = 0;
2514         if (SvEVALED(PL_lex_repl)) {
2515             PL_lex_state = LEX_INTERPNORMAL;
2516             PL_lex_starts++;
2517             /*  we don't clear PL_lex_repl here, so that we can check later
2518                 whether this is an evalled subst; that means we rely on the
2519                 logic to ensure sublex_done() is called again only via the
2520                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2521         }
2522         else {
2523             PL_lex_state = LEX_INTERPCONCAT;
2524             PL_lex_repl = NULL;
2525         }
2526         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2527             CopLINE(PL_curcop) +=
2528                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2529                  + PL_parser->herelines;
2530             PL_parser->herelines = 0;
2531         }
2532         return '/';
2533     }
2534     else {
2535         const line_t l = CopLINE(PL_curcop);
2536         LEAVE;
2537         if (PL_multi_close == '<')
2538             PL_parser->herelines += l - PL_multi_end;
2539         PL_bufend = SvPVX(PL_linestr);
2540         PL_bufend += SvCUR(PL_linestr);
2541         PL_expect = XOPERATOR;
2542         return ')';
2543     }
2544 }
2545
2546 STATIC SV*
2547 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2548 {
2549     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2550      * interior, hence to the "}".  Finds what the name resolves to, returning
2551      * an SV* containing it; NULL if no valid one found */
2552
2553     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2554
2555     HV * table;
2556     SV **cvp;
2557     SV *cv;
2558     SV *rv;
2559     HV *stash;
2560     const U8* first_bad_char_loc;
2561     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2562
2563     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2564
2565     if (!SvCUR(res)) {
2566         Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2567                        "Unknown charname '' is deprecated");
2568         return res;
2569     }
2570
2571     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2572                                      e - backslash_ptr,
2573                                      &first_bad_char_loc))
2574     {
2575         _force_out_malformed_utf8_message(first_bad_char_loc,
2576                                           (U8 *) PL_parser->bufend,
2577                                           0,
2578                                           0 /* 0 means don't die */ );
2579         yyerror_pv(Perl_form(aTHX_
2580             "Malformed UTF-8 character immediately after '%.*s'",
2581             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
2582                    SVf_UTF8);
2583         return NULL;
2584     }
2585
2586     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2587                         /* include the <}> */
2588                         e - backslash_ptr + 1);
2589     if (! SvPOK(res)) {
2590         SvREFCNT_dec_NN(res);
2591         return NULL;
2592     }
2593
2594     /* See if the charnames handler is the Perl core's, and if so, we can skip
2595      * the validation needed for a user-supplied one, as Perl's does its own
2596      * validation. */
2597     table = GvHV(PL_hintgv);             /* ^H */
2598     cvp = hv_fetchs(table, "charnames", FALSE);
2599     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2600         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2601     {
2602         const char * const name = HvNAME(stash);
2603         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2604          && strEQ(name, "_charnames")) {
2605            return res;
2606        }
2607     }
2608
2609     /* Here, it isn't Perl's charname handler.  We can't rely on a
2610      * user-supplied handler to validate the input name.  For non-ut8 input,
2611      * look to see that the first character is legal.  Then loop through the
2612      * rest checking that each is a continuation */
2613
2614     /* This code makes the reasonable assumption that the only Latin1-range
2615      * characters that begin a character name alias are alphabetic, otherwise
2616      * would have to create a isCHARNAME_BEGIN macro */
2617
2618     if (! UTF) {
2619         if (! isALPHAU(*s)) {
2620             goto bad_charname;
2621         }
2622         s++;
2623         while (s < e) {
2624             if (! isCHARNAME_CONT(*s)) {
2625                 goto bad_charname;
2626             }
2627             if (*s == ' ' && *(s-1) == ' ') {
2628                 goto multi_spaces;
2629             }
2630             s++;
2631         }
2632     }
2633     else {
2634         /* Similarly for utf8.  For invariants can check directly; for other
2635          * Latin1, can calculate their code point and check; otherwise  use a
2636          * swash */
2637         if (UTF8_IS_INVARIANT(*s)) {
2638             if (! isALPHAU(*s)) {
2639                 goto bad_charname;
2640             }
2641             s++;
2642         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2643             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2644                 goto bad_charname;
2645             }
2646             s += 2;
2647         }
2648         else {
2649             if (! PL_utf8_charname_begin) {
2650                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2651                 PL_utf8_charname_begin = _core_swash_init("utf8",
2652                                                         "_Perl_Charname_Begin",
2653                                                         &PL_sv_undef,
2654                                                         1, 0, NULL, &flags);
2655             }
2656             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2657                 goto bad_charname;
2658             }
2659             s += UTF8SKIP(s);
2660         }
2661
2662         while (s < e) {
2663             if (UTF8_IS_INVARIANT(*s)) {
2664                 if (! isCHARNAME_CONT(*s)) {
2665                     goto bad_charname;
2666                 }
2667                 if (*s == ' ' && *(s-1) == ' ') {
2668                     goto multi_spaces;
2669                 }
2670                 s++;
2671             }
2672             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2673                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2674                 {
2675                     goto bad_charname;
2676                 }
2677                 s += 2;
2678             }
2679             else {
2680                 if (! PL_utf8_charname_continue) {
2681                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2682                     PL_utf8_charname_continue = _core_swash_init("utf8",
2683                                                 "_Perl_Charname_Continue",
2684                                                 &PL_sv_undef,
2685                                                 1, 0, NULL, &flags);
2686                 }
2687                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2688                     goto bad_charname;
2689                 }
2690                 s += UTF8SKIP(s);
2691             }
2692         }
2693     }
2694     if (*(s-1) == ' ') {
2695         yyerror_pv(
2696             Perl_form(aTHX_
2697             "charnames alias definitions may not contain trailing "
2698             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2699             (int)(s - backslash_ptr + 1), backslash_ptr,
2700             (int)(e - s + 1), s + 1
2701             ),
2702         UTF ? SVf_UTF8 : 0);
2703         return NULL;
2704     }
2705
2706     if (SvUTF8(res)) { /* Don't accept malformed input */
2707         const U8* first_bad_char_loc;
2708         STRLEN len;
2709         const char* const str = SvPV_const(res, len);
2710         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2711             _force_out_malformed_utf8_message(first_bad_char_loc,
2712                                               (U8 *) PL_parser->bufend,
2713                                               0,
2714                                               0 /* 0 means don't die */ );
2715             yyerror_pv(
2716               Perl_form(aTHX_
2717                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2718                  (int) (e - backslash_ptr + 1), backslash_ptr,
2719                  (int) ((char *) first_bad_char_loc - str), str
2720               ),
2721               SVf_UTF8);
2722             return NULL;
2723         }
2724     }
2725
2726     return res;
2727
2728   bad_charname: {
2729
2730         /* The final %.*s makes sure that should the trailing NUL be missing
2731          * that this print won't run off the end of the string */
2732         yyerror_pv(
2733           Perl_form(aTHX_
2734             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2735             (int)(s - backslash_ptr + 1), backslash_ptr,
2736             (int)(e - s + 1), s + 1
2737           ),
2738           UTF ? SVf_UTF8 : 0);
2739         return NULL;
2740     }
2741
2742   multi_spaces:
2743         yyerror_pv(
2744           Perl_form(aTHX_
2745             "charnames alias definitions may not contain a sequence of "
2746             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2747             (int)(s - backslash_ptr + 1), backslash_ptr,
2748             (int)(e - s + 1), s + 1
2749           ),
2750           UTF ? SVf_UTF8 : 0);
2751         return NULL;
2752 }
2753
2754 /*
2755   scan_const
2756
2757   Extracts the next constant part of a pattern, double-quoted string,
2758   or transliteration.  This is terrifying code.
2759
2760   For example, in parsing the double-quoted string "ab\x63$d", it would
2761   stop at the '$' and return an OP_CONST containing 'abc'.
2762
2763   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2764   processing a pattern (PL_lex_inpat is true), a transliteration
2765   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2766
2767   Returns a pointer to the character scanned up to. If this is
2768   advanced from the start pointer supplied (i.e. if anything was
2769   successfully parsed), will leave an OP_CONST for the substring scanned
2770   in pl_yylval. Caller must intuit reason for not parsing further
2771   by looking at the next characters herself.
2772
2773   In patterns:
2774     expand:
2775       \N{FOO}  => \N{U+hex_for_character_FOO}
2776       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2777
2778     pass through:
2779         all other \-char, including \N and \N{ apart from \N{ABC}
2780
2781     stops on:
2782         @ and $ where it appears to be a var, but not for $ as tail anchor
2783         \l \L \u \U \Q \E
2784         (?{  or  (??{
2785
2786   In transliterations:
2787     characters are VERY literal, except for - not at the start or end
2788     of the string, which indicates a range.  However some backslash sequences
2789     are recognized: \r, \n, and the like
2790                     \007 \o{}, \x{}, \N{}
2791     If all elements in the transliteration are below 256,
2792     scan_const expands the range to the full set of intermediate
2793     characters. If the range is in utf8, the hyphen is replaced with
2794     a certain range mark which will be handled by pmtrans() in op.c.
2795
2796   In double-quoted strings:
2797     backslashes:
2798       all those recognized in transliterations
2799       deprecated backrefs: \1 (in substitution replacements)
2800       case and quoting: \U \Q \E
2801     stops on @ and $
2802
2803   scan_const does *not* construct ops to handle interpolated strings.
2804   It stops processing as soon as it finds an embedded $ or @ variable
2805   and leaves it to the caller to work out what's going on.
2806
2807   embedded arrays (whether in pattern or not) could be:
2808       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2809
2810   $ in double-quoted strings must be the symbol of an embedded scalar.
2811
2812   $ in pattern could be $foo or could be tail anchor.  Assumption:
2813   it's a tail anchor if $ is the last thing in the string, or if it's
2814   followed by one of "()| \r\n\t"
2815
2816   \1 (backreferences) are turned into $1 in substitutions
2817
2818   The structure of the code is
2819       while (there's a character to process) {
2820           handle transliteration ranges
2821           skip regexp comments /(?#comment)/ and codes /(?{code})/
2822           skip #-initiated comments in //x patterns
2823           check for embedded arrays
2824           check for embedded scalars
2825           if (backslash) {
2826               deprecate \1 in substitution replacements
2827               handle string-changing backslashes \l \U \Q \E, etc.
2828               switch (what was escaped) {
2829                   handle \- in a transliteration (becomes a literal -)
2830                   if a pattern and not \N{, go treat as regular character
2831                   handle \132 (octal characters)
2832                   handle \x15 and \x{1234} (hex characters)
2833                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2834                   handle \cV (control characters)
2835                   handle printf-style backslashes (\f, \r, \n, etc)
2836               } (end switch)
2837               continue
2838           } (end if backslash)
2839           handle regular character
2840     } (end while character to read)
2841
2842 */
2843
2844 STATIC char *
2845 S_scan_const(pTHX_ char *start)
2846 {
2847     char *send = PL_bufend;             /* end of the constant */
2848     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2849                                            on sizing. */
2850     char *s = start;                    /* start of the constant */
2851     char *d = SvPVX(sv);                /* destination for copies */
2852     bool dorange = FALSE;               /* are we in a translit range? */
2853     bool didrange = FALSE;              /* did we just finish a range? */
2854     bool in_charclass = FALSE;          /* within /[...]/ */
2855     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2856     bool has_above_latin1 = FALSE;      /* does something require special
2857                                            handling in tr/// ? */
2858     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2859                                            UTF8?  But, this can show as true
2860                                            when the source isn't utf8, as for
2861                                            example when it is entirely composed
2862                                            of hex constants */
2863     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
2864                                            number of characters found so far
2865                                            that will expand (into 2 bytes)
2866                                            should we have to convert to
2867                                            UTF-8) */
2868     SV *res;                            /* result from charnames */
2869     STRLEN offset_to_max;   /* The offset in the output to where the range
2870                                high-end character is temporarily placed */
2871
2872     /* Note on sizing:  The scanned constant is placed into sv, which is
2873      * initialized by newSV() assuming one byte of output for every byte of
2874      * input.  This routine expects newSV() to allocate an extra byte for a
2875      * trailing NUL, which this routine will append if it gets to the end of
2876      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2877      * CAPITAL LETTER A}), or more output than input if the constant ends up
2878      * recoded to utf8, but each time a construct is found that might increase
2879      * the needed size, SvGROW() is called.  Its size parameter each time is
2880      * based on the best guess estimate at the time, namely the length used so
2881      * far, plus the length the current construct will occupy, plus room for
2882      * the trailing NUL, plus one byte for every input byte still unscanned */
2883
2884     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2885                        before set */
2886 #ifdef EBCDIC
2887     int backslash_N = 0;            /* ? was the character from \N{} */
2888     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2889                                        platform-specific like \x65 */
2890 #endif
2891
2892     PERL_ARGS_ASSERT_SCAN_CONST;
2893
2894     assert(PL_lex_inwhat != OP_TRANSR);
2895     if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2896         /* If we are doing a trans and we know we want UTF8 set expectation */
2897         has_utf8   = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2898         this_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2899     }
2900
2901     /* Protect sv from errors and fatal warnings. */
2902     ENTER_with_name("scan_const");
2903     SAVEFREESV(sv);
2904
2905     while (s < send
2906            || dorange   /* Handle tr/// range at right edge of input */
2907     ) {
2908
2909         /* get transliterations out of the way (they're most literal) */
2910         if (PL_lex_inwhat == OP_TRANS) {
2911
2912             /* But there isn't any special handling necessary unless there is a
2913              * range, so for most cases we just drop down and handle the value
2914              * as any other.  There are two exceptions.
2915              *
2916              * 1.  A minus sign indicates that we are actually going to have
2917              *     a range.  In this case, skip the '-', set a flag, then drop
2918              *     down to handle what should be the end range value.
2919              * 2.  After we've handled that value, the next time through, that
2920              *     flag is set and we fix up the range.
2921              *
2922              * Ranges entirely within Latin1 are expanded out entirely, in
2923              * order to make the transliteration a simple table look-up.
2924              * Ranges that extend above Latin1 have to be done differently, so
2925              * there is no advantage to expanding them here, so they are
2926              * stored here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte
2927              * signifies a hyphen without any possible ambiguity.  On EBCDIC
2928              * machines, if the range is expressed as Unicode, the Latin1
2929              * portion is expanded out even if the range extends above
2930              * Latin1.  This is because each code point in it has to be
2931              * processed here individually to get its native translation */
2932
2933             if (! dorange) {
2934
2935                 /* Here, we don't think we're in a range.  If we've processed
2936                  * at least one character, then see if this next one is a '-',
2937                  * indicating the previous one was the start of a range.  But
2938                  * don't bother if we're too close to the end for the minus to
2939                  * mean that. */
2940                 if (*s != '-' || s >= send - 1 || s == start) {
2941
2942                     /* A regular character.  Process like any other, but first
2943                      * clear any flags */
2944                     didrange = FALSE;
2945                     dorange = FALSE;
2946 #ifdef EBCDIC
2947                     non_portable_endpoint = 0;
2948                     backslash_N = 0;
2949 #endif
2950                     /* The tests here and the following 'else' for being above
2951                      * Latin1 suffice to find all such occurences in the
2952                      * constant, except those added by a backslash escape
2953                      * sequence, like \x{100}.  And all those set
2954                      * 'has_above_latin1' as appropriate */
2955                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
2956                         has_above_latin1 = TRUE;
2957                     }
2958
2959                     /* Drops down to generic code to process current byte */
2960                 }
2961                 else {
2962                     if (didrange) { /* Something like y/A-C-Z// */
2963                         Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2964                     }
2965
2966                     dorange = TRUE;
2967
2968                     s++;    /* Skip past the minus */
2969
2970                     /* d now points to where the end-range character will be
2971                      * placed.  Save it so won't have to go finding it later,
2972                      * and drop down to get that character.  (Actually we
2973                      * instead save the offset, to handle the case where a
2974                      * realloc in the meantime could change the actual
2975                      * pointer).  We'll finish processing the range the next
2976                      * time through the loop */
2977                     offset_to_max = d - SvPVX_const(sv);
2978
2979                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
2980                         has_above_latin1 = TRUE;
2981                     }
2982                 }
2983             }  /* End of not a range */
2984             else {
2985                 /* Here we have parsed a range.  Now must handle it.  At this
2986                  * point:
2987                  * 'sv' is a SV* that contains the output string we are
2988                  *      constructing.  The final two characters in that string
2989                  *      are the range start and range end, in order.
2990                  * 'd'  points to just beyond the range end in the 'sv' string,
2991                  *      where we would next place something
2992                  * 'offset_to_max' is the offset in 'sv' at which the character
2993                  *      before 'd' begins.
2994                  */
2995                 const char * max_ptr = SvPVX_const(sv) + offset_to_max;
2996                 const char * min_ptr;
2997                 IV range_min;
2998                 IV range_max;   /* last character in range */
2999                 STRLEN save_offset;
3000                 STRLEN grow;
3001 #ifdef EBCDIC
3002                 bool convert_unicode;
3003                 IV real_range_max = 0;
3004 #endif
3005
3006                 /* Get the range-ends code point values. */
3007                 if (has_utf8) {
3008                     /* We know the utf8 is valid, because we just constructed
3009                      * it ourselves in previous loop iterations */
3010                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3011                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3012                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3013                 }
3014                 else {
3015                     min_ptr = max_ptr - 1;
3016                     range_min = * (U8*) min_ptr;
3017                     range_max = * (U8*) max_ptr;
3018                 }
3019
3020 #ifdef EBCDIC
3021                 /* On EBCDIC platforms, we may have to deal with portable
3022                  * ranges.  These happen if at least one range endpoint is a
3023                  * Unicode value (\N{...}), or if the range is a subset of
3024                  * [A-Z] or [a-z], and both ends are literal characters,
3025                  * like 'A', and not like \x{C1} */
3026                 convert_unicode =
3027                        cBOOL(backslash_N)   /* \N{} forces Unicode, hence
3028                                                portable range */
3029                       || (   ! non_portable_endpoint
3030                           && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3031                              || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3032                 if (convert_unicode) {
3033
3034                     /* Special handling is needed for these portable ranges.
3035                      * They are defined to all be in Unicode terms, which
3036                      * include all Unicode code points between the end points.
3037                      * Convert to Unicode to get the Unicode range.  Later we
3038                      * will convert each code point in the range back to
3039                      * native.  */
3040                     range_min = NATIVE_TO_UNI(range_min);
3041                     range_max = NATIVE_TO_UNI(range_max);
3042                 }
3043 #endif
3044
3045                 if (range_min > range_max) {
3046 #ifdef EBCDIC
3047                     if (convert_unicode) {
3048                         /* Need to convert back to native for meaningful
3049                          * messages for this platform */
3050                         range_min = UNI_TO_NATIVE(range_min);
3051                         range_max = UNI_TO_NATIVE(range_max);
3052                     }
3053 #endif
3054
3055                     /* Use the characters themselves for the error message if
3056                      * ASCII printables; otherwise some visible representation
3057                      * of them */
3058                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3059                         Perl_croak(aTHX_
3060                          "Invalid range \"%c-%c\" in transliteration operator",
3061                          (char)range_min, (char)range_max);
3062                     }
3063 #ifdef EBCDIC
3064                     else if (convert_unicode) {
3065                         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3066                         Perl_croak(aTHX_
3067                                "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\""
3068                                " in transliteration operator",
3069                                range_min, range_max);
3070                     }
3071 #endif
3072                     else {
3073                         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3074                         Perl_croak(aTHX_
3075                                "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3076                                " in transliteration operator",
3077                                range_min, range_max);
3078                     }
3079                 }
3080
3081                 if (has_utf8) {
3082
3083                     /* If everything in the transliteration is below 256, we
3084                      * can avoid special handling later.  A translation table
3085                      * of each of those bytes is created.  And so we expand out
3086                      * all ranges to their constituent code points.  But if
3087                      * we've encountered something above 255, the expanding
3088                      * won't help, so skip doing that.  But if it's EBCDIC, we
3089                      * may have to look at each character below 256 if we have
3090                      * to convert to/from Unicode values */
3091                     if (   has_above_latin1
3092 #ifdef EBCDIC
3093                         && (range_min > 255 || ! convert_unicode)
3094 #endif
3095                     ) {
3096                         /* Move the high character one byte to the right; then
3097                          * insert between it and the range begin, an illegal
3098                          * byte which serves to indicate this is a range (using
3099                          * a '-' could be ambiguous). */
3100                         char *e = d++;
3101                         while (e-- > max_ptr) {
3102                             *(e + 1) = *e;
3103                         }
3104                         *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3105                         goto range_done;
3106                     }
3107
3108                     /* Here, we're going to expand out the range.  For EBCDIC
3109                      * the range can extend above 255 (not so in ASCII), so
3110                      * for EBCDIC, split it into the parts above and below
3111                      * 255/256 */
3112 #ifdef EBCDIC
3113                     if (range_max > 255) {
3114                         real_range_max = range_max;
3115                         range_max = 255;
3116                     }
3117 #endif
3118                 }
3119
3120                 /* Here we need to expand out the string to contain each
3121                  * character in the range.  Grow the output to handle this */
3122
3123                 save_offset  = min_ptr - SvPVX_const(sv);
3124
3125                 /* The base growth is the number of code points in the range */
3126                 grow = range_max - range_min + 1;
3127                 if (has_utf8) {
3128
3129                     /* But if the output is UTF-8, some of those characters may
3130                      * need two bytes (since the maximum range value here is
3131                      * 255, the max bytes per character is two).  On ASCII
3132                      * platforms, it's not much trouble to get an accurate
3133                      * count of what's needed.  But on EBCDIC, the ones that
3134                      * need 2 bytes are scattered around, so just use a worst
3135                      * case value instead of calculating for that platform.  */
3136 #ifdef EBCDIC
3137                     grow *= 2;
3138 #else
3139                     /* Only those above 127 require 2 bytes.  This may be
3140                      * everything in the range, or not */
3141                     if (range_min > 127) {
3142                         grow *= 2;
3143                     }
3144                     else if (range_max > 127) {
3145                         grow += range_max - 127;
3146                     }
3147 #endif
3148                 }
3149
3150                 /* Subtract 3 for the bytes that were already accounted for
3151                  * (min, max, and the hyphen) */
3152                 d = save_offset + SvGROW(sv, SvLEN(sv) + grow - 3);
3153
3154 #ifdef EBCDIC
3155                 /* Here, we expand out the range. */
3156                 if (convert_unicode) {
3157                     IV i;
3158
3159                     /* Recall that the min and max are now in Unicode terms, so
3160                      * we have to convert each character to its native
3161                      * equivalent */
3162                     if (has_utf8) {
3163                         for (i = range_min; i <= range_max; i++) {
3164                             append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
3165                                                          (U8 **) &d);
3166                         }
3167                     }
3168                     else {
3169                         for (i = range_min; i <= range_max; i++) {
3170                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3171                         }
3172                     }
3173                 }
3174                 else
3175 #endif
3176                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3177                 {
3178                     IV i;
3179
3180                     /* Here, no conversions are necessary, which means that the
3181                      * first character in the range is already in 'd' and
3182                      * valid, so we can skip overwriting it */
3183                     if (has_utf8) {
3184                         d += UTF8SKIP(d);
3185                         for (i = range_min + 1; i <= range_max; i++) {
3186                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3187                         }
3188                     }
3189                     else {
3190                         d++;
3191                         for (i = range_min + 1; i <= range_max; i++) {
3192                             *d++ = (char)i;
3193                         }
3194                     }
3195                 }
3196
3197 #ifdef EBCDIC
3198                 /* If the original range extended above 255, add in that portion. */
3199                 if (real_range_max) {
3200                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3201                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3202                     if (real_range_max > 0x101)
3203                         *d++ = (char) ILLEGAL_UTF8_BYTE;
3204                     if (real_range_max > 0x100)
3205                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3206                 }
3207 #endif
3208
3209               range_done:
3210                 /* mark the range as done, and continue */
3211                 didrange = TRUE;
3212                 dorange = FALSE;
3213 #ifdef EBCDIC
3214                 non_portable_endpoint = 0;
3215                 backslash_N = 0;
3216 #endif
3217                 continue;
3218             } /* End of is a range */
3219         } /* End of transliteration.  Joins main code after these else's */
3220         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3221             char *s1 = s-1;
3222             int esc = 0;
3223             while (s1 >= start && *s1-- == '\\')
3224                 esc = !esc;
3225             if (!esc)
3226                 in_charclass = TRUE;
3227         }
3228         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3229             char *s1 = s-1;
3230             int esc = 0;
3231             while (s1 >= start && *s1-- == '\\')
3232                 esc = !esc;
3233             if (!esc)
3234                 in_charclass = FALSE;
3235         }
3236
3237         /* skip for regexp comments /(?#comment)/, except for the last
3238          * char, which will be done separately.
3239          * Stop on (?{..}) and friends */
3240
3241         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3242             if (s[2] == '#') {
3243                 while (s+1 < send && *s != ')')
3244                     *d++ = *s++;
3245             }
3246             else if (!PL_lex_casemods
3247                      && (    s[2] == '{' /* This should match regcomp.c */
3248                          || (s[2] == '?' && s[3] == '{')))
3249             {
3250                 break;
3251             }
3252         }
3253
3254         /* likewise skip #-initiated comments in //x patterns */
3255         else if (*s == '#'
3256                  && PL_lex_inpat
3257                  && !in_charclass
3258                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3259         {
3260             while (s < send && *s != '\n')
3261                 *d++ = *s++;
3262         }
3263
3264         /* no further processing of single-quoted regex */
3265         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3266             goto default_action;
3267
3268         /* check for embedded arrays
3269            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3270            */
3271         else if (*s == '@' && s[1]) {
3272             if (UTF
3273                ? isIDFIRST_utf8_safe(s+1, send)
3274                : isWORDCHAR_A(s[1]))
3275             {
3276                 break;
3277             }
3278             if (strchr(":'{$", s[1]))
3279                 break;
3280             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3281                 break; /* in regexp, neither @+ nor @- are interpolated */
3282         }
3283
3284         /* check for embedded scalars.  only stop if we're sure it's a
3285            variable.
3286         */
3287         else if (*s == '$') {
3288             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3289                 break;
3290             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3291                 if (s[1] == '\\') {
3292                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3293                                    "Possible unintended interpolation of $\\ in regex");
3294                 }
3295                 break;          /* in regexp, $ might be tail anchor */
3296             }
3297         }
3298
3299         /* End of else if chain - OP_TRANS rejoin rest */
3300
3301         if (UNLIKELY(s >= send)) {
3302             assert(s == send);
3303             break;
3304         }
3305
3306         /* backslashes */
3307         if (*s == '\\' && s+1 < send) {
3308             char* e;    /* Can be used for ending '}', etc. */
3309
3310             s++;
3311
3312             /* warn on \1 - \9 in substitution replacements, but note that \11
3313              * is an octal; and \19 is \1 followed by '9' */
3314             if (PL_lex_inwhat == OP_SUBST
3315                 && !PL_lex_inpat
3316                 && isDIGIT(*s)
3317                 && *s != '0'
3318                 && !isDIGIT(s[1]))
3319             {
3320                 /* diag_listed_as: \%d better written as $%d */
3321                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3322                 *--s = '$';
3323                 break;
3324             }
3325
3326             /* string-change backslash escapes */
3327             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3328                 --s;
3329                 break;
3330             }
3331             /* In a pattern, process \N, but skip any other backslash escapes.
3332              * This is because we don't want to translate an escape sequence
3333              * into a meta symbol and have the regex compiler use the meta
3334              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3335              * in spite of this, we do have to process \N here while the proper
3336              * charnames handler is in scope.  See bugs #56444 and #62056.
3337              *
3338              * There is a complication because \N in a pattern may also stand
3339              * for 'match a non-nl', and not mean a charname, in which case its
3340              * processing should be deferred to the regex compiler.  To be a
3341              * charname it must be followed immediately by a '{', and not look
3342              * like \N followed by a curly quantifier, i.e., not something like
3343              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3344              * quantifier */
3345             else if (PL_lex_inpat
3346                     && (*s != 'N'
3347                         || s[1] != '{'
3348                         || regcurly(s + 1)))
3349             {
3350                 *d++ = '\\';
3351                 goto default_action;
3352             }
3353
3354             switch (*s) {
3355             default:
3356                 {
3357                     if ((isALPHANUMERIC(*s)))
3358                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3359                                        "Unrecognized escape \\%c passed through",
3360                                        *s);
3361                     /* default action is to copy the quoted character */
3362                     goto default_action;
3363                 }
3364
3365             /* eg. \132 indicates the octal constant 0132 */
3366             case '0': case '1': case '2': case '3':
3367             case '4': case '5': case '6': case '7':
3368                 {
3369                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3370                     STRLEN len = 3;
3371                     uv = grok_oct(s, &len, &flags, NULL);
3372                     s += len;
3373                     if (len < 3 && s < send && isDIGIT(*s)
3374                         && ckWARN(WARN_MISC))
3375                     {
3376                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3377                                     "%s", form_short_octal_warning(s, len));
3378                     }
3379                 }
3380                 goto NUM_ESCAPE_INSERT;
3381
3382             /* eg. \o{24} indicates the octal constant \024 */
3383             case 'o':
3384                 {
3385                     const char* error;
3386
3387                     bool valid = grok_bslash_o(&s, &uv, &error,
3388                                                TRUE, /* Output warning */
3389                                                FALSE, /* Not strict */
3390                                                TRUE, /* Output warnings for
3391                                                          non-portables */
3392                                                UTF);
3393                     if (! valid) {
3394                         yyerror(error);
3395                         continue;
3396                     }
3397                     goto NUM_ESCAPE_INSERT;
3398                 }
3399
3400             /* eg. \x24 indicates the hex constant 0x24 */
3401             case 'x':
3402                 {
3403                     const char* error;
3404
3405                     bool valid = grok_bslash_x(&s, &uv, &error,
3406                                                TRUE, /* Output warning */
3407                                                FALSE, /* Not strict */
3408                                                TRUE,  /* Output warnings for
3409                                                          non-portables */
3410                                                UTF);
3411                     if (! valid) {
3412                         yyerror(error);
3413                         continue;
3414                     }
3415                 }
3416
3417               NUM_ESCAPE_INSERT:
3418                 /* Insert oct or hex escaped character. */
3419
3420                 /* Here uv is the ordinal of the next character being added */
3421                 if (UVCHR_IS_INVARIANT(uv)) {
3422                     *d++ = (char) uv;
3423                 }
3424                 else {
3425                     if (!has_utf8 && uv > 255) {
3426
3427                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3428                          * If we've only seen invariants so far, all we have to
3429                          * do is turn on the flag */
3430                         if (utf8_variant_count == 0) {
3431                             SvUTF8_on(sv);
3432                         }
3433                         else {
3434                             SvCUR_set(sv, d - SvPVX_const(sv));
3435                             SvPOK_on(sv);
3436                             *d = '\0';
3437
3438                             sv_utf8_upgrade_flags_grow(
3439                                            sv,
3440                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3441
3442                                            /* Since we're having to grow here,
3443                                             * make sure we have enough room for
3444                                             * this escape and a NUL, so the
3445                                             * code immediately below won't have
3446                                             * to actually grow again */
3447                                           UVCHR_SKIP(uv)
3448                                         + (STRLEN)(send - s) + 1);
3449                             d = SvPVX(sv) + SvCUR(sv);
3450                         }
3451
3452                         has_above_latin1 = TRUE;
3453                         has_utf8 = TRUE;
3454                     }
3455
3456                     if (! has_utf8) {
3457                         *d++ = (char)uv;
3458                         utf8_variant_count++;
3459                     }
3460                     else {
3461                        /* Usually, there will already be enough room in 'sv'
3462                         * since such escapes are likely longer than any UTF-8
3463                         * sequence they can end up as.  This isn't the case on
3464                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3465                         * UTF-8 for it contains 14.  And, we have to allow for
3466                         * a trailing NUL.  It probably can't happen on ASCII
3467                         * platforms, but be safe.  See Note on sizing above. */
3468                         const STRLEN needed = d - SvPVX(sv)
3469                                             + UVCHR_SKIP(uv)
3470                                             + (send - s)
3471                                             + 1;
3472                         if (UNLIKELY(needed > SvLEN(sv))) {
3473                             SvCUR_set(sv, d - SvPVX_const(sv));
3474                             d = SvCUR(sv) + SvGROW(sv, needed);
3475                         }
3476
3477                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3478                         if (PL_lex_inwhat == OP_TRANS
3479                             && PL_parser->lex_sub_op)
3480                         {
3481                             PL_parser->lex_sub_op->op_private |=
3482                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3483                                              : OPpTRANS_TO_UTF);
3484                         }
3485                     }
3486                 }
3487 #ifdef EBCDIC
3488                 non_portable_endpoint++;
3489 #endif
3490                 continue;
3491
3492             case 'N':
3493                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3494                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3495                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3496                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3497                  * convenience all three forms are referred to as "named
3498                  * characters" below.
3499                  *
3500                  * For patterns, \N also can mean to match a non-newline.  Code
3501                  * before this 'switch' statement should already have handled
3502                  * this situation, and hence this code only has to deal with
3503                  * the named character cases.
3504                  *
3505                  * For non-patterns, the named characters are converted to
3506                  * their string equivalents.  In patterns, named characters are
3507                  * not converted to their ultimate forms for the same reasons
3508                  * that other escapes aren't.  Instead, they are converted to
3509                  * the \N{U+...} form to get the value from the charnames that
3510                  * is in effect right now, while preserving the fact that it
3511                  * was a named character, so that the regex compiler knows
3512                  * this.
3513                  *
3514                  * The structure of this section of code (besides checking for
3515                  * errors and upgrading to utf8) is:
3516                  *    If the named character is of the form \N{U+...}, pass it
3517                  *      through if a pattern; otherwise convert the code point
3518                  *      to utf8
3519                  *    Otherwise must be some \N{NAME}: convert to
3520                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3521                  *
3522                  * Transliteration is an exception.  The conversion to utf8 is
3523                  * only done if the code point requires it to be representable.
3524                  *
3525                  * Here, 's' points to the 'N'; the test below is guaranteed to
3526                  * succeed if we are being called on a pattern, as we already
3527                  * know from a test above that the next character is a '{'.  A
3528                  * non-pattern \N must mean 'named character', which requires
3529                  * braces */
3530                 s++;
3531                 if (*s != '{') {
3532                     yyerror("Missing braces on \\N{}");
3533                     continue;
3534                 }
3535                 s++;
3536
3537                 /* If there is no matching '}', it is an error. */
3538                 if (! (e = strchr(s, '}'))) {
3539                     if (! PL_lex_inpat) {
3540                         yyerror("Missing right brace on \\N{}");
3541                     } else {
3542                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3543                     }
3544                     continue;
3545                 }
3546
3547                 /* Here it looks like a named character */
3548
3549                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3550                     s += 2;         /* Skip to next char after the 'U+' */
3551                     if (PL_lex_inpat) {
3552
3553                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3554                         /* Check the syntax.  */
3555                         const char *orig_s;
3556                         orig_s = s - 5;
3557                         if (!isXDIGIT(*s)) {
3558                           bad_NU:
3559                             yyerror(
3560                                 "Invalid hexadecimal number in \\N{U+...}"
3561                             );
3562                             s = e + 1;
3563                             continue;
3564                         }
3565                         while (++s < e) {
3566                             if (isXDIGIT(*s))
3567                                 continue;
3568                             else if ((*s == '.' || *s == '_')
3569                                   && isXDIGIT(s[1]))
3570                                 continue;
3571                             goto bad_NU;
3572                         }
3573
3574                         /* Pass everything through unchanged.
3575                          * +1 is for the '}' */
3576                         Copy(orig_s, d, e - orig_s + 1, char);
3577                         d += e - orig_s + 1;
3578                     }
3579                     else {  /* Not a pattern: convert the hex to string */
3580                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3581                                 | PERL_SCAN_SILENT_ILLDIGIT
3582                                 | PERL_SCAN_DISALLOW_PREFIX;
3583                         STRLEN len = e - s;
3584                         uv = grok_hex(s, &len, &flags, NULL);
3585                         if (len == 0 || (len != (STRLEN)(e - s)))
3586                             goto bad_NU;
3587
3588                          /* For non-tr///, if the destination is not in utf8,
3589                           * unconditionally recode it to be so.  This is
3590                           * because \N{} implies Unicode semantics, and scalars
3591                           * have to be in utf8 to guarantee those semantics.
3592                           * tr/// doesn't care about Unicode rules, so no need
3593                           * there to upgrade to UTF-8 for small enough code
3594                           * points */
3595                         if (! has_utf8 && (   uv > 0xFF
3596                                            || PL_lex_inwhat != OP_TRANS))
3597                         {
3598                             /* See Note on sizing above.  */
3599                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3600
3601                             SvCUR_set(sv, d - SvPVX_const(sv));
3602                             SvPOK_on(sv);
3603                             *d = '\0';
3604
3605                             if (utf8_variant_count == 0) {
3606                                 SvUTF8_on(sv);
3607                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3608                             }
3609                             else {
3610                                 sv_utf8_upgrade_flags_grow(
3611                                                sv,
3612                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3613                                                extra);
3614                                 d = SvPVX(sv) + SvCUR(sv);
3615                             }
3616
3617                             has_utf8 = TRUE;
3618                             has_above_latin1 = TRUE;
3619                         }
3620
3621                         /* Add the (Unicode) code point to the output. */
3622                         if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3623                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3624                         }
3625                         else {
3626                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3627                         }
3628                     }
3629                 }
3630                 else /* Here is \N{NAME} but not \N{U+...}. */
3631                      if ((res = get_and_check_backslash_N_name(s, e)))
3632                 {
3633                     STRLEN len;
3634                     const char *str = SvPV_const(res, len);
3635                     if (PL_lex_inpat) {
3636
3637                         if (! len) { /* The name resolved to an empty string */
3638                             Copy("\\N{}", d, 4, char);
3639                             d += 4;
3640                         }
3641                         else {
3642                             /* In order to not lose information for the regex
3643                             * compiler, pass the result in the specially made
3644                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3645                             * the code points in hex of each character
3646                             * returned by charnames */
3647
3648                             const char *str_end = str + len;
3649                             const STRLEN off = d - SvPVX_const(sv);
3650
3651                             if (! SvUTF8(res)) {
3652                                 /* For the non-UTF-8 case, we can determine the
3653                                  * exact length needed without having to parse
3654                                  * through the string.  Each character takes up
3655                                  * 2 hex digits plus either a trailing dot or
3656                                  * the "}" */
3657                                 const char initial_text[] = "\\N{U+";
3658                                 const STRLEN initial_len = sizeof(initial_text)
3659                                                            - 1;
3660                                 d = off + SvGROW(sv, off
3661                                                     + 3 * len
3662
3663                                                     /* +1 for trailing NUL */
3664                                                     + initial_len + 1
3665
3666                                                     + (STRLEN)(send - e));
3667                                 Copy(initial_text, d, initial_len, char);
3668                                 d += initial_len;
3669                                 while (str < str_end) {
3670                                     char hex_string[4];
3671                                     int len =
3672                                         my_snprintf(hex_string,
3673                                                   sizeof(hex_string),
3674                                                   "%02X.",
3675
3676                                                   /* The regex compiler is
3677                                                    * expecting Unicode, not
3678                                                    * native */
3679                                                   NATIVE_TO_LATIN1(*str));
3680                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3681                                                            sizeof(hex_string));
3682                                     Copy(hex_string, d, 3, char);
3683                                     d += 3;
3684                                     str++;
3685                                 }
3686                                 d--;    /* Below, we will overwrite the final
3687                                            dot with a right brace */
3688                             }
3689                             else {
3690                                 STRLEN char_length; /* cur char's byte length */
3691
3692                                 /* and the number of bytes after this is
3693                                  * translated into hex digits */
3694                                 STRLEN output_length;
3695
3696                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3697                                  * for max('U+', '.'); and 1 for NUL */
3698                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3699
3700                                 /* Get the first character of the result. */
3701                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3702                                                         len,
3703                                                         &char_length,
3704                                                         UTF8_ALLOW_ANYUV);
3705                                 /* Convert first code point to Unicode hex,
3706                                  * including the boiler plate before it. */
3707                                 output_length =
3708                                     my_snprintf(hex_string, sizeof(hex_string),
3709                                              "\\N{U+%X",
3710                                              (unsigned int) NATIVE_TO_UNI(uv));
3711
3712                                 /* Make sure there is enough space to hold it */
3713                                 d = off + SvGROW(sv, off
3714                                                     + output_length
3715                                                     + (STRLEN)(send - e)
3716                                                     + 2);       /* '}' + NUL */
3717                                 /* And output it */
3718                                 Copy(hex_string, d, output_length, char);
3719                                 d += output_length;
3720
3721                                 /* For each subsequent character, append dot and
3722                                 * its Unicode code point in hex */
3723                                 while ((str += char_length) < str_end) {
3724                                     const STRLEN off = d - SvPVX_const(sv);
3725                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3726                                                             str_end - str,
3727                                                             &char_length,
3728                                                             UTF8_ALLOW_ANYUV);
3729                                     output_length =
3730                                         my_snprintf(hex_string,
3731                                              sizeof(hex_string),
3732                                              ".%X",
3733                                              (unsigned int) NATIVE_TO_UNI(uv));
3734
3735                                     d = off + SvGROW(sv, off
3736                                                         + output_length
3737                                                         + (STRLEN)(send - e)
3738                                                         + 2);   /* '}' +  NUL */
3739                                     Copy(hex_string, d, output_length, char);
3740                                     d += output_length;
3741                                 }
3742                             }
3743
3744                             *d++ = '}'; /* Done.  Add the trailing brace */
3745                         }
3746                     }
3747                     else { /* Here, not in a pattern.  Convert the name to a
3748                             * string. */
3749
3750                         if (PL_lex_inwhat == OP_TRANS) {
3751                             str = SvPV_const(res, len);
3752                             if (len > ((SvUTF8(res))
3753                                        ? UTF8SKIP(str)
3754                                        : 1U))
3755                             {
3756                                 yyerror(Perl_form(aTHX_
3757                                     "%.*s must not be a named sequence"
3758                                     " in transliteration operator",
3759                                         /*  +1 to include the "}" */
3760                                     (int) (e + 1 - start), start));
3761                                 goto end_backslash_N;
3762                             }
3763
3764                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3765                                 has_above_latin1 = TRUE;
3766                             }
3767
3768                         }
3769                         else if (! SvUTF8(res)) {
3770                             /* Make sure \N{} return is UTF-8.  This is because
3771                              * \N{} implies Unicode semantics, and scalars have
3772                              * to be in utf8 to guarantee those semantics; but
3773                              * not needed in tr/// */
3774                             sv_utf8_upgrade_flags(res, 0);
3775                             str = SvPV_const(res, len);
3776                         }
3777
3778                          /* Upgrade destination to be utf8 if this new
3779                           * component is */
3780                         if (! has_utf8 && SvUTF8(res)) {
3781                             /* See Note on sizing above.  */
3782                             const STRLEN extra = len + (send - s) + 1;
3783
3784                             SvCUR_set(sv, d - SvPVX_const(sv));
3785                             SvPOK_on(sv);
3786                             *d = '\0';
3787
3788                             if (utf8_variant_count == 0) {
3789                                 SvUTF8_on(sv);
3790                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3791                             }
3792                             else {
3793                                 sv_utf8_upgrade_flags_grow(sv,
3794                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3795                                                 extra);
3796                                 d = SvPVX(sv) + SvCUR(sv);
3797                             }
3798                             has_utf8 = TRUE;
3799                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3800
3801                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3802                              * set correctly here). */
3803                             const STRLEN extra = len + (send - e) + 1;
3804                             const STRLEN off = d - SvPVX_const(sv);
3805                             d = off + SvGROW(sv, off + extra);
3806                         }
3807                         Copy(str, d, len, char);
3808                         d += len;
3809                     }
3810
3811                     SvREFCNT_dec(res);
3812
3813                 } /* End \N{NAME} */
3814
3815               end_backslash_N:
3816 #ifdef EBCDIC
3817                 backslash_N++; /* \N{} is defined to be Unicode */
3818 #endif
3819                 s = e + 1;  /* Point to just after the '}' */
3820                 continue;
3821
3822             /* \c is a control character */
3823             case 'c':
3824                 s++;
3825                 if (s < send) {
3826                     *d++ = grok_bslash_c(*s++, 1);
3827                 }
3828                 else {
3829                     yyerror("Missing control char name in \\c");
3830                 }
3831 #ifdef EBCDIC
3832                 non_portable_endpoint++;
3833 #endif
3834                 continue;
3835
3836             /* printf-style backslashes, formfeeds, newlines, etc */
3837             case 'b':
3838                 *d++ = '\b';
3839                 break;
3840             case 'n':
3841                 *d++ = '\n';
3842                 break;
3843             case 'r':
3844                 *d++ = '\r';
3845                 break;
3846             case 'f':
3847                 *d++ = '\f';
3848                 break;
3849             case 't':
3850                 *d++ = '\t';
3851                 break;
3852             case 'e':
3853                 *d++ = ESC_NATIVE;
3854                 break;
3855             case 'a':
3856                 *d++ = '\a';
3857                 break;
3858             } /* end switch */
3859
3860             s++;
3861             continue;
3862         } /* end if (backslash) */
3863
3864     default_action:
3865         /* Just copy the input to the output, though we may have to convert
3866          * to/from UTF-8.
3867          *
3868          * If the input has the same representation in UTF-8 as not, it will be
3869          * a single byte, and we don't care about UTF8ness; just copy the byte */
3870         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
3871             *d++ = *s++;
3872         }
3873         else if (! this_utf8 && ! has_utf8) {
3874             /* If neither source nor output is UTF-8, is also a single byte,
3875              * just copy it; but this byte counts should we later have to
3876              * convert to UTF-8 */
3877             *d++ = *s++;
3878             utf8_variant_count++;
3879         }
3880         else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
3881             const STRLEN len = UTF8SKIP(s);
3882
3883             /* We expect the source to have already been checked for
3884              * malformedness */
3885             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
3886
3887             Copy(s, d, len, U8);
3888             d += len;
3889             s += len;
3890         }
3891         else { /* UTF8ness matters and doesn't match, need to convert */
3892             STRLEN len = 1;
3893             const UV nextuv   = (this_utf8)
3894                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3895                                 : (UV) ((U8) *s);
3896             STRLEN need = UVCHR_SKIP(nextuv);
3897
3898             if (!has_utf8) {
3899                 SvCUR_set(sv, d - SvPVX_const(sv));
3900                 SvPOK_on(sv);
3901                 *d = '\0';
3902
3903                 /* See Note on sizing above. */
3904                 need += (STRLEN)(send - s) + 1;
3905
3906                 if (utf8_variant_count == 0) {
3907                     SvUTF8_on(sv);
3908                     d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
3909                 }
3910                 else {
3911                     sv_utf8_upgrade_flags_grow(sv,
3912                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3913                                                need);
3914                     d = SvPVX(sv) + SvCUR(sv);
3915                 }
3916                 has_utf8 = TRUE;
3917             } else if (need > len) {
3918                 /* encoded value larger than old, may need extra space (NOTE:
3919                  * SvCUR() is not set correctly here).   See Note on sizing
3920                  * above.  */
3921                 const STRLEN extra = need + (send - s) + 1;
3922                 const STRLEN off = d - SvPVX_const(sv);
3923                 d = off + SvGROW(sv, off + extra);
3924             }
3925             s += len;
3926
3927             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3928         }
3929     } /* while loop to process each character */
3930
3931     /* terminate the string and set up the sv */
3932     *d = '\0';
3933     SvCUR_set(sv, d - SvPVX_const(sv));
3934     if (SvCUR(sv) >= SvLEN(sv))
3935         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
3936                    " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3937
3938     SvPOK_on(sv);
3939     if (has_utf8) {
3940         SvUTF8_on(sv);
3941         if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
3942             PL_parser->lex_sub_op->op_private |=
3943                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3944         }
3945     }
3946
3947     /* shrink the sv if we allocated more than we used */
3948     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3949         SvPV_shrink_to_cur(sv);
3950     }
3951
3952     /* return the substring (via pl_yylval) only if we parsed anything */
3953     if (s > start) {
3954         char *s2 = start;
3955         for (; s2 < s; s2++) {
3956             if (*s2 == '\n')
3957                 COPLINE_INC_WITH_HERELINES;
3958         }
3959         SvREFCNT_inc_simple_void_NN(sv);
3960         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3961             && ! PL_parser->lex_re_reparsing)
3962         {
3963             const char *const key = PL_lex_inpat ? "qr" : "q";
3964             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3965             const char *type;
3966             STRLEN typelen;
3967
3968             if (PL_lex_inwhat == OP_TRANS) {
3969                 type = "tr";
3970                 typelen = 2;
3971             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3972                 type = "s";
3973                 typelen = 1;
3974             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3975                 type = "q";
3976                 typelen = 1;
3977             } else  {
3978                 type = "qq";
3979                 typelen = 2;
3980             }
3981
3982             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3983                                 type, typelen);
3984         }
3985         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
3986     }
3987     LEAVE_with_name("scan_const");
3988     return s;
3989 }
3990
3991 /* S_intuit_more
3992  * Returns TRUE if there's more to the expression (e.g., a subscript),
3993  * FALSE otherwise.
3994  *
3995  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3996  *
3997  * ->[ and ->{ return TRUE
3998  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3999  * { and [ outside a pattern are always subscripts, so return TRUE
4000  * if we're outside a pattern and it's not { or [, then return FALSE
4001  * if we're in a pattern and the first char is a {
4002  *   {4,5} (any digits around the comma) returns FALSE
4003  * if we're in a pattern and the first char is a [
4004  *   [] returns FALSE
4005  *   [SOMETHING] has a funky algorithm to decide whether it's a
4006  *      character class or not.  It has to deal with things like
4007  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4008  * anything else returns TRUE
4009  */
4010
4011 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4012
4013 STATIC int
4014 S_intuit_more(pTHX_ char *s)
4015 {
4016     PERL_ARGS_ASSERT_INTUIT_MORE;
4017
4018     if (PL_lex_brackets)
4019         return TRUE;
4020     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4021         return TRUE;
4022     if (*s == '-' && s[1] == '>'
4023      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4024      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4025         ||(s[2] == '@' && strchr("*[{",s[3])) ))
4026         return TRUE;
4027     if (*s != '{' && *s != '[')
4028         return FALSE;
4029     if (!PL_lex_inpat)
4030         return TRUE;
4031
4032     /* In a pattern, so maybe we have {n,m}. */
4033     if (*s == '{') {
4034         if (regcurly(s)) {
4035             return FALSE;
4036         }
4037         return TRUE;
4038     }
4039
4040     /* On the other hand, maybe we have a character class */
4041
4042     s++;
4043     if (*s == ']' || *s == '^')
4044         return FALSE;
4045     else {
4046         /* this is terrifying, and it works */
4047         int weight;
4048         char seen[256];
4049         const char * const send = strchr(s,']');
4050         unsigned char un_char, last_un_char;
4051         char tmpbuf[sizeof PL_tokenbuf * 4];
4052
4053         if (!send)              /* has to be an expression */
4054             return TRUE;
4055         weight = 2;             /* let's weigh the evidence */
4056
4057         if (*s == '$')
4058             weight -= 3;
4059         else if (isDIGIT(*s)) {
4060             if (s[1] != ']') {
4061                 if (isDIGIT(s[1]) && s[2] == ']')
4062                     weight -= 10;
4063             }
4064             else
4065                 weight -= 100;
4066         }
4067         Zero(seen,256,char);
4068         un_char = 255;
4069         for (; s < send; s++) {
4070             last_un_char = un_char;
4071             un_char = (unsigned char)*s;
4072             switch (*s) {
4073             case '@':
4074             case '&':
4075             case '$':
4076                 weight -= seen[un_char] * 10;
4077                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4078                     int len;
4079                     char *tmp = PL_bufend;
4080                     PL_bufend = (char*)send;
4081                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4082                     PL_bufend = tmp;
4083                     len = (int)strlen(tmpbuf);
4084                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4085                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4086                         weight -= 100;
4087                     else
4088                         weight -= 10;
4089                 }
4090                 else if (*s == '$'
4091                          && s[1]
4092                          && strchr("[#!%*<>()-=",s[1]))
4093                 {
4094                     if (/*{*/ strchr("])} =",s[2]))
4095                         weight -= 10;
4096                     else
4097                         weight -= 1;
4098                 }
4099                 break;
4100             case '\\':
4101                 un_char = 254;
4102                 if (s[1]) {
4103                     if (strchr("wds]",s[1]))
4104                         weight += 100;
4105                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4106                         weight += 1;
4107                     else if (strchr("rnftbxcav",s[1]))
4108                         weight += 40;
4109                     else if (isDIGIT(s[1])) {
4110                         weight += 40;
4111                         while (s[1] && isDIGIT(s[1]))
4112                             s++;
4113                     }
4114                 }
4115                 else
4116                     weight += 100;
4117                 break;
4118             case '-':
4119                 if (s[1] == '\\')
4120                     weight += 50;
4121                 if (strchr("aA01! ",last_un_char))
4122                     weight += 30;
4123                 if (strchr("zZ79~",s[1]))
4124                     weight += 30;
4125                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4126                     weight -= 5;        /* cope with negative subscript */
4127                 break;
4128             default:
4129                 if (!isWORDCHAR(last_un_char)
4130                     && !(last_un_char == '$' || last_un_char == '@'
4131                          || last_un_char == '&')
4132                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4133                     char *d = s;
4134                     while (isALPHA(*s))
4135                         s++;
4136                     if (keyword(d, s - d, 0))
4137                         weight -= 150;
4138                 }
4139                 if (un_char == last_un_char + 1)
4140                     weight += 5;
4141                 weight -= seen[un_char];
4142                 break;
4143             }
4144             seen[un_char]++;
4145         }
4146         if (weight >= 0)        /* probably a character class */
4147             return FALSE;
4148     }
4149
4150     return TRUE;
4151 }
4152
4153 /*
4154  * S_intuit_method
4155  *
4156  * Does all the checking to disambiguate
4157  *   foo bar
4158  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4159  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4160  *
4161  * First argument is the stuff after the first token, e.g. "bar".
4162  *
4163  * Not a method if foo is a filehandle.
4164  * Not a method if foo is a subroutine prototyped to take a filehandle.
4165  * Not a method if it's really "Foo $bar"
4166  * Method if it's "foo $bar"
4167  * Not a method if it's really "print foo $bar"
4168  * Method if it's really "foo package::" (interpreted as package->foo)
4169  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4170  * Not a method if bar is a filehandle or package, but is quoted with
4171  *   =>
4172  */
4173
4174 STATIC int
4175 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4176 {
4177     char *s = start + (*start == '$');
4178     char tmpbuf[sizeof PL_tokenbuf];
4179     STRLEN len;
4180     GV* indirgv;
4181         /* Mustn't actually add anything to a symbol table.
4182            But also don't want to "initialise" any placeholder
4183            constants that might already be there into full
4184            blown PVGVs with attached PVCV.  */
4185     GV * const gv =
4186         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4187
4188     PERL_ARGS_ASSERT_INTUIT_METHOD;
4189
4190     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4191             return 0;
4192     if (cv && SvPOK(cv)) {
4193         const char *proto = CvPROTO(cv);
4194         if (proto) {
4195             while (*proto && (isSPACE(*proto) || *proto == ';'))
4196                 proto++;
4197             if (*proto == '*')
4198                 return 0;
4199         }
4200     }
4201
4202     if (*start == '$') {
4203         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4204             || isUPPER(*PL_tokenbuf))
4205             return 0;
4206         s = skipspace(s);
4207         PL_bufptr = start;
4208         PL_expect = XREF;
4209         return *s == '(' ? FUNCMETH : METHOD;
4210     }
4211
4212     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4213     /* start is the beginning of the possible filehandle/object,
4214      * and s is the end of it
4215      * tmpbuf is a copy of it (but with single quotes as double colons)
4216      */
4217
4218     if (!keyword(tmpbuf, len, 0)) {
4219         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4220             len -= 2;
4221             tmpbuf[len] = '\0';
4222             goto bare_package;
4223         }
4224         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4225                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4226                                     SVt_PVCV);
4227         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4228          && (!isGV(indirgv) || GvCVu(indirgv)))
4229             return 0;
4230         /* filehandle or package name makes it a method */
4231         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4232             s = skipspace(s);
4233             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4234                 return 0;       /* no assumptions -- "=>" quotes bareword */
4235       bare_package:
4236             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4237                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4238             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4239             PL_expect = XTERM;
4240             force_next(BAREWORD);
4241             PL_bufptr = s;
4242             return *s == '(' ? FUNCMETH : METHOD;
4243         }
4244     }
4245     return 0;
4246 }
4247
4248 /* Encoded script support. filter_add() effectively inserts a
4249  * 'pre-processing' function into the current source input stream.
4250  * Note that the filter function only applies to the current source file
4251  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4252  *
4253  * The datasv parameter (which may be NULL) can be used to pass
4254  * private data to this instance of the filter. The filter function
4255  * can recover the SV using the FILTER_DATA macro and use it to
4256  * store private buffers and state information.
4257  *
4258  * The supplied datasv parameter is upgraded to a PVIO type
4259  * and the IoDIRP/IoANY field is used to store the function pointer,
4260  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4261  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4262  * private use must be set using malloc'd pointers.
4263  */
4264
4265 SV *
4266 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4267 {
4268     if (!funcp)
4269         return NULL;
4270
4271     if (!PL_parser)
4272         return NULL;
4273
4274     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4275         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4276
4277     if (!PL_rsfp_filters)
4278         PL_rsfp_filters = newAV();
4279     if (!datasv)
4280         datasv = newSV(0);
4281     SvUPGRADE(datasv, SVt_PVIO);
4282     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4283     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4284     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4285                           FPTR2DPTR(void *, IoANY(datasv)),
4286                           SvPV_nolen(datasv)));
4287     av_unshift(PL_rsfp_filters, 1);
4288     av_store(PL_rsfp_filters, 0, datasv) ;
4289     if (
4290         !PL_parser->filtered
4291      && PL_parser->lex_flags & LEX_EVALBYTES
4292      && PL_bufptr < PL_bufend
4293     ) {
4294         const char *s = PL_bufptr;
4295         while (s < PL_bufend) {
4296             if (*s == '\n') {
4297                 SV *linestr = PL_parser->linestr;
4298                 char *buf = SvPVX(linestr);
4299                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4300                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4301                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4302                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4303                 STRLEN const last_uni_pos =
4304                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4305                 STRLEN const last_lop_pos =
4306                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4307                 av_push(PL_rsfp_filters, linestr);
4308                 PL_parser->linestr =
4309                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4310                 buf = SvPVX(PL_parser->linestr);
4311                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4312                 PL_parser->bufptr = buf + bufptr_pos;
4313                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4314                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4315                 PL_parser->linestart = buf + linestart_pos;
4316                 if (PL_parser->last_uni)
4317                     PL_parser->last_uni = buf + last_uni_pos;
4318                 if (PL_parser->last_lop)
4319                     PL_parser->last_lop = buf + last_lop_pos;
4320                 SvLEN(linestr) = SvCUR(linestr);
4321                 SvCUR(linestr) = s-SvPVX(linestr);
4322                 PL_parser->filtered = 1;
4323                 break;
4324             }
4325             s++;
4326         }
4327     }
4328     return(datasv);
4329 }
4330
4331
4332 /* Delete most recently added instance of this filter function. */
4333 void
4334 Perl_filter_del(pTHX_ filter_t funcp)
4335 {
4336     SV *datasv;
4337
4338     PERL_ARGS_ASSERT_FILTER_DEL;
4339
4340 #ifdef DEBUGGING
4341     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4342                           FPTR2DPTR(void*, funcp)));
4343 #endif
4344     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4345         return;
4346     /* if filter is on top of stack (usual case) just pop it off */
4347     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4348     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4349         sv_free(av_pop(PL_rsfp_filters));
4350
4351         return;
4352     }
4353     /* we need to search for the correct entry and clear it     */
4354     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4355 }
4356
4357
4358 /* Invoke the idxth filter function for the current rsfp.        */
4359 /* maxlen 0 = read one text line */
4360 I32
4361 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4362 {
4363     filter_t funcp;
4364     SV *datasv = NULL;
4365     /* This API is bad. It should have been using unsigned int for maxlen.
4366        Not sure if we want to change the API, but if not we should sanity
4367        check the value here.  */
4368     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4369
4370     PERL_ARGS_ASSERT_FILTER_READ;
4371
4372     if (!PL_parser || !PL_rsfp_filters)
4373         return -1;
4374     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4375         /* Provide a default input filter to make life easy.    */
4376         /* Note that we append to the line. This is handy.      */
4377         DEBUG_P(PerlIO_printf(Perl_debug_log,
4378                               "filter_read %d: from rsfp\n", idx));
4379         if (correct_length) {
4380             /* Want a block */
4381             int len ;
4382             const int old_len = SvCUR(buf_sv);
4383
4384             /* ensure buf_sv is large enough */
4385             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4386             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4387                                    correct_length)) <= 0) {
4388                 if (PerlIO_error(PL_rsfp))
4389                     return -1;          /* error */
4390                 else
4391                     return 0 ;          /* end of file */
4392             }
4393             SvCUR_set(buf_sv, old_len + len) ;
4394             SvPVX(buf_sv)[old_len + len] = '\0';
4395         } else {
4396             /* Want a line */
4397             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4398                 if (PerlIO_error(PL_rsfp))
4399                     return -1;          /* error */
4400                 else
4401                     return 0 ;          /* end of file */
4402             }
4403         }
4404         return SvCUR(buf_sv);
4405     }
4406     /* Skip this filter slot if filter has been deleted */
4407     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4408         DEBUG_P(PerlIO_printf(Perl_debug_log,
4409                               "filter_read %d: skipped (filter deleted)\n",
4410                               idx));
4411         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4412     }
4413     if (SvTYPE(datasv) != SVt_PVIO) {
4414         if (correct_length) {
4415             /* Want a block */
4416             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4417             if (!remainder) return 0; /* eof */
4418             if (correct_length > remainder) correct_length = remainder;
4419             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4420             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4421         } else {
4422             /* Want a line */
4423             const char *s = SvEND(datasv);
4424             const char *send = SvPVX(datasv) + SvLEN(datasv);
4425             while (s < send) {
4426                 if (*s == '\n') {
4427                     s++;
4428                     break;
4429                 }
4430                 s++;
4431             }
4432             if (s == send) return 0; /* eof */
4433             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4434             SvCUR_set(datasv, s-SvPVX(datasv));
4435         }
4436         return SvCUR(buf_sv);
4437     }
4438     /* Get function pointer hidden within datasv        */
4439     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4440     DEBUG_P(PerlIO_printf(Perl_debug_log,
4441                           "filter_read %d: via function %p (%s)\n",
4442                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4443     /* Call function. The function is expected to       */
4444     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4445     /* Return: <0:error, =0:eof, >0:not eof             */
4446     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4447 }
4448
4449 STATIC char *
4450 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4451 {
4452     PERL_ARGS_ASSERT_FILTER_GETS;
4453
4454 #ifdef PERL_CR_FILTER
4455     if (!PL_rsfp_filters) {
4456         filter_add(S_cr_textfilter,NULL);
4457     }
4458 #endif
4459     if (PL_rsfp_filters) {
4460         if (!append)
4461             SvCUR_set(sv, 0);   /* start with empty line        */
4462         if (FILTER_READ(0, sv, 0) > 0)
4463             return ( SvPVX(sv) ) ;
4464         else
4465             return NULL ;
4466     }
4467     else
4468         return (sv_gets(sv, PL_rsfp, append));
4469 }
4470
4471 STATIC HV *
4472 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4473 {
4474     GV *gv;
4475
4476     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4477
4478     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4479         return PL_curstash;
4480
4481     if (len > 2
4482         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4483         && (gv = gv_fetchpvn_flags(pkgname,
4484                                    len,
4485                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4486     {
4487         return GvHV(gv);                        /* Foo:: */
4488     }
4489
4490     /* use constant CLASS => 'MyClass' */
4491     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4492     if (gv && GvCV(gv)) {
4493         SV * const sv = cv_const_sv(GvCV(gv));
4494         if (sv)
4495             return gv_stashsv(sv, 0);
4496     }
4497
4498     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4499 }
4500
4501
4502 STATIC char *
4503 S_tokenize_use(pTHX_ int is_use, char *s) {
4504     PERL_ARGS_ASSERT_TOKENIZE_USE;
4505
4506     if (PL_expect != XSTATE)
4507         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4508                     is_use ? "use" : "no"));
4509     PL_expect = XTERM;
4510     s = skipspace(s);
4511     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4512         s = force_version(s, TRUE);
4513         if (*s == ';' || *s == '}'
4514                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4515             NEXTVAL_NEXTTOKE.opval = NULL;
4516             force_next(BAREWORD);
4517         }
4518         else if (*s == 'v') {
4519             s = force_word(s,BAREWORD,FALSE,TRUE);
4520             s = force_version(s, FALSE);
4521         }
4522     }
4523     else {
4524         s = force_word(s,BAREWORD,FALSE,TRUE);
4525         s = force_version(s, FALSE);
4526     }
4527     pl_yylval.ival = is_use;
4528     return s;
4529 }
4530 #ifdef DEBUGGING
4531     static const char* const exp_name[] =
4532         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4533           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4534           "SIGVAR", "TERMORDORDOR"
4535         };
4536 #endif
4537
4538 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4539 STATIC bool
4540 S_word_takes_any_delimiter(char *p, STRLEN len)
4541 {
4542     return (len == 1 && strchr("msyq", p[0]))
4543             || (len == 2
4544                 && ((p[0] == 't' && p[1] == 'r')
4545                     || (p[0] == 'q' && strchr("qwxr", p[1]))));
4546 }
4547
4548 static void
4549 S_check_scalar_slice(pTHX_ char *s)
4550 {
4551     s++;
4552     while (SPACE_OR_TAB(*s)) s++;
4553     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4554                                                              PL_bufend,
4555                                                              UTF))
4556     {
4557         return;
4558     }
4559     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4560            || (*s && strchr(" \t$#+-'\"", *s)))
4561     {
4562         s += UTF ? UTF8SKIP(s) : 1;
4563     }
4564     if (*s == '}' || *s == ']')
4565         pl_yylval.ival = OPpSLICEWARNING;
4566 }
4567
4568 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4569 static void
4570 S_lex_token_boundary(pTHX)
4571 {
4572     PL_oldoldbufptr = PL_oldbufptr;
4573     PL_oldbufptr = PL_bufptr;
4574 }
4575
4576 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4577 static char *
4578 S_vcs_conflict_marker(pTHX_ char *s)
4579 {
4580     lex_token_boundary();
4581     PL_bufptr = s;
4582     yyerror("Version control conflict marker");
4583     while (s < PL_bufend && *s != '\n')
4584         s++;
4585     return s;
4586 }
4587
4588 /*
4589   yylex
4590
4591   Works out what to call the token just pulled out of the input
4592   stream.  The yacc parser takes care of taking the ops we return and
4593   stitching them into a tree.
4594
4595   Returns:
4596     The type of the next token
4597
4598   Structure:
4599       Check if we have already built the token; if so, use it.
4600       Switch based on the current state:
4601           - if we have a case modifier in a string, deal with that
4602           - handle other cases of interpolation inside a string
4603           - scan the next line if we are inside a format
4604       In the normal state, switch on the next character:
4605           - default:
4606             if alphabetic, go to key lookup
4607             unrecognized character - croak
4608           - 0/4/26: handle end-of-line or EOF
4609           - cases for whitespace
4610           - \n and #: handle comments and line numbers
4611           - various operators, brackets and sigils
4612           - numbers
4613           - quotes
4614           - 'v': vstrings (or go to key lookup)
4615           - 'x' repetition operator (or go to key lookup)
4616           - other ASCII alphanumerics (key lookup begins here):
4617               word before => ?
4618               keyword plugin
4619               scan built-in keyword (but do nothing with it yet)
4620               check for statement label
4621               check for lexical subs
4622                   goto just_a_word if there is one
4623               see whether built-in keyword is overridden
4624               switch on keyword number:
4625                   - default: just_a_word:
4626                       not a built-in keyword; handle bareword lookup
4627                       disambiguate between method and sub call
4628                       fall back to bareword
4629                   - cases for built-in keywords
4630 */
4631
4632
4633 int
4634 Perl_yylex(pTHX)
4635 {
4636     dVAR;
4637     char *s = PL_bufptr;
4638     char *d;
4639     STRLEN len;
4640     bool bof = FALSE;
4641     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4642     U8 formbrack = 0;
4643     U32 fake_eof = 0;
4644
4645     /* orig_keyword, gvp, and gv are initialized here because
4646      * jump to the label just_a_word_zero can bypass their
4647      * initialization later. */
4648     I32 orig_keyword = 0;
4649     GV *gv = NULL;
4650     GV **gvp = NULL;
4651
4652     DEBUG_T( {
4653         SV* tmp = newSVpvs("");
4654         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4655             (IV)CopLINE(PL_curcop),
4656             lex_state_names[PL_lex_state],
4657             exp_name[PL_expect],
4658             pv_display(tmp, s, strlen(s), 0, 60));
4659         SvREFCNT_dec(tmp);
4660     } );
4661
4662     /* when we've already built the next token, just pull it out of the queue */
4663     if (PL_nexttoke) {
4664         PL_nexttoke--;
4665         pl_yylval = PL_nextval[PL_nexttoke];
4666         {
4667             I32 next_type;
4668             next_type = PL_nexttype[PL_nexttoke];
4669             if (next_type & (7<<24)) {
4670                 if (next_type & (1<<24)) {
4671                     if (PL_lex_brackets > 100)
4672                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4673                     PL_lex_brackstack[PL_lex_brackets++] =
4674                         (char) ((next_type >> 16) & 0xff);
4675                 }
4676                 if (next_type & (2<<24))
4677                     PL_lex_allbrackets++;
4678                 if (next_type & (4<<24))
4679                     PL_lex_allbrackets--;
4680                 next_type &= 0xffff;
4681             }
4682             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4683         }
4684     }
4685
4686     switch (PL_lex_state) {
4687     case LEX_NORMAL:
4688     case LEX_INTERPNORMAL:
4689         break;
4690
4691     /* interpolated case modifiers like \L \U, including \Q and \E.
4692        when we get here, PL_bufptr is at the \
4693     */
4694     case LEX_INTERPCASEMOD:
4695 #ifdef DEBUGGING
4696         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4697             Perl_croak(aTHX_
4698                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4699                        PL_bufptr, PL_bufend, *PL_bufptr);
4700 #endif
4701         /* handle \E or end of string */
4702         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4703             /* if at a \E */
4704             if (PL_lex_casemods) {
4705                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4706                 PL_lex_casestack[PL_lex_casemods] = '\0';
4707
4708                 if (PL_bufptr != PL_bufend
4709                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4710                         || oldmod == 'F')) {
4711                     PL_bufptr += 2;
4712                     PL_lex_state = LEX_INTERPCONCAT;
4713                 }
4714                 PL_lex_allbrackets--;
4715                 return REPORT(')');
4716             }
4717             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4718                /* Got an unpaired \E */
4719                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4720                         "Useless use of \\E");
4721             }
4722             if (PL_bufptr != PL_bufend)
4723                 PL_bufptr += 2;
4724             PL_lex_state = LEX_INTERPCONCAT;
4725             return yylex();
4726         }
4727         else {
4728             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4729               "### Saw case modifier\n"); });
4730             s = PL_bufptr + 1;
4731             if (s[1] == '\\' && s[2] == 'E') {
4732                 PL_bufptr = s + 3;
4733                 PL_lex_state = LEX_INTERPCONCAT;
4734                 return yylex();
4735             }
4736             else {
4737                 I32 tmp;
4738                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4739                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4740                 if ((*s == 'L' || *s == 'U' || *s == 'F')
4741                     && (strpbrk(PL_lex_casestack, "LUF")))
4742                 {
4743                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4744                     PL_lex_allbrackets--;
4745                     return REPORT(')');
4746                 }
4747                 if (PL_lex_casemods > 10)
4748                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4749                 PL_lex_casestack[PL_lex_casemods++] = *s;
4750                 PL_lex_casestack[PL_lex_casemods] = '\0';
4751                 PL_lex_state = LEX_INTERPCONCAT;
4752                 NEXTVAL_NEXTTOKE.ival = 0;
4753                 force_next((2<<24)|'(');
4754                 if (*s == 'l')
4755                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4756                 else if (*s == 'u')
4757                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4758                 else if (*s == 'L')
4759                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4760                 else if (*s == 'U')
4761                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4762                 else if (*s == 'Q')
4763                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4764                 else if (*s == 'F')
4765                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4766                 else
4767                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4768                 PL_bufptr = s + 1;
4769             }
4770             force_next(FUNC);
4771             if (PL_lex_starts) {
4772                 s = PL_bufptr;
4773                 PL_lex_starts = 0;
4774                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4775                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4776                     TOKEN(',');
4777                 else
4778                     AopNOASSIGN(OP_CONCAT);
4779             }
4780             else
4781                 return yylex();
4782         }
4783
4784     case LEX_INTERPPUSH:
4785         return REPORT(sublex_push());
4786
4787     case LEX_INTERPSTART:
4788         if (PL_bufptr == PL_bufend)
4789             return REPORT(sublex_done());
4790         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4791               "### Interpolated variable\n"); });
4792         PL_expect = XTERM;
4793         /* for /@a/, we leave the joining for the regex engine to do
4794          * (unless we're within \Q etc) */
4795         PL_lex_dojoin = (*PL_bufptr == '@'
4796                             && (!PL_lex_inpat || PL_lex_casemods));
4797         PL_lex_state = LEX_INTERPNORMAL;
4798         if (PL_lex_dojoin) {
4799             NEXTVAL_NEXTTOKE.ival = 0;
4800             force_next(',');
4801             force_ident("\"", '$');
4802             NEXTVAL_NEXTTOKE.ival = 0;
4803             force_next('$');
4804             NEXTVAL_NEXTTOKE.ival = 0;
4805             force_next((2<<24)|'(');
4806             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4807             force_next(FUNC);
4808         }
4809         /* Convert (?{...}) and friends to 'do {...}' */
4810         if (PL_lex_inpat && *PL_bufptr == '(') {
4811             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4812             PL_bufptr += 2;
4813             if (*PL_bufptr != '{')
4814                 PL_bufptr++;
4815             PL_expect = XTERMBLOCK;
4816             force_next(DO);
4817         }
4818
4819         if (PL_lex_starts++) {
4820             s = PL_bufptr;
4821             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4822             if (!PL_lex_casemods && PL_lex_inpat)
4823                 TOKEN(',');
4824             else
4825                 AopNOASSIGN(OP_CONCAT);
4826         }
4827         return yylex();
4828
4829     case LEX_INTERPENDMAYBE:
4830         if (intuit_more(PL_bufptr)) {
4831             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4832             break;
4833         }
4834         /* FALLTHROUGH */
4835
4836     case LEX_INTERPEND:
4837         if (PL_lex_dojoin) {
4838             const U8 dojoin_was = PL_lex_dojoin;
4839             PL_lex_dojoin = FALSE;
4840             PL_lex_state = LEX_INTERPCONCAT;
4841             PL_lex_allbrackets--;
4842             return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
4843         }
4844         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4845             && SvEVALED(PL_lex_repl))
4846         {
4847             if (PL_bufptr != PL_bufend)
4848                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4849             PL_lex_repl = NULL;
4850         }
4851         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4852            re_eval_str.  If the here-doc body’s length equals the previous
4853            value of re_eval_start, re_eval_start will now be null.  So
4854            check re_eval_str as well. */
4855         if (PL_parser->lex_shared->re_eval_start
4856          || PL_parser->lex_shared->re_eval_str) {
4857             SV *sv;
4858             if (*PL_bufptr != ')')
4859                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4860             PL_bufptr++;
4861             /* having compiled a (?{..}) expression, return the original
4862              * text too, as a const */
4863             if (PL_parser->lex_shared->re_eval_str) {
4864                 sv = PL_parser->lex_shared->re_eval_str;
4865                 PL_parser->lex_shared->re_eval_str = NULL;
4866                 SvCUR_set(sv,
4867                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4868                 SvPV_shrink_to_cur(sv);
4869             }
4870             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4871                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4872             NEXTVAL_NEXTTOKE.opval =
4873                     newSVOP(OP_CONST, 0,
4874                                  sv);
4875             force_next(THING);
4876             PL_parser->lex_shared->re_eval_start = NULL;
4877             PL_expect = XTERM;
4878             return REPORT(',');
4879         }
4880
4881         /* FALLTHROUGH */
4882     case LEX_INTERPCONCAT:
4883 #ifdef DEBUGGING
4884         if (PL_lex_brackets)
4885             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4886                        (long) PL_lex_brackets);
4887 #endif
4888         if (PL_bufptr == PL_bufend)
4889             return REPORT(sublex_done());
4890
4891         /* m'foo' still needs to be parsed for possible (?{...}) */
4892         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4893             SV *sv = newSVsv(PL_linestr);
4894             sv = tokeq(sv);
4895             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4896             s = PL_bufend;
4897         }
4898         else {
4899             s = scan_const(PL_bufptr);
4900             if (*s == '\\')
4901                 PL_lex_state = LEX_INTERPCASEMOD;
4902             else
4903                 PL_lex_state = LEX_INTERPSTART;
4904         }
4905
4906         if (s != PL_bufptr) {
4907             NEXTVAL_NEXTTOKE = pl_yylval;
4908             PL_expect = XTERM;
4909             force_next(THING);
4910             if (PL_lex_starts++) {
4911                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4912                 if (!PL_lex_casemods && PL_lex_inpat)
4913                     TOKEN(',');
4914                 else
4915                     AopNOASSIGN(OP_CONCAT);
4916             }
4917             else {
4918                 PL_bufptr = s;
4919                 return yylex();
4920             }
4921         }
4922
4923         return yylex();
4924     case LEX_FORMLINE:
4925         s = scan_formline(PL_bufptr);
4926         if (!PL_lex_formbrack)
4927         {
4928             formbrack = 1;
4929             goto rightbracket;
4930         }
4931         PL_bufptr = s;
4932         return yylex();
4933     }
4934
4935     /* We really do *not* want PL_linestr ever becoming a COW. */
4936     assert (!SvIsCOW(PL_linestr));
4937     s = PL_bufptr;
4938     PL_oldoldbufptr = PL_oldbufptr;
4939     PL_oldbufptr = s;
4940     PL_parser->saw_infix_sigil = 0;
4941
4942     if (PL_in_my == KEY_sigvar) {
4943         /* we expect the sigil and optional var name part of a
4944          * signature element here. Since a '$' is not necessarily
4945          * followed by a var name, handle it specially here; the general
4946          * yylex code would otherwise try to interpret whatever follows
4947          * as a var; e.g. ($, ...) would be seen as the var '$,'
4948          */
4949
4950         char sigil;
4951
4952         s = skipspace(s);
4953         sigil = *s++;
4954         PL_bufptr = s; /* for error reporting */
4955         switch (sigil) {
4956         case '$':
4957         case '@':
4958         case '%':
4959             /* spot stuff that looks like an prototype */
4960             if (strchr("$:@%&*;\\[]", *s)) {
4961                 yyerror("Illegal character following sigil in a subroutine signature");
4962                 break;
4963             }
4964             /* '$#' is banned, while '$ # comment' isn't */
4965             if (*s == '#') {
4966                 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4967                 break;
4968             }
4969             s = skipspace(s);
4970             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4971                 char *dest = PL_tokenbuf + 1;
4972                 /* read var name, including sigil, into PL_tokenbuf */
4973                 PL_tokenbuf[0] = sigil;
4974                 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4975                     0, cBOOL(UTF), FALSE);
4976                 *dest = '\0';
4977                 assert(PL_tokenbuf[1]); /* we have a variable name */
4978                 NEXTVAL_NEXTTOKE.ival = sigil;
4979                 force_next('p'); /* force a signature pending identifier */
4980             }
4981             else
4982                 PL_in_my = 0;
4983             PL_expect = XOPERATOR;
4984             break;
4985
4986         case ')':
4987             PL_expect = XBLOCK;
4988             break;
4989         case ',': /* handle ($a,,$b) */
4990             break;
4991
4992         default:
4993             PL_in_my = 0;
4994             yyerror("A signature parameter must start with '$', '@' or '%'");
4995             /* very crude error recovery: skip to likely next signature
4996              * element */
4997             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4998                 s++;
4999             break;
5000         }
5001         TOKEN(sigil);
5002     }
5003
5004   retry:
5005     switch (*s) {
5006     default:
5007         if (UTF) {
5008             if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
5009                 _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend,
5010                                                   0,
5011                                                   1 /* 1 means die */ );
5012                 NOT_REACHED; /* NOTREACHED */
5013             }
5014             if (isIDFIRST_utf8_safe(s, PL_bufend)) {
5015                 goto keylookup;
5016             }
5017         }
5018         else if (isALNUMC(*s)) {
5019             goto keylookup;
5020         }
5021     {
5022         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5023         const char *c;
5024         if (UTF) {
5025             STRLEN skiplen = UTF8SKIP(s);
5026             STRLEN stravail = PL_bufend - s;
5027             c = sv_uni_display(dsv, newSVpvn_flags(s,
5028                                                    skiplen > stravail ? stravail : skiplen,
5029                                                    SVs_TEMP | SVf_UTF8),
5030                                10, UNI_DISPLAY_ISPRINT);
5031         }
5032         else {
5033             c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5034         }
5035         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5036         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5037             d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT;
5038         } else {
5039             d = PL_linestart;
5040         }
5041         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
5042                           UTF8fARG(UTF, (s - d), d),
5043                          (int) len + 1);
5044     }
5045     case 4:
5046     case 26:
5047         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5048     case 0:
5049         if ((!PL_rsfp || PL_lex_inwhat)
5050          && (!PL_parser->filtered || s+1 < PL_bufend)) {
5051             PL_last_uni = 0;
5052             PL_last_lop = 0;
5053             if (PL_lex_brackets
5054                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
5055             {
5056                 yyerror((const char *)
5057                         (PL_lex_formbrack
5058                          ? "Format not terminated"
5059                          : "Missing right curly or square bracket"));
5060             }
5061             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5062                         "### Tokener got EOF\n");
5063             } );
5064             TOKEN(0);
5065         }
5066         if (s++ < PL_bufend)
5067             goto retry;                 /* ignore stray nulls */
5068         PL_last_uni = 0;
5069         PL_last_lop = 0;
5070         if (!PL_in_eval && !PL_preambled) {
5071             PL_preambled = TRUE;
5072             if (PL_perldb) {
5073                 /* Generate a string of Perl code to load the debugger.
5074                  * If PERL5DB is set, it will return the contents of that,
5075                  * otherwise a compile-time require of perl5db.pl.  */
5076
5077                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5078
5079                 if (pdb) {
5080                     sv_setpv(PL_linestr, pdb);
5081                     sv_catpvs(PL_linestr,";");
5082                 } else {
5083                     SETERRNO(0,SS_NORMAL);
5084                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5085                 }
5086                 PL_parser->preambling = CopLINE(PL_curcop);
5087             } else
5088                 SvPVCLEAR(PL_linestr);
5089             if (PL_preambleav) {
5090                 SV **svp = AvARRAY(PL_preambleav);
5091                 SV **const end = svp + AvFILLp(PL_preambleav);
5092                 while(svp <= end) {
5093                     sv_catsv(PL_linestr, *svp);
5094                     ++svp;
5095                     sv_catpvs(PL_linestr, ";");
5096                 }
5097                 sv_free(MUTABLE_SV(PL_preambleav));
5098                 PL_preambleav = NULL;
5099             }
5100             if (PL_minus_E)
5101                 sv_catpvs(PL_linestr,
5102                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5103             if (PL_minus_n || PL_minus_p) {
5104                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5105                 if (PL_minus_l)
5106                     sv_catpvs(PL_linestr,"chomp;");
5107                 if (PL_minus_a) {
5108                     if (PL_minus_F) {
5109                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5110                              || *PL_splitstr == '"')
5111                               && strchr(PL_splitstr + 1, *PL_splitstr))
5112                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5113                         else {
5114                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5115                                bytes can be used as quoting characters.  :-) */
5116                             const char *splits = PL_splitstr;
5117                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5118                             do {
5119                                 /* Need to \ \s  */
5120                                 if (*splits == '\\')
5121                                     sv_catpvn(PL_linestr, splits, 1);
5122                                 sv_catpvn(PL_linestr, splits, 1);
5123                             } while (*splits++);
5124                             /* This loop will embed the trailing NUL of
5125                                PL_linestr as the last thing it does before
5126                                terminating.  */
5127                             sv_catpvs(PL_linestr, ");");
5128                         }
5129                     }
5130                     else
5131                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5132                 }
5133             }
5134             sv_catpvs(PL_linestr, "\n");
5135             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5136             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5137             PL_last_lop = PL_last_uni = NULL;
5138             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5139                 update_debugger_info(PL_linestr, NULL, 0);
5140             goto retry;
5141         }
5142         do {
5143             fake_eof = 0;
5144             bof = PL_rsfp ? TRUE : FALSE;
5145             if (0) {
5146               fake_eof:
5147                 fake_eof = LEX_FAKE_EOF;
5148             }
5149             PL_bufptr = PL_bufend;
5150             COPLINE_INC_WITH_HERELINES;
5151             if (!lex_next_chunk(fake_eof)) {
5152                 CopLINE_dec(PL_curcop);
5153                 s = PL_bufptr;
5154                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5155             }
5156             CopLINE_dec(PL_curcop);
5157             s = PL_bufptr;
5158             /* If it looks like the start of a BOM or raw UTF-16,
5159              * check if it in fact is. */
5160             if (bof && PL_rsfp
5161                 && (*s == 0
5162                     || *(U8*)s == BOM_UTF8_FIRST_BYTE
5163                         || *(U8*)s >= 0xFE
5164                         || s[1] == 0))
5165             {
5166                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5167                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5168 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5169                 /* offset may include swallowed CR */
5170                 if (!bof)
5171                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5172 #endif
5173                 if (bof) {
5174                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5175                     s = swallow_bom((U8*)s);
5176                 }
5177             }
5178             if (PL_parser->in_pod) {
5179                 /* Incest with pod. */
5180                 if (*s == '=' && strEQs(s, "=cut") && !isALPHA(s[4])) {
5181                     SvPVCLEAR(PL_linestr);
5182                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5183                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5184                     PL_last_lop = PL_last_uni = NULL;
5185                     PL_parser->in_pod = 0;
5186                 }
5187             }
5188             if (PL_rsfp || PL_parser->filtered)
5189                 incline(s);
5190         } while (PL_parser->in_pod);
5191         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5192         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5193         PL_last_lop = PL_last_uni = NULL;
5194         if (CopLINE(PL_curcop) == 1) {
5195             while (s < PL_bufend && isSPACE(*s))
5196                 s++;
5197             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5198                 s++;
5199             d = NULL;
5200             if (!PL_in_eval) {
5201                 if (*s == '#' && *(s+1) == '!')
5202                     d = s + 2;
5203 #ifdef ALTERNATE_SHEBANG
5204                 else {
5205                     static char const as[] = ALTERNATE_SHEBANG;
5206                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5207                         d = s + (sizeof(as) - 1);
5208                 }
5209 #endif /* ALTERNATE_SHEBANG */
5210             }
5211             if (d) {
5212                 char *ipath;
5213                 char *ipathend;
5214
5215                 while (isSPACE(*d))
5216                     d++;
5217                 ipath = d;
5218                 while (*d && !isSPACE(*d))
5219                     d++;
5220                 ipathend = d;
5221
5222 #ifdef ARG_ZERO_IS_SCRIPT
5223                 if (ipathend > ipath) {
5224                     /*
5225                      * HP-UX (at least) sets argv[0] to the script name,
5226                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5227                      * at least, set argv[0] to the basename of the Perl
5228                      * interpreter. So, having found "#!", we'll set it right.
5229                      */
5230                     SV* copfilesv = CopFILESV(PL_curcop);
5231                     if (copfilesv) {
5232                         SV * const x =
5233                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5234                                              SVt_PV)); /* $^X */
5235                         assert(SvPOK(x) || SvGMAGICAL(x));
5236                         if (sv_eq(x, copfilesv)) {
5237                             sv_setpvn(x, ipath, ipathend - ipath);
5238                             SvSETMAGIC(x);
5239                         }
5240                         else {
5241                             STRLEN blen;
5242                             STRLEN llen;
5243                             const char *bstart = SvPV_const(copfilesv, blen);
5244                             const char * const lstart = SvPV_const(x, llen);
5245                             if (llen < blen) {
5246                                 bstart += blen - llen;
5247                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
5248                                     sv_setpvn(x, ipath, ipathend - ipath);
5249                                     SvSETMAGIC(x);
5250                                 }
5251                             }
5252                         }
5253                     }
5254                     else {
5255                         /* Anything to do if no copfilesv? */
5256                     }
5257                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5258                 }
5259 #endif /* ARG_ZERO_IS_SCRIPT */
5260
5261                 /*
5262                  * Look for options.
5263                  */
5264                 d = instr(s,"perl -");
5265                 if (!d) {
5266                     d = instr(s,"perl");
5267                     if (d && d[4] == '6')
5268                         d = NULL;
5269 #if defined(DOSISH)
5270                     /* avoid getting into infinite loops when shebang
5271                      * line contains "Perl" rather than "perl" */
5272                     if (!d) {
5273                         for (d = ipathend-4; d >= ipath; --d) {
5274                             if (isALPHA_FOLD_EQ(*d, 'p')
5275                                 && !ibcmp(d, "perl", 4))
5276                             {
5277                                 break;
5278                             }
5279                         }
5280                         if (d < ipath)
5281                             d = NULL;
5282                     }
5283 #endif
5284                 }
5285 #ifdef ALTERNATE_SHEBANG
5286                 /*
5287                  * If the ALTERNATE_SHEBANG on this system starts with a
5288                  * character that can be part of a Perl expression, then if
5289                  * we see it but not "perl", we're probably looking at the
5290                  * start of Perl code, not a request to hand off to some
5291                  * other interpreter.  Similarly, if "perl" is there, but
5292                  * not in the first 'word' of the line, we assume the line
5293                  * contains the start of the Perl program.
5294                  */
5295                 if (d && *s != '#') {
5296                     const char *c = ipath;
5297                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5298                         c++;
5299                     if (c < d)
5300                         d = NULL;       /* "perl" not in first word; ignore */
5301                     else
5302                         *s = '#';       /* Don't try to parse shebang line */
5303                 }
5304 #endif /* ALTERNATE_SHEBANG */
5305                 if (!d
5306                     && *s == '#'
5307                     && ipathend > ipath
5308                     && !PL_minus_c
5309                     && !instr(s,"indir")
5310                     && instr(PL_origargv[0],"perl"))
5311                 {
5312                     dVAR;
5313                     char **newargv;
5314
5315                     *ipathend = '\0';
5316                     s = ipathend + 1;
5317                     while (s < PL_bufend && isSPACE(*s))
5318                         s++;
5319                     if (s < PL_bufend) {
5320                         Newx(newargv,PL_origargc+3,char*);
5321                         newargv[1] = s;
5322                         while (s < PL_bufend && !isSPACE(*s))
5323                             s++;
5324                         *s = '\0';
5325                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5326                     }
5327                     else
5328                         newargv = PL_origargv;
5329                     newargv[0] = ipath;
5330                     PERL_FPU_PRE_EXEC
5331                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5332                     PERL_FPU_POST_EXEC
5333                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5334                 }
5335                 if (d) {
5336                     while (*d && !isSPACE(*d))
5337                         d++;
5338                     while (SPACE_OR_TAB(*d))
5339                         d++;
5340
5341                     if (*d++ == '-') {
5342                         const bool switches_done = PL_doswitches;
5343                         const U32 oldpdb = PL_perldb;
5344                         const bool oldn = PL_minus_n;
5345                         const bool oldp = PL_minus_p;
5346                         const char *d1 = d;
5347
5348                         do {
5349                             bool baduni = FALSE;
5350                             if (*d1 == 'C') {
5351                                 const char *d2 = d1 + 1;
5352                                 if (parse_unicode_opts((const char **)&d2)
5353                                     != PL_unicode)
5354                                     baduni = TRUE;
5355                             }
5356                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5357                                 const char * const m = d1;
5358                                 while (*d1 && !isSPACE(*d1))
5359                                     d1++;
5360                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5361                                       (int)(d1 - m), m);
5362                             }
5363                             d1 = moreswitches(d1);
5364                         } while (d1);
5365                         if (PL_doswitches && !switches_done) {
5366                             int argc = PL_origargc;
5367                             char **argv = PL_origargv;
5368                             do {
5369                                 argc--,argv++;
5370                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5371                             init_argv_symbols(argc,argv);
5372                         }
5373                         if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5374                             || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5375                               /* if we have already added "LINE: while (<>) {",
5376                                  we must not do it again */
5377                         {
5378                             SvPVCLEAR(PL_linestr);
5379                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5380                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5381                             PL_last_lop = PL_last_uni = NULL;
5382                             PL_preambled = FALSE;
5383                             if (PERLDB_LINE_OR_SAVESRC)
5384                                 (void)gv_fetchfile(PL_origfilename);
5385                             goto retry;
5386                         }
5387                     }
5388                 }
5389             }
5390         }
5391         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5392             PL_lex_state = LEX_FORMLINE;
5393             force_next(FORMRBRACK);
5394             TOKEN(';');
5395         }
5396         goto retry;
5397     case '\r':
5398 #ifdef PERL_STRICT_CR
5399         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5400         Perl_croak(aTHX_
5401       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5402 #endif
5403     case ' ': case '\t': case '\f': case '\v':
5404         s++;
5405         goto retry;
5406     case '#':
5407     case '\n':
5408         if (PL_lex_state != LEX_NORMAL
5409             || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5410         {
5411             const bool in_comment = *s == '#';
5412             if (*s == '#' && s == PL_linestart && PL_in_eval
5413              && !PL_rsfp && !PL_parser->filtered) {
5414                 /* handle eval qq[#line 1 "foo"\n ...] */
5415                 CopLINE_dec(PL_curcop);
5416                 incline(s);
5417             }
5418             d = s;
5419             while (d < PL_bufend && *d != '\n')
5420                 d++;
5421             if (d < PL_bufend)
5422                 d++;
5423             else if (d > PL_bufend)
5424                 /* Found by Ilya: feed random input to Perl. */
5425                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5426                            d, PL_bufend);
5427             s = d;
5428             if (in_comment && d == PL_bufend
5429                 && PL_lex_state == LEX_INTERPNORMAL
5430                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5431                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5432             else
5433                 incline(s);
5434             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5435                 PL_lex_state = LEX_FORMLINE;
5436                 force_next(FORMRBRACK);
5437                 TOKEN(';');
5438             }
5439         }
5440         else {
5441             while (s < PL_bufend && *s != '\n')
5442                 s++;
5443             if (s < PL_bufend)
5444                 {
5445                     s++;
5446                     if (s < PL_bufend)
5447                         incline(s);
5448                 }
5449             else if (s > PL_bufend)
5450                 /* Found by Ilya: feed random input to Perl. */
5451                 Perl_croak(aTHX_ "panic: input overflow");
5452         }
5453         goto retry;
5454     case '-':
5455         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5456             I32 ftst = 0;
5457             char tmp;
5458
5459             s++;
5460             PL_bufptr = s;
5461             tmp = *s++;
5462
5463             while (s < PL_bufend && SPACE_OR_TAB(*s))
5464                 s++;
5465
5466             if (strEQs(s,"=>")) {
5467                 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5468                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5469                 OPERATOR('-');          /* unary minus */
5470             }
5471             switch (tmp) {
5472             case 'r': ftst = OP_FTEREAD;        break;
5473             case 'w': ftst = OP_FTEWRITE;       break;
5474             case 'x': ftst = OP_FTEEXEC;        break;
5475             case 'o': ftst = OP_FTEOWNED;       break;
5476             case 'R': ftst = OP_FTRREAD;        break;
5477             case 'W': ftst = OP_FTRWRITE;       break;
5478             case 'X': ftst = OP_FTREXEC;        break;
5479             case 'O': ftst = OP_FTROWNED;       break;
5480             case 'e': ftst = OP_FTIS;           break;
5481             case 'z': ftst = OP_FTZERO;         break;
5482             case 's': ftst = OP_FTSIZE;         break;
5483             case 'f': ftst = OP_FTFILE;         break;
5484             case 'd': ftst = OP_FTDIR;          break;
5485             case 'l': ftst = OP_FTLINK;         break;
5486             case 'p': ftst = OP_FTPIPE;         break;
5487             case 'S': ftst = OP_FTSOCK;         break;
5488             case 'u': ftst = OP_FTSUID;         break;
5489             case 'g': ftst = OP_FTSGID;         break;
5490             case 'k': ftst = OP_FTSVTX;         break;
5491             case 'b': ftst = OP_FTBLK;          break;
5492             case 'c': ftst = OP_FTCHR;          break;
5493             case 't': ftst = OP_FTTTY;          break;
5494             case 'T': ftst = OP_FTTEXT;         break;
5495             case 'B': ftst = OP_FTBINARY;       break;
5496             case 'M': case 'A': case 'C':
5497                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5498                 switch (tmp) {
5499                 case 'M': ftst = OP_FTMTIME;    break;
5500                 case 'A': ftst = OP_FTATIME;    break;
5501                 case 'C': ftst = OP_FTCTIME;    break;
5502                 default:                        break;
5503                 }
5504                 break;
5505             default:
5506                 break;
5507             }
5508             if (ftst) {
5509                 PL_last_uni = PL_oldbufptr;
5510                 PL_last_lop_op = (OPCODE)ftst;
5511                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5512                         "### Saw file test %c\n", (int)tmp);
5513                 } );
5514                 FTST(ftst);
5515             }
5516             else {
5517                 /* Assume it was a minus followed by a one-letter named
5518                  * subroutine call (or a -bareword), then. */
5519                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5520                         "### '-%c' looked like a file test but was not\n",
5521                         (int) tmp);
5522                 } );
5523                 s = --PL_bufptr;
5524             }
5525         }
5526         {
5527             const char tmp = *s++;
5528             if (*s == tmp) {
5529                 s++;
5530                 if (PL_expect == XOPERATOR)
5531                     TERM(POSTDEC);
5532                 else
5533                     OPERATOR(PREDEC);
5534             }
5535             else if (*s == '>') {
5536                 s++;
5537                 s = skipspace(s);
5538                 if (((*s == '$' || *s == '&') && s[1] == '*')
5539                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5540                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5541                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5542                  )
5543                 {
5544                     PL_expect = XPOSTDEREF;
5545                     TOKEN(ARROW);
5546                 }
5547                 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5548                     s = force_word(s,METHOD,FALSE,TRUE);
5549                     TOKEN(ARROW);
5550                 }
5551                 else if (*s == '$')
5552                     OPERATOR(ARROW);
5553                 else
5554                     TERM(ARROW);
5555             }
5556             if (PL_expect == XOPERATOR) {
5557                 if (*s == '='
5558                     && !PL_lex_allbrackets
5559                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5560                 {
5561                     s--;
5562                     TOKEN(0);
5563                 }
5564                 Aop(OP_SUBTRACT);
5565             }
5566             else {
5567                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5568                     check_uni();
5569                 OPERATOR('-');          /* unary minus */
5570             }
5571         }
5572
5573     case '+':
5574         {
5575             const char tmp = *s++;
5576             if (*s == tmp) {
5577                 s++;
5578                 if (PL_expect == XOPERATOR)
5579                     TERM(POSTINC);
5580                 else
5581                     OPERATOR(PREINC);
5582             }
5583             if (PL_expect == XOPERATOR) {
5584                 if (*s == '='
5585                     && !PL_lex_allbrackets
5586                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5587                 {
5588                     s--;
5589                     TOKEN(0);
5590                 }
5591                 Aop(OP_ADD);
5592             }
5593             else {
5594                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5595                     check_uni();
5596                 OPERATOR('+');
5597             }
5598         }
5599
5600     case '*':
5601         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5602         if (PL_expect != XOPERATOR) {
5603             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5604             PL_expect = XOPERATOR;
5605             force_ident(PL_tokenbuf, '*');
5606             if (!*PL_tokenbuf)
5607                 PREREF('*');
5608             TERM('*');
5609         }
5610         s++;
5611         if (*s == '*') {
5612             s++;
5613             if (*s == '=' && !PL_lex_allbrackets
5614                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5615             {
5616                 s -= 2;
5617                 TOKEN(0);
5618             }
5619             PWop(OP_POW);
5620         }
5621         if (*s == '='
5622             && !PL_lex_allbrackets
5623             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5624         {
5625             s--;
5626             TOKEN(0);
5627         }
5628         PL_parser->saw_infix_sigil = 1;
5629         Mop(OP_MULTIPLY);
5630
5631     case '%':
5632     {
5633         if (PL_expect == XOPERATOR) {
5634             if (s[1] == '='
5635                 && !PL_lex_allbrackets
5636                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5637             {
5638                 TOKEN(0);
5639             }
5640             ++s;
5641             PL_parser->saw_infix_sigil = 1;
5642             Mop(OP_MODULO);
5643         }
5644         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5645         PL_tokenbuf[0] = '%';
5646         s = scan_ident(s, PL_tokenbuf + 1,
5647                 sizeof PL_tokenbuf - 1, FALSE);
5648         pl_yylval.ival = 0;
5649         if (!PL_tokenbuf[1]) {
5650             PREREF('%');
5651         }
5652         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5653             if (*s == '[')
5654                 PL_tokenbuf[0] = '@';
5655         }
5656         PL_expect = XOPERATOR;
5657         force_ident_maybe_lex('%');
5658         TERM('%');
5659     }
5660     case '^':
5661         d = s;
5662         bof = FEATURE_BITWISE_IS_ENABLED;
5663         if (bof && s[1] == '.')
5664             s++;
5665         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5666                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5667         {
5668             s = d;
5669             TOKEN(0);
5670         }
5671         s++;
5672         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5673     case '[':
5674         if (PL_lex_brackets > 100)
5675             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5676         PL_lex_brackstack[PL_lex_brackets++] = 0;
5677         PL_lex_allbrackets++;
5678         {
5679             const char tmp = *s++;
5680             OPERATOR(tmp);
5681         }
5682     case '~':
5683         if (s[1] == '~'
5684             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5685         {
5686             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5687                 TOKEN(0);
5688             s += 2;
5689             Perl_ck_warner_d(aTHX_
5690                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5691                 "Smartmatch is experimental");
5692             Eop(OP_SMARTMATCH);
5693         }
5694         s++;
5695         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5696             s++;
5697             BCop(OP_SCOMPLEMENT);
5698         }
5699         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5700     case ',':
5701         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5702             TOKEN(0);
5703         s++;
5704         OPERATOR(',');
5705     case ':':
5706         if (s[1] == ':') {
5707             len = 0;
5708             goto just_a_word_zero_gv;
5709         }
5710         s++;
5711         {
5712         OP *attrs;
5713
5714         switch (PL_expect) {
5715         case XOPERATOR:
5716             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5717                 break;
5718             PL_bufptr = s;      /* update in case we back off */
5719             if (*s == '=') {
5720                 Perl_croak(aTHX_
5721                            "Use of := for an empty attribute list is not allowed");
5722             }
5723             goto grabattrs;
5724         case XATTRBLOCK:
5725             PL_expect = XBLOCK;
5726             goto grabattrs;
5727         case XATTRTERM:
5728             PL_expect = XTERMBLOCK;
5729          grabattrs:
5730             s = skipspace(s);
5731             attrs = NULL;
5732             while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5733                 I32 tmp;
5734                 SV *sv;
5735                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5736                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5737                     if (tmp < 0) tmp = -tmp;
5738                     switch (tmp) {
5739                     case KEY_or:
5740                     case KEY_and:
5741                     case KEY_for:
5742                     case KEY_foreach:
5743                     case KEY_unless:
5744                     case KEY_if:
5745                     case KEY_while:
5746                     case KEY_until:
5747                         goto got_attrs;
5748                     default:
5749                         break;
5750                     }
5751                 }
5752                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5753                 if (*d == '(') {
5754                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5755                     if (!d) {
5756                         if (attrs)
5757                             op_free(attrs);
5758                         sv_free(sv);
5759                         Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5760                     }
5761                     COPLINE_SET_FROM_MULTI_END;
5762                 }
5763                 if (PL_lex_stuff) {
5764                     sv_catsv(sv, PL_lex_stuff);
5765                     attrs = op_append_elem(OP_LIST, attrs,
5766                                         newSVOP(OP_CONST, 0, sv));
5767                     SvREFCNT_dec_NN(PL_lex_stuff);
5768                     PL_lex_stuff = NULL;
5769                 }
5770                 else {
5771                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5772                         sv_free(sv);
5773                         if (PL_in_my == KEY_our) {
5774                             deprecate(":unique");
5775                         }
5776                         else
5777                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5778                     }
5779
5780                     /* NOTE: any CV attrs applied here need to be part of
5781                        the CVf_BUILTIN_ATTRS define in cv.h! */
5782                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5783                         sv_free(sv);
5784                         CvLVALUE_on(PL_compcv);
5785                     }
5786                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5787                         sv_free(sv);
5788                         deprecate(":locked");
5789                     }
5790                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5791                         sv_free(sv);
5792                         CvMETHOD_on(PL_compcv);
5793                     }
5794                     else if (!PL_in_my && len == 5
5795                           && strnEQ(SvPVX(sv), "const", len))
5796                     {
5797                         sv_free(sv);
5798                         Perl_ck_warner_d(aTHX_
5799                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5800                            ":const is experimental"
5801                         );
5802                         CvANONCONST_on(PL_compcv);
5803                         if (!CvANON(PL_compcv))
5804                             yyerror(":const is not permitted on named "
5805                                     "subroutines");
5806                     }
5807                     /* After we've set the flags, it could be argued that
5808                        we don't need to do the attributes.pm-based setting
5809                        process, and shouldn't bother appending recognized
5810                        flags.  To experiment with that, uncomment the
5811                        following "else".  (Note that's already been
5812                        uncommented.  That keeps the above-applied built-in
5813                        attributes from being intercepted (and possibly
5814                        rejected) by a package's attribute routines, but is
5815                        justified by the performance win for the common case
5816                        of applying only built-in attributes.) */
5817                     else
5818                         attrs = op_append_elem(OP_LIST, attrs,
5819                                             newSVOP(OP_CONST, 0,
5820                                                     sv));
5821                 }
5822                 s = skipspace(d);
5823                 if (*s == ':' && s[1] != ':')
5824                     s = skipspace(s+1);
5825                 else if (s == d)
5826                     break;      /* require real whitespace or :'s */
5827                 /* XXX losing whitespace on sequential attributes here */
5828             }
5829             {
5830                 if (*s != ';'
5831                     && *s != '}'
5832                     && !(PL_expect == XOPERATOR
5833                          ? (*s == '=' ||  *s == ')')
5834                          : (*s == '{' ||  *s == '(')))
5835                 {
5836                     const char q = ((*s == '\'') ? '"' : '\'');
5837                     /* If here for an expression, and parsed no attrs, back
5838                        off. */
5839                     if (PL_expect == XOPERATOR && !attrs) {
5840                         s = PL_bufptr;
5841                         break;
5842                     }
5843                     /* MUST advance bufptr here to avoid bogus "at end of line"
5844                        context messages from yyerror().
5845                     */
5846                     PL_bufptr = s;
5847                     yyerror( (const char *)
5848                              (*s
5849                               ? Perl_form(aTHX_ "Invalid separator character "
5850                                           "%c%c%c in attribute list", q, *s, q)
5851                               : "Unterminated attribute list" ) );
5852                     if (attrs)
5853                         op_free(attrs);
5854                     OPERATOR(':');
5855                 }
5856             }
5857         got_attrs:
5858             if (attrs) {
5859                 NEXTVAL_NEXTTOKE.opval = attrs;
5860                 force_next(THING);
5861             }
5862             TOKEN(COLONATTR);
5863         }
5864         }
5865         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5866             s--;
5867             TOKEN(0);
5868         }
5869         PL_lex_allbrackets--;
5870         OPERATOR(':');
5871     case '(':
5872         s++;
5873         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5874             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5875         else
5876             PL_expect = XTERM;
5877         s = skipspace(s);
5878         PL_lex_allbrackets++;
5879         TOKEN('(');
5880     case ';':
5881         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5882             TOKEN(0);
5883         CLINE;
5884         s++;
5885         PL_expect = XSTATE;
5886         TOKEN(';');
5887     case ')':
5888         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5889             TOKEN(0);
5890         s++;
5891         PL_lex_allbrackets--;
5892         s = skipspace(s);
5893         if (*s == '{')
5894             PREBLOCK(')');
5895         TERM(')');
5896     case ']':
5897         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5898             TOKEN(0);
5899         s++;
5900         if (PL_lex_brackets <= 0)
5901             /* diag_listed_as: Unmatched right %s bracket */
5902             yyerror("Unmatched right square bracket");
5903         else
5904             --PL_lex_brackets;
5905         PL_lex_allbrackets--;
5906         if (PL_lex_state == LEX_INTERPNORMAL) {
5907             if (PL_lex_brackets == 0) {
5908                 if (*s == '-' && s[1] == '>')
5909                     PL_lex_state = LEX_INTERPENDMAYBE;
5910                 else if (*s != '[' && *s != '{')
5911                     PL_lex_state = LEX_INTERPEND;
5912             }
5913         }
5914         TERM(']');
5915     case '{':
5916         s++;
5917       leftbracket:
5918         if (PL_lex_brackets > 100) {
5919             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5920         }
5921         switch (PL_expect) {
5922         case XTERM:
5923         case XTERMORDORDOR:
5924             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5925             PL_lex_allbrackets++;
5926             OPERATOR(HASHBRACK);
5927         case XOPERATOR:
5928             while (s < PL_bufend && SPACE_OR_TAB(*s))
5929                 s++;
5930             d = s;
5931             PL_tokenbuf[0] = '\0';
5932             if (d < PL_bufend && *d == '-') {
5933                 PL_tokenbuf[0] = '-';
5934                 d++;
5935                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5936                     d++;
5937             }
5938             if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
5939                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5940                               FALSE, &len);
5941                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5942                     d++;
5943                 if (*d == '}') {
5944                     const char minus = (PL_tokenbuf[0] == '-');
5945                     s = force_word(s + minus, BAREWORD, FALSE, TRUE);
5946                     if (minus)
5947                         force_next('-');
5948                 }
5949             }
5950             /* FALLTHROUGH */
5951         case XATTRTERM:
5952         case XTERMBLOCK:
5953             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5954             PL_lex_allbrackets++;
5955             PL_expect = XSTATE;
5956             break;
5957         case XATTRBLOCK:
5958         case XBLOCK:
5959             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5960             PL_lex_allbrackets++;
5961             PL_expect = XSTATE;
5962             break;
5963         case XBLOCKTERM:
5964             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5965             PL_lex_allbrackets++;
5966             PL_expect = XSTATE;
5967             break;
5968         default: {
5969                 const char *t;
5970                 if (PL_oldoldbufptr == PL_last_lop)
5971                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5972                 else
5973                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5974                 PL_lex_allbrackets++;
5975                 s = skipspace(s);
5976                 if (*s == '}') {
5977                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5978                         PL_expect = XTERM;
5979                         /* This hack is to get the ${} in the message. */
5980                         PL_bufptr = s+1;
5981                         yyerror("syntax error");
5982                         break;
5983                     }
5984                     OPERATOR(HASHBRACK);
5985                 }
5986                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5987                     /* ${...} or @{...} etc., but not print {...}
5988                      * Skip the disambiguation and treat this as a block.
5989                      */
5990                     goto block_expectation;
5991                 }
5992                 /* This hack serves to disambiguate a pair of curlies
5993                  * as being a block or an anon hash.  Normally, expectation
5994                  * determines that, but in cases where we're not in a
5995                  * position to expect anything in particular (like inside
5996                  * eval"") we have to resolve the ambiguity.  This code
5997                  * covers the case where the first term in the curlies is a
5998                  * quoted string.  Most other cases need to be explicitly
5999                  * disambiguated by prepending a "+" before the opening
6000                  * curly in order to force resolution as an anon hash.
6001                  *
6002                  * XXX should probably propagate the outer expectation
6003                  * into eval"" to rely less on this hack, but that could
6004                  * potentially break current behavior of eval"".
6005                  * GSAR 97-07-21
6006                  */
6007                 t = s;
6008                 if (*s == '\'' || *s == '"' || *s == '`') {
6009                     /* common case: get past first string, handling escapes */
6010                     for (t++; t < PL_bufend && *t != *s;)
6011                         if (*t++ == '\\')
6012                             t++;
6013                     t++;
6014                 }
6015                 else if (*s == 'q') {
6016                     if (++t < PL_bufend
6017                         && (!isWORDCHAR(*t)
6018                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6019                                 && !isWORDCHAR(*t))))
6020                     {
6021                         /* skip q//-like construct */
6022                         const char *tmps;
6023                         char open, close, term;
6024                         I32 brackets = 1;
6025
6026                         while (t < PL_bufend && isSPACE(*t))
6027                             t++;
6028                         /* check for q => */
6029                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6030                             OPERATOR(HASHBRACK);
6031                         }
6032                         term = *t;
6033                         open = term;
6034                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6035                             term = tmps[5];
6036                         close = term;
6037                         if (open == close)
6038                             for (t++; t < PL_bufend; t++) {
6039                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6040                                     t++;
6041                                 else if (*t == open)
6042                                     break;
6043                             }
6044                         else {
6045                             for (t++; t < PL_bufend; t++) {
6046                                 if (*t == '\\' && t+1 < PL_bufend)
6047                                     t++;
6048                                 else if (*t == close && --brackets <= 0)
6049                                     break;
6050                                 else if (*t == open)
6051                                     brackets++;
6052                             }
6053                         }
6054                         t++;
6055                     }
6056                     else
6057                         /* skip plain q word */
6058                         while (   t < PL_bufend
6059                                && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6060                         {
6061                             t += UTF ? UTF8SKIP(t) : 1;
6062                         }
6063                 }
6064                 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6065                     t += UTF ? UTF8SKIP(t) : 1;
6066                     while (   t < PL_bufend
6067                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6068                     {
6069                         t += UTF ? UTF8SKIP(t) : 1;
6070                     }
6071                 }
6072                 while (t < PL_bufend && isSPACE(*t))
6073                     t++;
6074                 /* if comma follows first term, call it an anon hash */
6075                 /* XXX it could be a comma expression with loop modifiers */
6076                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6077                                    || (*t == '=' && t[1] == '>')))
6078                     OPERATOR(HASHBRACK);
6079                 if (PL_expect == XREF)
6080                 {
6081                   block_expectation:
6082                     /* If there is an opening brace or 'sub:', treat it
6083                        as a term to make ${{...}}{k} and &{sub:attr...}
6084                        dwim.  Otherwise, treat it as a statement, so
6085                        map {no strict; ...} works.
6086                      */
6087                     s = skipspace(s);
6088                     if (*s == '{') {
6089                         PL_expect = XTERM;
6090                         break;
6091                     }
6092                     if (strEQs(s, "sub")) {
6093                         d = s + 3;
6094                         d = skipspace(d);
6095                         if (*d == ':') {
6096                             PL_expect = XTERM;
6097                             break;
6098                         }
6099                     }
6100                     PL_expect = XSTATE;
6101                 }
6102                 else {
6103                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6104                     PL_expect = XSTATE;
6105                 }
6106             }
6107             break;
6108         }
6109         pl_yylval.ival = CopLINE(PL_curcop);
6110         PL_copline = NOLINE;   /* invalidate current command line number */
6111         TOKEN(formbrack ? '=' : '{');
6112     case '}':
6113         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6114             TOKEN(0);
6115       rightbracket:
6116         s++;
6117         if (PL_lex_brackets <= 0)
6118             /* diag_listed_as: Unmatched right %s bracket */
6119             yyerror("Unmatched right curly bracket");
6120         else
6121             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6122         PL_lex_allbrackets--;
6123         if (PL_lex_state == LEX_INTERPNORMAL) {
6124             if (PL_lex_brackets == 0) {
6125                 if (PL_expect & XFAKEBRACK) {
6126                     PL_expect &= XENUMMASK;
6127                     PL_lex_state = LEX_INTERPEND;
6128                     PL_bufptr = s;
6129                     return yylex();     /* ignore fake brackets */
6130                 }
6131                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6132                  && SvEVALED(PL_lex_repl))
6133                     PL_lex_state = LEX_INTERPEND;
6134                 else if (*s == '-' && s[1] == '>')
6135                     PL_lex_state = LEX_INTERPENDMAYBE;
6136                 else if (*s != '[' && *s != '{')
6137                     PL_lex_state = LEX_INTERPEND;
6138             }
6139         }
6140         if (PL_expect & XFAKEBRACK) {
6141             PL_expect &= XENUMMASK;
6142             PL_bufptr = s;
6143             return yylex();             /* ignore fake brackets */
6144         }
6145         force_next(formbrack ? '.' : '}');
6146         if (formbrack) LEAVE;
6147         if (formbrack == 2) { /* means . where arguments were expected */
6148             force_next(';');
6149             TOKEN(FORMRBRACK);
6150         }
6151         TOKEN(';');
6152     case '&':
6153         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6154         s++;
6155         if (*s++ == '&') {
6156             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6157                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6158                 s -= 2;
6159                 TOKEN(0);
6160             }
6161             AOPERATOR(ANDAND);
6162         }
6163         s--;
6164         if (PL_expect == XOPERATOR) {
6165             if (   PL_bufptr == PL_linestart
6166                 && ckWARN(WARN_SEMICOLON)
6167                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6168             {
6169                 CopLINE_dec(PL_curcop);
6170                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6171                 CopLINE_inc(PL_curcop);
6172             }
6173             d = s;
6174             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6175                 s++;
6176             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6177                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6178                 s = d;
6179                 s--;
6180                 TOKEN(0);
6181             }
6182             if (d == s) {
6183                 PL_parser->saw_infix_sigil = 1;
6184                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6185             }
6186             else
6187                 BAop(OP_SBIT_AND);
6188         }
6189
6190         PL_tokenbuf[0] = '&';
6191         s = scan_ident(s - 1, PL_tokenbuf + 1,
6192                        sizeof PL_tokenbuf - 1, TRUE);
6193         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6194         if (PL_tokenbuf[1]) {
6195             force_ident_maybe_lex('&');
6196         }
6197         else
6198             PREREF('&');
6199         TERM('&');
6200
6201     case '|':
6202         s++;
6203         if (*s++ == '|') {
6204             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6205                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6206                 s -= 2;
6207                 TOKEN(0);
6208             }
6209             AOPERATOR(OROR);
6210         }
6211         s--;
6212         d = s;
6213         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6214             s++;
6215         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6216                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6217             s = d - 1;
6218             TOKEN(0);
6219         }
6220         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6221     case '=':
6222         s++;
6223         {
6224             const char tmp = *s++;
6225             if (tmp == '=') {
6226                 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "=====")) {
6227                     s = vcs_conflict_marker(s + 5);
6228                     goto retry;
6229                 }
6230                 if (!PL_lex_allbrackets
6231                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6232                 {
6233                     s -= 2;
6234                     TOKEN(0);
6235                 }
6236                 Eop(OP_EQ);
6237             }
6238             if (tmp == '>') {
6239                 if (!PL_lex_allbrackets
6240                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6241                 {
6242                     s -= 2;
6243                     TOKEN(0);
6244                 }
6245                 OPERATOR(',');
6246             }
6247             if (tmp == '~')
6248                 PMop(OP_MATCH);
6249             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6250                 && strchr("+-*/%.^&|<",tmp))
6251                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6252                             "Reversed %c= operator",(int)tmp);
6253             s--;
6254             if (PL_expect == XSTATE
6255                 && isALPHA(tmp)
6256                 && (s == PL_linestart+1 || s[-2] == '\n') )
6257             {
6258                 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6259                     || PL_lex_state != LEX_NORMAL) {
6260                     d = PL_bufend;
6261                     while (s < d) {
6262                         if (*s++ == '\n') {
6263                             incline(s);
6264                             if (strEQs(s,"=cut")) {
6265                                 s = strchr(s,'\n');
6266                                 if (s)
6267                                     s++;
6268                                 else
6269                                     s = d;
6270                                 incline(s);
6271                                 goto retry;
6272                             }
6273                         }
6274                     }
6275                     goto retry;
6276                 }
6277                 s = PL_bufend;
6278                 PL_parser->in_pod = 1;
6279                 goto retry;
6280             }
6281         }
6282         if (PL_expect == XBLOCK) {
6283             const char *t = s;
6284 #ifdef PERL_STRICT_CR
6285             while (SPACE_OR_TAB(*t))
6286 #else
6287             while (SPACE_OR_TAB(*t) || *t == '\r')
6288 #endif
6289                 t++;
6290             if (*t == '\n' || *t == '#') {
6291                 formbrack = 1;
6292                 ENTER;
6293                 SAVEI8(PL_parser->form_lex_state);
6294                 SAVEI32(PL_lex_formbrack);
6295                 PL_parser->form_lex_state = PL_lex_state;
6296                 PL_lex_formbrack = PL_lex_brackets + 1;
6297                 goto leftbracket;
6298             }
6299         }
6300         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6301             s--;
6302             TOKEN(0);
6303         }
6304         pl_yylval.ival = 0;
6305         OPERATOR(ASSIGNOP);
6306     case '!':
6307         s++;
6308         {
6309             const char tmp = *s++;
6310             if (tmp == '=') {
6311                 /* was this !=~ where !~ was meant?
6312                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6313
6314                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6315                     const char *t = s+1;
6316
6317                     while (t < PL_bufend && isSPACE(*t))
6318                         ++t;
6319
6320                     if (*t == '/' || *t == '?'
6321                         || ((*t == 'm' || *t == 's' || *t == 'y')
6322                             && !isWORDCHAR(t[1]))
6323                         || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6324                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6325                                     "!=~ should be !~");
6326                 }
6327                 if (!PL_lex_allbrackets
6328                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6329                 {
6330                     s -= 2;
6331                     TOKEN(0);
6332                 }
6333                 Eop(OP_NE);
6334             }
6335             if (tmp == '~')
6336                 PMop(OP_NOT);
6337         }
6338         s--;
6339         OPERATOR('!');
6340     case '<':
6341         if (PL_expect != XOPERATOR) {
6342             if (s[1] != '<' && !strchr(s,'>'))
6343                 check_uni();
6344             if (s[1] == '<' && s[2] != '>') {
6345                 if ((s == PL_linestart || s[-1] == '\n') && strEQs(s+2, "<<<<<")) {
6346                     s = vcs_conflict_marker(s + 7);
6347                     goto retry;
6348                 }
6349                 s = scan_heredoc(s);
6350             }
6351             else
6352                 s = scan_inputsymbol(s);
6353             PL_expect = XOPERATOR;
6354             TOKEN(sublex_start());
6355         }
6356         s++;
6357         {
6358             char tmp = *s++;
6359             if (tmp == '<') {
6360                 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "<<<<<")) {
6361                     s = vcs_conflict_marker(s + 5);
6362                     goto retry;
6363                 }
6364                 if (*s == '=' && !PL_lex_allbrackets
6365                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6366                 {
6367                     s -= 2;
6368                     TOKEN(0);
6369                 }
6370                 SHop(OP_LEFT_SHIFT);
6371             }
6372             if (tmp == '=') {
6373                 tmp = *s++;
6374                 if (tmp == '>') {
6375                     if (!PL_lex_allbrackets
6376                         && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6377                     {
6378                         s -= 3;
6379                         TOKEN(0);
6380                     }
6381                     Eop(OP_NCMP);
6382                 }
6383                 s--;
6384                 if (!PL_lex_allbrackets
6385                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6386                 {
6387                     s -= 2;
6388                     TOKEN(0);
6389                 }
6390                 Rop(OP_LE);
6391             }
6392         }
6393         s--;
6394         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6395             s--;
6396             TOKEN(0);
6397         }
6398         Rop(OP_LT);
6399     case '>':
6400         s++;
6401         {
6402             const char tmp = *s++;
6403             if (tmp == '>') {
6404                 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, ">>>>>")) {
6405                     s = vcs_conflict_marker(s + 5);
6406                     goto retry;
6407                 }
6408                 if (*s == '=' && !PL_lex_allbrackets
6409                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6410                 {
6411                     s -= 2;
6412                     TOKEN(0);
6413                 }
6414                 SHop(OP_RIGHT_SHIFT);
6415             }
6416             else if (tmp == '=') {
6417                 if (!PL_lex_allbrackets
6418                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6419                 {
6420                     s -= 2;
6421                     TOKEN(0);
6422                 }
6423                 Rop(OP_GE);
6424             }
6425         }
6426         s--;
6427         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6428             s--;
6429             TOKEN(0);
6430         }
6431         Rop(OP_GT);
6432
6433     case '$':
6434         CLINE;
6435
6436         if (PL_expect == XOPERATOR) {
6437             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6438                 return deprecate_commaless_var_list();
6439             }
6440         }
6441         else if (PL_expect == XPOSTDEREF) {
6442             if (s[1] == '#') {
6443                 s++;
6444                 POSTDEREF(DOLSHARP);
6445             }
6446             POSTDEREF('$');
6447         }
6448
6449         if (   s[1] == '#'
6450             && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
6451                 || strchr("{$:+-@", s[2])))
6452         {
6453             PL_tokenbuf[0] = '@';
6454             s = scan_ident(s + 1, PL_tokenbuf + 1,
6455                            sizeof PL_tokenbuf - 1, FALSE);
6456             if (PL_expect == XOPERATOR) {
6457                 d = s;
6458                 if (PL_bufptr > s) {
6459                     d = PL_bufptr-1;
6460                     PL_bufptr = PL_oldbufptr;
6461                 }
6462                 no_op("Array length", d);
6463             }
6464             if (!PL_tokenbuf[1])
6465                 PREREF(DOLSHARP);
6466             PL_expect = XOPERATOR;
6467             force_ident_maybe_lex('#');
6468             TOKEN(DOLSHARP);
6469         }
6470
6471         PL_tokenbuf[0] = '$';
6472         s = scan_ident(s, PL_tokenbuf + 1,
6473                        sizeof PL_tokenbuf - 1, FALSE);
6474         if (PL_expect == XOPERATOR) {
6475             d = s;
6476             if (PL_bufptr > s) {
6477                 d = PL_bufptr-1;
6478                 PL_bufptr = PL_oldbufptr;
6479             }
6480             no_op("Scalar", d);
6481         }
6482         if (!PL_tokenbuf[1]) {
6483             if (s == PL_bufend)
6484                 yyerror("Final $ should be \\$ or $name");
6485             PREREF('$');
6486         }
6487
6488         d = s;
6489         {
6490             const char tmp = *s;
6491             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6492                 s = skipspace(s);
6493
6494             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6495                 && intuit_more(s)) {
6496                 if (*s == '[') {
6497                     PL_tokenbuf[0] = '@';
6498                     if (ckWARN(WARN_SYNTAX)) {
6499                         char *t = s+1;
6500
6501                         while (   isSPACE(*t)
6502                                || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
6503                                || *t == '$')
6504                         {
6505                             t += UTF ? UTF8SKIP(t) : 1;
6506                         }
6507                         if (*t++ == ',') {
6508                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6509                             while (t < PL_bufend && *t != ']')
6510                                 t++;
6511                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6512                                         "Multidimensional syntax %" UTF8f " not supported",
6513                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6514                         }
6515                     }
6516                 }
6517                 else if (*s == '{') {
6518                     char *t;
6519                     PL_tokenbuf[0] = '%';
6520                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6521                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6522                         {
6523                             char tmpbuf[sizeof PL_tokenbuf];
6524                             do {
6525                                 t++;
6526                             } while (isSPACE(*t));
6527                             if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
6528                                 STRLEN len;
6529                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6530                                               &len);
6531                                 while (isSPACE(*t))
6532                                     t++;
6533                                 if (  *t == ';'
6534                                     && get_cvn_flags(tmpbuf, len, UTF
6535                                                                   ? SVf_UTF8
6536                                                                   : 0))
6537                                 {
6538                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6539                                         "You need to quote \"%" UTF8f "\"",
6540                                          UTF8fARG(UTF, len, tmpbuf));
6541                                 }
6542                             }
6543                         }
6544                 }
6545             }
6546
6547             PL_expect = XOPERATOR;
6548             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6549                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6550                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6551                     PL_expect = XOPERATOR;
6552                 else if (strchr("$@\"'`q", *s))
6553                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6554                 else if (   strchr("&*<%", *s)
6555                          && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
6556                 {
6557                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6558                 }
6559                 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6560                     char tmpbuf[sizeof PL_tokenbuf];
6561                     int t2;
6562                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6563                     if ((t2 = keyword(tmpbuf, len, 0))) {
6564                         /* binary operators exclude handle interpretations */
6565                         switch (t2) {
6566                         case -KEY_x:
6567                         case -KEY_eq:
6568                         case -KEY_ne:
6569                         case -KEY_gt:
6570                         case -KEY_lt:
6571                         case -KEY_ge:
6572                         case -KEY_le:
6573                         case -KEY_cmp:
6574                             break;
6575                         default:
6576                             PL_expect = XTERM;  /* e.g. print $fh length() */
6577                             break;
6578                         }
6579                     }
6580                     else {
6581                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6582                     }
6583                 }
6584                 else if (isDIGIT(*s))
6585                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6586                 else if (*s == '.' && isDIGIT(s[1]))
6587                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6588                 else if ((*s == '?' || *s == '-' || *s == '+')
6589                          && !isSPACE(s[1]) && s[1] != '=')
6590                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6591                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6592                          && s[1] != '/')
6593                     PL_expect = XTERM;          /* e.g. print $fh /.../
6594                                                    XXX except DORDOR operator
6595                                                 */
6596                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6597                          && s[2] != '=')
6598                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6599             }
6600         }
6601         force_ident_maybe_lex('$');
6602         TOKEN('$');
6603
6604     case '@':
6605         if (PL_expect == XPOSTDEREF)
6606             POSTDEREF('@');
6607         PL_tokenbuf[0] = '@';
6608         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6609         if (PL_expect == XOPERATOR) {
6610             d = s;
6611             if (PL_bufptr > s) {
6612                 d = PL_bufptr-1;
6613                 PL_bufptr = PL_oldbufptr;
6614             }
6615             no_op("Array", d);
6616         }
6617         pl_yylval.ival = 0;
6618         if (!PL_tokenbuf[1]) {
6619             PREREF('@');
6620         }
6621         if (PL_lex_state == LEX_NORMAL)
6622             s = skipspace(s);
6623         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6624             if (*s == '{')
6625                 PL_tokenbuf[0] = '%';
6626
6627             /* Warn about @ where they meant $. */
6628             if (*s == '[' || *s == '{') {
6629                 if (ckWARN(WARN_SYNTAX)) {
6630                     S_check_scalar_slice(aTHX_ s);
6631                 }
6632             }
6633         }
6634         PL_expect = XOPERATOR;
6635         force_ident_maybe_lex('@');
6636         TERM('@');
6637
6638      case '/':                  /* may be division, defined-or, or pattern */
6639         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6640             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6641                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6642                 TOKEN(0);
6643             s += 2;
6644             AOPERATOR(DORDOR);
6645         }
6646         else if (PL_expect == XOPERATOR) {
6647             s++;
6648             if (*s == '=' && !PL_lex_allbrackets
6649                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6650             {
6651                 s--;
6652                 TOKEN(0);
6653             }
6654             Mop(OP_DIVIDE);
6655         }
6656         else {
6657             /* Disable warning on "study /blah/" */
6658             if (    PL_oldoldbufptr == PL_last_uni
6659                 && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6660                     || memNE(PL_last_uni, "study", 5)
6661                     || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6662              ))
6663                 check_uni();
6664             s = scan_pat(s,OP_MATCH);
6665             TERM(sublex_start());
6666         }
6667
6668      case '?':                  /* conditional */
6669         s++;
6670         if (!PL_lex_allbrackets
6671             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6672         {
6673             s--;
6674             TOKEN(0);
6675         }
6676         PL_lex_allbrackets++;
6677         OPERATOR('?');
6678
6679     case '.':
6680         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6681 #ifdef PERL_STRICT_CR
6682             && s[1] == '\n'
6683 #else
6684             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6685 #endif
6686             && (s == PL_linestart || s[-1] == '\n') )
6687         {
6688             PL_expect = XSTATE;
6689             formbrack = 2; /* dot seen where arguments expected */
6690             goto rightbracket;
6691         }
6692         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6693             s += 3;
6694             OPERATOR(YADAYADA);
6695         }
6696         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6697             char tmp = *s++;
6698             if (*s == tmp) {
6699                 if (!PL_lex_allbrackets
6700                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6701                 {
6702                     s--;
6703                     TOKEN(0);
6704                 }
6705                 s++;
6706                 if (*s == tmp) {
6707                     s++;
6708                     pl_yylval.ival = OPf_SPECIAL;
6709                 }
6710                 else
6711                     pl_yylval.ival = 0;
6712                 OPERATOR(DOTDOT);
6713             }
6714             if (*s == '=' && !PL_lex_allbrackets
6715                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6716             {
6717                 s--;
6718                 TOKEN(0);
6719             }
6720             Aop(OP_CONCAT);
6721         }
6722         /* FALLTHROUGH */
6723     case '0': case '1': case '2': case '3': case '4':
6724     case '5': case '6': case '7': case '8': case '9':
6725         s = scan_num(s, &pl_yylval);
6726         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6727         if (PL_expect == XOPERATOR)
6728             no_op("Number",s);
6729         TERM(THING);
6730
6731     case '\'':
6732         if (   PL_expect == XOPERATOR
6733             && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
6734                 return deprecate_commaless_var_list();
6735
6736         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6737         if (!s)
6738             missingterm(NULL);
6739         COPLINE_SET_FROM_MULTI_END;
6740         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6741         if (PL_expect == XOPERATOR) {
6742             no_op("String",s);
6743         }
6744         pl_yylval.ival = OP_CONST;
6745         TERM(sublex_start());
6746
6747     case '"':
6748         if (   PL_expect == XOPERATOR
6749             && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
6750                 return deprecate_commaless_var_list();
6751
6752         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6753         DEBUG_T( {
6754             if (s)
6755                 printbuf("### Saw string before %s\n", s);
6756             else
6757                 PerlIO_printf(Perl_debug_log,
6758                              "### Saw unterminated string\n");
6759         } );
6760         if (PL_expect == XOPERATOR) {
6761                 no_op("String",s);
6762         }
6763         if (!s)
6764             missingterm(NULL);
6765         pl_yylval.ival = OP_CONST;
6766         /* FIXME. I think that this can be const if char *d is replaced by
6767            more localised variables.  */
6768         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6769             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6770                 pl_yylval.ival = OP_STRINGIFY;
6771                 break;
6772             }
6773         }
6774         if (pl_yylval.ival == OP_CONST)
6775             COPLINE_SET_FROM_MULTI_END;
6776         TERM(sublex_start());
6777
6778     case '`':
6779         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6780         DEBUG_T( {
6781             if (s)
6782                 printbuf("### Saw backtick string before %s\n", s);
6783             else
6784                 PerlIO_printf(Perl_debug_log,
6785                              "### Saw unterminated backtick string\n");
6786         } );
6787         if (PL_expect == XOPERATOR)
6788             no_op("Backticks",s);
6789         if (!s)
6790             missingterm(NULL);
6791         pl_yylval.ival = OP_BACKTICK;
6792         TERM(sublex_start());
6793
6794     case '\\':
6795         s++;
6796         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6797          && isDIGIT(*s))
6798             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6799                            *s, *s);
6800         if (PL_expect == XOPERATOR)
6801             no_op("Backslash",s);
6802         OPERATOR(REFGEN);
6803
6804     case 'v':
6805         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6806             char *start = s + 2;
6807             while (isDIGIT(*start) || *start == '_')
6808                 start++;
6809             if (*start == '.' && isDIGIT(start[1])) {
6810                 s = scan_num(s, &pl_yylval);
6811                 TERM(THING);
6812             }
6813             else if ((*start == ':' && start[1] == ':')
6814                   || (PL_expect == XSTATE && *start == ':'))
6815                 goto keylookup;
6816             else if (PL_expect == XSTATE) {
6817                 d = start;
6818                 while (d < PL_bufend && isSPACE(*d)) d++;
6819                 if (*d == ':') goto keylookup;
6820             }
6821             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6822             if (!isALPHA(*start) && (PL_expect == XTERM
6823                         || PL_expect == XREF || PL_expect == XSTATE
6824                         || PL_expect == XTERMORDORDOR)) {
6825                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6826                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6827                 if (!gv) {
6828                     s = scan_num(s, &pl_yylval);
6829                     TERM(THING);
6830                 }
6831             }
6832         }
6833         goto keylookup;
6834     case 'x':
6835         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6836             s++;
6837             Mop(OP_REPEAT);
6838         }
6839         goto keylookup;
6840
6841     case '_':
6842     case 'a': case 'A':
6843     case 'b': case 'B':
6844     case 'c': case 'C':
6845     case 'd': case 'D':
6846     case 'e': case 'E':
6847     case 'f': case 'F':
6848     case 'g': case 'G':
6849     case 'h': case 'H':
6850     case 'i': case 'I':
6851     case 'j': case 'J':
6852     case 'k': case 'K':
6853     case 'l': case 'L':
6854     case 'm': case 'M':
6855     case 'n': case 'N':
6856     case 'o': case 'O':
6857     case 'p': case 'P':
6858     case 'q': case 'Q':
6859     case 'r': case 'R':
6860     case 's': case 'S':
6861     case 't': case 'T':
6862     case 'u': case 'U':
6863               case 'V':
6864     case 'w': case 'W':
6865               case 'X':
6866     case 'y': case 'Y':
6867     case 'z': case 'Z':
6868
6869       keylookup: {
6870         bool anydelim;
6871         bool lex;
6872         I32 tmp;
6873         SV *sv;
6874         CV *cv;
6875         PADOFFSET off;
6876         OP *rv2cv_op;
6877
6878         lex = FALSE;
6879         orig_keyword = 0;
6880         off = 0;
6881         sv = NULL;
6882         cv = NULL;
6883         gv = NULL;
6884         gvp = NULL;
6885         rv2cv_op = NULL;
6886
6887         PL_bufptr = s;
6888         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6889
6890         /* Some keywords can be followed by any delimiter, including ':' */
6891         anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
6892
6893         /* x::* is just a word, unless x is "CORE" */
6894         if (!anydelim && *s == ':' && s[1] == ':') {
6895             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6896             goto just_a_word;
6897         }
6898
6899         d = s;
6900         while (d < PL_bufend && isSPACE(*d))
6901                 d++;    /* no comments skipped here, or s### is misparsed */
6902
6903         /* Is this a word before a => operator? */
6904         if (*d == '=' && d[1] == '>') {
6905           fat_arrow:
6906             CLINE;
6907             pl_yylval.opval
6908                 = newSVOP(OP_CONST, 0,
6909                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6910             pl_yylval.opval->op_private = OPpCONST_BARE;
6911             TERM(BAREWORD);
6912         }
6913
6914         /* Check for plugged-in keyword */
6915         {
6916             OP *o;
6917             int result;
6918             char *saved_bufptr = PL_bufptr;
6919             PL_bufptr = s;
6920             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6921             s = PL_bufptr;
6922             if (result == KEYWORD_PLUGIN_DECLINE) {
6923                 /* not a plugged-in keyword */
6924                 PL_bufptr = saved_bufptr;
6925             } else if (result == KEYWORD_PLUGIN_STMT) {
6926                 pl_yylval.opval = o;
6927                 CLINE;
6928                 if (!PL_nexttoke) PL_expect = XSTATE;
6929                 return REPORT(PLUGSTMT);
6930             } else if (result == KEYWORD_PLUGIN_EXPR) {
6931                 pl_yylval.opval = o;
6932                 CLINE;
6933                 if (!PL_nexttoke) PL_expect = XOPERATOR;
6934                 return REPORT(PLUGEXPR);
6935             } else {
6936                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6937                                         PL_tokenbuf);
6938             }
6939         }
6940
6941         /* Check for built-in keyword */
6942         tmp = keyword(PL_tokenbuf, len, 0);
6943
6944         /* Is this a label? */
6945         if (!anydelim && PL_expect == XSTATE
6946               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6947             s = d + 1;
6948             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6949             pl_yylval.pval[len] = '\0';
6950             pl_yylval.pval[len+1] = UTF ? 1 : 0;
6951             CLINE;
6952             TOKEN(LABEL);
6953         }
6954
6955         /* Check for lexical sub */
6956         if (PL_expect != XOPERATOR) {
6957             char tmpbuf[sizeof PL_tokenbuf + 1];
6958             *tmpbuf = '&';
6959             Copy(PL_tokenbuf, tmpbuf+1, len, char);
6960             off = pad_findmy_pvn(tmpbuf, len+1, 0);
6961             if (off != NOT_IN_PAD) {
6962                 assert(off); /* we assume this is boolean-true below */
6963                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6964                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
6965                     HEK * const stashname = HvNAME_HEK(stash);
6966                     sv = newSVhek(stashname);
6967                     sv_catpvs(sv, "::");
6968                     sv_catpvn_flags(sv, PL_tokenbuf, len,
6969                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
6970                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6971                                     SVt_PVCV);
6972                     off = 0;
6973                     if (!gv) {
6974                         sv_free(sv);
6975                         sv = NULL;
6976                         goto just_a_word;
6977                     }
6978                 }
6979                 else {
6980                     rv2cv_op = newOP(OP_PADANY, 0);
6981                     rv2cv_op->op_targ = off;
6982                     cv = find_lexical_cv(off);
6983                 }
6984                 lex = TRUE;
6985                 goto just_a_word;
6986             }
6987             off = 0;
6988         }
6989
6990         if (tmp < 0) {                  /* second-class keyword? */
6991             GV *ogv = NULL;     /* override (winner) */
6992             GV *hgv = NULL;     /* hidden (loser) */
6993             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6994                 CV *cv;
6995                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6996                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6997                                             SVt_PVCV))
6998                     && (cv = GvCVu(gv)))
6999                 {
7000                     if (GvIMPORTED_CV(gv))
7001                         ogv = gv;
7002                     else if (! CvMETHOD(cv))
7003                         hgv = gv;
7004                 }
7005                 if (!ogv
7006                     && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7007                                                               len, FALSE))
7008                     && (gv = *gvp)
7009                     && (isGV_with_GP(gv)
7010                         ? GvCVu(gv) && GvIMPORTED_CV(gv)
7011                         :   SvPCS_IMPORTED(gv)
7012                         && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7013                                                                  len, 0), 1)))
7014                 {
7015                     ogv = gv;
7016                 }
7017             }
7018             if (ogv) {
7019                 orig_keyword = tmp;
7020                 tmp = 0;                /* overridden by import or by GLOBAL */
7021             }
7022             else if (gv && !gvp
7023                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7024                      && GvCVu(gv))
7025             {
7026                 tmp = 0;                /* any sub overrides "weak" keyword */
7027             }
7028             else {                      /* no override */
7029                 tmp = -tmp;
7030                 if (tmp == KEY_dump) {
7031                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7032                                    "dump() better written as CORE::dump()");
7033                 }
7034                 gv = NULL;
7035                 gvp = 0;
7036                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7037                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7038                                    "Ambiguous call resolved as CORE::%s(), "
7039                                    "qualify as such or use &",
7040                                    GvENAME(hgv));
7041             }
7042         }
7043
7044         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7045          && (!anydelim || *s != '#')) {
7046             /* no override, and not s### either; skipspace is safe here
7047              * check for => on following line */
7048             bool arrow;
7049             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7050             STRLEN   soff = s         - SvPVX(PL_linestr);
7051             s = peekspace(s);
7052             arrow = *s == '=' && s[1] == '>';
7053             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7054             s         = SvPVX(PL_linestr) +   soff;
7055             if (arrow)
7056                 goto fat_arrow;
7057         }
7058
7059       reserved_word:
7060         switch (tmp) {
7061
7062             /* Trade off - by using this evil construction we can pull the
7063                variable gv into the block labelled keylookup. If not, then
7064                we have to give it function scope so that the goto from the
7065                earlier ':' case doesn't bypass the initialisation.  */
7066             just_a_word_zero_gv:
7067                 sv = NULL;
7068                 cv = NULL;
7069                 gv = NULL;
7070                 gvp = NULL;
7071                 rv2cv_op = NULL;
7072                 orig_keyword = 0;
7073                 lex = 0;
7074                 off = 0;
7075         default:                        /* not a keyword */
7076           just_a_word: {
7077                 int pkgname = 0;
7078                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7079                 bool safebw;
7080
7081
7082                 /* Get the rest if it looks like a package qualifier */
7083
7084                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7085                     STRLEN morelen;
7086                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7087                                   TRUE, &morelen);
7088                     if (!morelen)
7089                         Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7090                                 UTF8fARG(UTF, len, PL_tokenbuf),
7091                                 *s == '\'' ? "'" : "::");
7092                     len += morelen;
7093                     pkgname = 1;
7094                 }
7095
7096                 if (PL_expect == XOPERATOR) {
7097                     if (PL_bufptr == PL_linestart) {
7098                         CopLINE_dec(PL_curcop);
7099                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7100                         CopLINE_inc(PL_curcop);
7101                     }
7102                     else
7103                         no_op("Bareword",s);
7104                 }
7105
7106                 /* See if the name is "Foo::",
7107                    in which case Foo is a bareword
7108                    (and a package name). */
7109
7110                 if (len > 2
7111                     && PL_tokenbuf[len - 2] == ':'
7112                     && PL_tokenbuf[len - 1] == ':')
7113                 {
7114                     if (ckWARN(WARN_BAREWORD)
7115                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7116                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7117                                     "Bareword \"%" UTF8f
7118                                     "\" refers to nonexistent package",
7119                                     UTF8fARG(UTF, len, PL_tokenbuf));
7120                     len -= 2;
7121                     PL_tokenbuf[len] = '\0';
7122                     gv = NULL;
7123                     gvp = 0;
7124                     safebw = TRUE;
7125                 }
7126                 else {
7127                     safebw = FALSE;
7128                 }
7129
7130                 /* if we saw a global override before, get the right name */
7131
7132                 if (!sv)
7133                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7134                                                 len);
7135                 if (gvp) {
7136                     SV * const tmp_sv = sv;
7137                     sv = newSVpvs("CORE::GLOBAL::");
7138                     sv_catsv(sv, tmp_sv);
7139                     SvREFCNT_dec(tmp_sv);
7140                 }
7141
7142
7143                 /* Presume this is going to be a bareword of some sort. */
7144                 CLINE;
7145                 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7146                 pl_yylval.opval->op_private = OPpCONST_BARE;
7147
7148                 /* And if "Foo::", then that's what it certainly is. */
7149                 if (safebw)
7150                     goto safe_bareword;
7151
7152                 if (!off)
7153                 {
7154                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7155                     const_op->op_private = OPpCONST_BARE;
7156                     rv2cv_op =
7157                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7158                     cv = lex
7159                         ? isGV(gv)
7160                             ? GvCV(gv)
7161                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7162                                 ? (CV *)SvRV(gv)
7163                                 : ((CV *)gv)
7164                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7165                 }
7166
7167                 /* Use this var to track whether intuit_method has been
7168                    called.  intuit_method returns 0 or > 255.  */
7169                 tmp = 1;
7170
7171                 /* See if it's the indirect object for a list operator. */
7172
7173                 if (PL_oldoldbufptr
7174                     && PL_oldoldbufptr < PL_bufptr
7175                     && (PL_oldoldbufptr == PL_last_lop
7176                         || PL_oldoldbufptr == PL_last_uni)
7177                     && /* NO SKIPSPACE BEFORE HERE! */
7178                        (PL_expect == XREF
7179                         || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7180                                                                == OA_FILEREF))
7181                 {
7182                     bool immediate_paren = *s == '(';
7183
7184                     /* (Now we can afford to cross potential line boundary.) */
7185                     s = skipspace(s);
7186
7187                     /* Two barewords in a row may indicate method call. */
7188                     if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7189                             || *s == '$')
7190                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7191                     {
7192                         goto method;
7193                     }
7194
7195                     /* If not a declared subroutine, it's an indirect object. */
7196                     /* (But it's an indir obj regardless for sort.) */
7197                     /* Also, if "_" follows a filetest operator, it's a bareword */
7198
7199                     if (
7200                         ( !immediate_paren && (PL_last_lop_op == OP_SORT
7201                          || (!cv
7202                              && (PL_last_lop_op != OP_MAPSTART
7203                                  && PL_last_lop_op != OP_GREPSTART))))
7204                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7205                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7206                                                             == OA_FILESTATOP))
7207                        )
7208                     {
7209                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7210                         goto bareword;
7211                     }
7212                 }
7213
7214                 PL_expect = XOPERATOR;
7215                 s = skipspace(s);
7216
7217                 /* Is this a word before a => operator? */
7218                 if (*s == '=' && s[1] == '>' && !pkgname) {
7219                     op_free(rv2cv_op);
7220                     CLINE;
7221                     if (gvp || (lex && !off)) {
7222                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7223                         /* This is our own scalar, created a few lines
7224                            above, so this is safe. */
7225                         SvREADONLY_off(sv);
7226                         sv_setpv(sv, PL_tokenbuf);
7227                         if (UTF && !IN_BYTES
7228                          && is_utf8_string((U8*)PL_tokenbuf, len))
7229                               SvUTF8_on(sv);
7230                         SvREADONLY_on(sv);
7231                     }
7232                     TERM(BAREWORD);
7233                 }
7234
7235                 /* If followed by a paren, it's certainly a subroutine. */
7236                 if (*s == '(') {
7237                     CLINE;
7238                     if (cv) {
7239                         d = s + 1;
7240                         while (SPACE_OR_TAB(*d))
7241                             d++;
7242                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7243                             s = d + 1;
7244                             goto its_constant;
7245                         }
7246                     }
7247                     NEXTVAL_NEXTTOKE.opval =
7248                         off ? rv2cv_op : pl_yylval.opval;
7249                     if (off)
7250                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7251                     else op_free(rv2cv_op),        force_next(BAREWORD);
7252                     pl_yylval.ival = 0;
7253                     TOKEN('&');
7254                 }
7255
7256                 /* If followed by var or block, call it a method (unless sub) */
7257
7258                 if ((*s == '$' || *s == '{') && !cv) {
7259                     op_free(rv2cv_op);
7260                     PL_last_lop = PL_oldbufptr;
7261                     PL_last_lop_op = OP_METHOD;
7262                     if (!PL_lex_allbrackets
7263                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7264                     {
7265                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7266                     }
7267                     PL_expect = XBLOCKTERM;
7268                     PL_bufptr = s;
7269                     return REPORT(METHOD);
7270                 }
7271
7272                 /* If followed by a bareword, see if it looks like indir obj. */
7273
7274                 if (   tmp == 1
7275                     && !orig_keyword
7276                     && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7277                     && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7278                 {
7279                   method:
7280                     if (lex && !off) {
7281                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7282                         SvREADONLY_off(sv);
7283                         sv_setpvn(sv, PL_tokenbuf, len);
7284                         if (UTF && !IN_BYTES
7285                          && is_utf8_string((U8*)PL_tokenbuf, len))
7286                             SvUTF8_on (sv);
7287                         else SvUTF8_off(sv);
7288                     }
7289                     op_free(rv2cv_op);
7290                     if (tmp == METHOD && !PL_lex_allbrackets
7291                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7292                     {
7293                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7294                     }
7295                     return REPORT(tmp);
7296                 }
7297
7298                 /* Not a method, so call it a subroutine (if defined) */
7299
7300                 if (cv) {
7301                     /* Check for a constant sub */
7302                     if ((sv = cv_const_sv_or_av(cv))) {
7303                   its_constant:
7304                         op_free(rv2cv_op);
7305                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7306                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7307                         if (SvTYPE(sv) == SVt_PVAV)
7308                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7309                                                       pl_yylval.opval);
7310                         else {
7311                             pl_yylval.opval->op_private = 0;
7312                             pl_yylval.opval->op_folded = 1;
7313                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7314                         }
7315                         TOKEN(BAREWORD);
7316                     }
7317
7318                     op_free(pl_yylval.opval);
7319                     pl_yylval.opval =
7320                         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7321                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7322                     PL_last_lop = PL_oldbufptr;
7323                     PL_last_lop_op = OP_ENTERSUB;
7324                     /* Is there a prototype? */
7325                     if (
7326                         SvPOK(cv))
7327                     {
7328                         STRLEN protolen = CvPROTOLEN(cv);
7329                         const char *proto = CvPROTO(cv);
7330                         bool optional;
7331                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7332                         if (!protolen)
7333                             TERM(FUNC0SUB);
7334                         if ((optional = *proto == ';'))
7335                           do
7336                             proto++;
7337                           while (*proto == ';');
7338                         if (
7339                             (
7340                                 (
7341                                     *proto == '$' || *proto == '_'
7342                                  || *proto == '*' || *proto == '+'
7343                                 )
7344                              && proto[1] == '\0'
7345                             )
7346                          || (
7347                              *proto == '\\' && proto[1] && proto[2] == '\0'
7348                             )
7349                         )
7350                             UNIPROTO(UNIOPSUB,optional);
7351                         if (*proto == '\\' && proto[1] == '[') {
7352                             const char *p = proto + 2;
7353                             while(*p && *p != ']')
7354                                 ++p;
7355                             if(*p == ']' && !p[1])
7356                                 UNIPROTO(UNIOPSUB,optional);
7357                         }
7358                         if (*proto == '&' && *s == '{') {
7359                             if (PL_curstash)
7360                                 sv_setpvs(PL_subname, "__ANON__");
7361                             else
7362                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7363                             if (!PL_lex_allbrackets
7364                                 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7365                             {
7366                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7367                             }
7368                             PREBLOCK(LSTOPSUB);
7369                         }
7370                     }
7371                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7372                     PL_expect = XTERM;
7373                     force_next(off ? PRIVATEREF : BAREWORD);
7374                     if (!PL_lex_allbrackets
7375                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7376                     {
7377                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7378                     }
7379                     TOKEN(NOAMP);
7380                 }
7381
7382                 /* Call it a bare word */
7383
7384                 if (PL_hints & HINT_STRICT_SUBS)
7385                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7386                 else {
7387                 bareword:
7388                     /* after "print" and similar functions (corresponding to
7389                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7390                      * a filehandle should be subject to "strict subs".
7391                      * Likewise for the optional indirect-object argument to system
7392                      * or exec, which can't be a bareword */
7393                     if ((PL_last_lop_op == OP_PRINT
7394                             || PL_last_lop_op == OP_PRTF
7395                             || PL_last_lop_op == OP_SAY
7396                             || PL_last_lop_op == OP_SYSTEM
7397                             || PL_last_lop_op == OP_EXEC)
7398                             && (PL_hints & HINT_STRICT_SUBS))
7399                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7400                     if (lastchar != '-') {
7401                         if (ckWARN(WARN_RESERVED)) {
7402                             d = PL_tokenbuf;
7403                             while (isLOWER(*d))
7404                                 d++;
7405                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7406                             {
7407                                 /* PL_warn_reserved is constant */
7408                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7409                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7410                                        PL_tokenbuf);
7411                                 GCC_DIAG_RESTORE;
7412                             }
7413                         }
7414                     }
7415                 }
7416                 op_free(rv2cv_op);
7417
7418             safe_bareword:
7419                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7420                  && saw_infix_sigil) {
7421                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7422                                      "Operator or semicolon missing before %c%" UTF8f,
7423                                      lastchar,
7424                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7425                                               PL_tokenbuf));
7426                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7427                                      "Ambiguous use of %c resolved as operator %c",
7428                                      lastchar, lastchar);
7429                 }
7430                 TOKEN(BAREWORD);
7431             }
7432
7433         case KEY___FILE__:
7434             FUN0OP(
7435                 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7436             );
7437
7438         case KEY___LINE__:
7439             FUN0OP(
7440                 newSVOP(OP_CONST, 0,
7441                     Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7442             );
7443
7444         case KEY___PACKAGE__:
7445             FUN0OP(
7446                 newSVOP(OP_CONST, 0,
7447                                         (PL_curstash
7448                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7449                                          : &PL_sv_undef))
7450             );
7451
7452         case KEY___DATA__:
7453         case KEY___END__: {
7454             GV *gv;
7455             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7456                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7457                                         ? PL_curstash
7458                                         : PL_defstash;
7459                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7460                 if (!isGV(gv))
7461                     gv_init(gv,stash,"DATA",4,0);
7462                 GvMULTI_on(gv);
7463                 if (!GvIO(gv))
7464                     GvIOp(gv) = newIO();
7465                 IoIFP(GvIOp(gv)) = PL_rsfp;
7466 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7467                 {
7468                     const int fd = PerlIO_fileno(PL_rsfp);
7469                     if (fd >= 3) {
7470                         fcntl(fd,F_SETFD, FD_CLOEXEC);
7471                     }
7472                 }
7473 #endif
7474                 /* Mark this internal pseudo-handle as clean */
7475                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7476                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7477                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7478                 else
7479                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7480 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7481                 /* if the script was opened in binmode, we need to revert
7482                  * it to text mode for compatibility; but only iff it has CRs
7483                  * XXX this is a questionable hack at best. */
7484                 if (PL_bufend-PL_bufptr > 2
7485                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7486                 {
7487                     Off_t loc = 0;
7488                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7489                         loc = PerlIO_tell(PL_rsfp);
7490                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7491                     }
7492 #ifdef NETWARE
7493                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7494 #else
7495                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7496 #endif  /* NETWARE */
7497                         if (loc > 0)
7498                             PerlIO_seek(PL_rsfp, loc, 0);
7499                     }
7500                 }
7501 #endif
7502 #ifdef PERLIO_LAYERS
7503                 if (!IN_BYTES) {
7504                     if (UTF)
7505                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7506                 }
7507 #endif
7508                 PL_rsfp = NULL;
7509             }
7510             goto fake_eof;
7511         }
7512
7513         case KEY___SUB__:
7514             FUN0OP(CvCLONE(PL_compcv)
7515                         ? newOP(OP_RUNCV, 0)
7516                         : newPVOP(OP_RUNCV,0,NULL));
7517
7518         case KEY_AUTOLOAD:
7519         case KEY_DESTROY:
7520         case KEY_BEGIN:
7521         case KEY_UNITCHECK:
7522         case KEY_CHECK:
7523         case KEY_INIT:
7524         case KEY_END:
7525             if (PL_expect == XSTATE) {
7526                 s = PL_bufptr;
7527                 goto really_sub;
7528             }
7529             goto just_a_word;
7530
7531         case_KEY_CORE:
7532             {
7533                 STRLEN olen = len;
7534                 d = s;
7535                 s += 2;
7536                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7537                 if ((*s == ':' && s[1] == ':')
7538                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7539                 {
7540                     s = d;
7541                     len = olen;
7542                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7543                     goto just_a_word;
7544                 }
7545                 if (!tmp)
7546                     Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7547                                       UTF8fARG(UTF, len, PL_tokenbuf));
7548                 if (tmp < 0)
7549                     tmp = -tmp;
7550                 else if (tmp == KEY_require || tmp == KEY_do
7551                       || tmp == KEY_glob)
7552                     /* that's a way to remember we saw "CORE::" */
7553                     orig_keyword = tmp;
7554                 goto reserved_word;
7555             }
7556
7557         case KEY_abs:
7558             UNI(OP_ABS);
7559
7560         case KEY_alarm:
7561             UNI(OP_ALARM);
7562
7563         case KEY_accept:
7564             LOP(OP_ACCEPT,XTERM);
7565
7566         case KEY_and:
7567             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7568                 return REPORT(0);
7569             OPERATOR(ANDOP);
7570
7571         case KEY_atan2:
7572             LOP(OP_ATAN2,XTERM);
7573
7574         case KEY_bind:
7575             LOP(OP_BIND,XTERM);
7576
7577         case KEY_binmode:
7578             LOP(OP_BINMODE,XTERM);
7579
7580         case KEY_bless:
7581             LOP(OP_BLESS,XTERM);
7582
7583         case KEY_break:
7584             FUN0(OP_BREAK);
7585
7586         case KEY_chop:
7587             UNI(OP_CHOP);
7588
7589         case KEY_continue:
7590                     /* We have to disambiguate the two senses of
7591                       "continue". If the next token is a '{' then
7592                       treat it as the start of a continue block;
7593                       otherwise treat it as a control operator.
7594                      */
7595                     s = skipspace(s);
7596                     if (*s == '{')
7597             PREBLOCK(CONTINUE);
7598                     else
7599                         FUN0(OP_CONTINUE);
7600
7601         case KEY_chdir:
7602             /* may use HOME */
7603             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7604             UNI(OP_CHDIR);
7605
7606         case KEY_close:
7607             UNI(OP_CLOSE);
7608
7609         case KEY_closedir:
7610             UNI(OP_CLOSEDIR);
7611
7612         case KEY_cmp:
7613             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7614                 return REPORT(0);
7615             Eop(OP_SCMP);
7616
7617         case KEY_caller:
7618             UNI(OP_CALLER);
7619
7620         case KEY_crypt:
7621 #ifdef FCRYPT
7622             if (!PL_cryptseen) {
7623                 PL_cryptseen = TRUE;
7624                 init_des();
7625             }
7626 #endif
7627             LOP(OP_CRYPT,XTERM);
7628
7629         case KEY_chmod:
7630             LOP(OP_CHMOD,XTERM);
7631
7632         case KEY_chown:
7633             LOP(OP_CHOWN,XTERM);
7634
7635         case KEY_connect:
7636             LOP(OP_CONNECT,XTERM);
7637
7638         case KEY_chr:
7639             UNI(OP_CHR);
7640
7641         case KEY_cos:
7642             UNI(OP_COS);
7643
7644         case KEY_chroot:
7645             UNI(OP_CHROOT);
7646
7647         case KEY_default:
7648             PREBLOCK(DEFAULT);
7649
7650         case KEY_do:
7651             s = skipspace(s);
7652             if (*s == '{')
7653                 PRETERMBLOCK(DO);
7654             if (*s != '\'') {
7655                 *PL_tokenbuf = '&';
7656                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7657                               1, &len);
7658                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7659                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7660                     SSize_t off = s-SvPVX(PL_linestr);
7661                     d = skipspace(d);
7662                     s = SvPVX(PL_linestr)+off;
7663                     if (*d == '(') {
7664                         force_ident_maybe_lex('&');
7665                         s = d;
7666                     }
7667                 }
7668             }
7669             if (orig_keyword == KEY_do) {
7670                 orig_keyword = 0;
7671                 pl_yylval.ival = 1;
7672             }
7673             else
7674                 pl_yylval.ival = 0;
7675             OPERATOR(DO);
7676
7677         case KEY_die:
7678             PL_hints |= HINT_BLOCK_SCOPE;
7679             LOP(OP_DIE,XTERM);
7680
7681         case KEY_defined:
7682             UNI(OP_DEFINED);
7683
7684         case KEY_delete:
7685             UNI(OP_DELETE);
7686
7687         case KEY_dbmopen:
7688             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7689                               STR_WITH_LEN("NDBM_File::"),
7690                               STR_WITH_LEN("DB_File::"),
7691                               STR_WITH_LEN("GDBM_File::"),
7692                               STR_WITH_LEN("SDBM_File::"),
7693                               STR_WITH_LEN("ODBM_File::"),
7694                               NULL);
7695             LOP(OP_DBMOPEN,XTERM);
7696
7697         case KEY_dbmclose:
7698             UNI(OP_DBMCLOSE);
7699
7700         case KEY_dump:
7701             LOOPX(OP_DUMP);
7702
7703         case KEY_else:
7704             PREBLOCK(ELSE);
7705
7706         case KEY_elsif:
7707             pl_yylval.ival = CopLINE(PL_curcop);
7708             OPERATOR(ELSIF);
7709
7710         case KEY_eq:
7711             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7712                 return REPORT(0);
7713             Eop(OP_SEQ);
7714
7715         case KEY_exists:
7716             UNI(OP_EXISTS);
7717
7718         case KEY_exit:
7719             UNI(OP_EXIT);
7720
7721         case KEY_eval:
7722             s = skipspace(s);
7723             if (*s == '{') { /* block eval */
7724                 PL_expect = XTERMBLOCK;
7725                 UNIBRACK(OP_ENTERTRY);
7726             }
7727             else { /* string eval */
7728                 PL_expect = XTERM;
7729                 UNIBRACK(OP_ENTEREVAL);
7730             }
7731
7732         case KEY_evalbytes:
7733             PL_expect = XTERM;
7734             UNIBRACK(-OP_ENTEREVAL);
7735
7736         case KEY_eof:
7737             UNI(OP_EOF);
7738
7739         case KEY_exp:
7740             UNI(OP_EXP);
7741
7742         case KEY_each:
7743             UNI(OP_EACH);
7744
7745         case KEY_exec:
7746             LOP(OP_EXEC,XREF);
7747
7748         case KEY_endhostent:
7749             FUN0(OP_EHOSTENT);
7750
7751         case KEY_endnetent:
7752             FUN0(OP_ENETENT);
7753
7754         case KEY_endservent:
7755             FUN0(OP_ESERVENT);
7756
7757         case KEY_endprotoent:
7758             FUN0(OP_EPROTOENT);
7759
7760         case KEY_endpwent:
7761             FUN0(OP_EPWENT);
7762
7763         case KEY_endgrent:
7764             FUN0(OP_EGRENT);
7765
7766         case KEY_for:
7767         case KEY_foreach:
7768             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7769                 return REPORT(0);
7770             pl_yylval.ival = CopLINE(PL_curcop);
7771             s = skipspace(s);
7772             if (   PL_expect == XSTATE
7773                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
7774             {
7775                 char *p = s;
7776
7777                 if ((PL_bufend - p) >= 3
7778                     && strEQs(p, "my") && isSPACE(*(p + 2)))
7779                 {
7780                     p += 2;
7781                 }
7782                 else if ((PL_bufend - p) >= 4
7783                          && strEQs(p, "our") && isSPACE(*(p + 3)))
7784                     p += 3;
7785                 p = skipspace(p);
7786                 /* skip optional package name, as in "for my abc $x (..)" */
7787                 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
7788                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7789                     p = skipspace(p);
7790                 }
7791                 if (*p != '$' && *p != '\\')
7792                     Perl_croak(aTHX_ "Missing $ on loop variable");
7793             }
7794             OPERATOR(FOR);
7795
7796         case KEY_formline:
7797             LOP(OP_FORMLINE,XTERM);
7798
7799         case KEY_fork:
7800             FUN0(OP_FORK);
7801
7802         case KEY_fc:
7803             UNI(OP_FC);
7804
7805         case KEY_fcntl:
7806             LOP(OP_FCNTL,XTERM);
7807
7808         case KEY_fileno:
7809             UNI(OP_FILENO);
7810
7811         case KEY_flock:
7812             LOP(OP_FLOCK,XTERM);
7813
7814         case KEY_gt:
7815             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7816                 return REPORT(0);
7817             Rop(OP_SGT);
7818
7819         case KEY_ge:
7820             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7821                 return REPORT(0);
7822             Rop(OP_SGE);
7823
7824         case KEY_grep:
7825             LOP(OP_GREPSTART, XREF);
7826
7827         case KEY_goto:
7828             LOOPX(OP_GOTO);
7829
7830         case KEY_gmtime:
7831             UNI(OP_GMTIME);
7832
7833         case KEY_getc:
7834             UNIDOR(OP_GETC);
7835
7836         case KEY_getppid:
7837             FUN0(OP_GETPPID);
7838
7839         case KEY_getpgrp:
7840             UNI(OP_GETPGRP);
7841
7842         case KEY_getpriority:
7843             LOP(OP_GETPRIORITY,XTERM);
7844
7845         case KEY_getprotobyname:
7846             UNI(OP_GPBYNAME);
7847
7848         case KEY_getprotobynumber:
7849             LOP(OP_GPBYNUMBER,XTERM);
7850
7851         case KEY_getprotoent:
7852             FUN0(OP_GPROTOENT);
7853
7854         case KEY_getpwent:
7855             FUN0(OP_GPWENT);
7856
7857         case KEY_getpwnam:
7858             UNI(OP_GPWNAM);
7859
7860         case KEY_getpwuid:
7861             UNI(OP_GPWUID);
7862
7863         case KEY_getpeername:
7864             UNI(OP_GETPEERNAME);
7865
7866         case KEY_gethostbyname:
7867             UNI(OP_GHBYNAME);
7868
7869         case KEY_gethostbyaddr:
7870             LOP(OP_GHBYADDR,XTERM);
7871
7872         case KEY_gethostent:
7873             FUN0(OP_GHOSTENT);
7874
7875         case KEY_getnetbyname:
7876             UNI(OP_GNBYNAME);
7877
7878         case KEY_getnetbyaddr:
7879             LOP(OP_GNBYADDR,XTERM);
7880
7881         case KEY_getnetent:
7882             FUN0(OP_GNETENT);
7883
7884         case KEY_getservbyname:
7885             LOP(OP_GSBYNAME,XTERM);
7886
7887         case KEY_getservbyport:
7888             LOP(OP_GSBYPORT,XTERM);
7889
7890         case KEY_getservent:
7891             FUN0(OP_GSERVENT);
7892
7893         case KEY_getsockname:
7894             UNI(OP_GETSOCKNAME);
7895
7896         case KEY_getsockopt:
7897             LOP(OP_GSOCKOPT,XTERM);
7898
7899         case KEY_getgrent:
7900             FUN0(OP_GGRENT);
7901
7902         case KEY_getgrnam:
7903             UNI(OP_GGRNAM);
7904
7905         case KEY_getgrgid:
7906             UNI(OP_GGRGID);
7907
7908         case KEY_getlogin:
7909             FUN0(OP_GETLOGIN);
7910
7911         case KEY_given:
7912             pl_yylval.ival = CopLINE(PL_curcop);
7913             Perl_ck_warner_d(aTHX_
7914                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7915                 "given is experimental");
7916             OPERATOR(GIVEN);
7917
7918         case KEY_glob:
7919             LOP(
7920              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7921              XTERM
7922             );
7923
7924         case KEY_hex:
7925             UNI(OP_HEX);
7926
7927         case KEY_if:
7928             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7929                 return REPORT(0);
7930             pl_yylval.ival = CopLINE(PL_curcop);
7931             OPERATOR(IF);
7932
7933         case KEY_index:
7934             LOP(OP_INDEX,XTERM);
7935
7936         case KEY_int:
7937             UNI(OP_INT);
7938
7939         case KEY_ioctl:
7940             LOP(OP_IOCTL,XTERM);
7941
7942         case KEY_join:
7943             LOP(OP_JOIN,XTERM);
7944
7945         case KEY_keys:
7946             UNI(OP_KEYS);
7947
7948         case KEY_kill:
7949             LOP(OP_KILL,XTERM);
7950
7951         case KEY_last:
7952             LOOPX(OP_LAST);
7953
7954         case KEY_lc:
7955             UNI(OP_LC);
7956
7957         case KEY_lcfirst:
7958             UNI(OP_LCFIRST);
7959
7960         case KEY_local:
7961             OPERATOR(LOCAL);
7962
7963         case KEY_length:
7964             UNI(OP_LENGTH);
7965
7966         case KEY_lt:
7967             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7968                 return REPORT(0);
7969             Rop(OP_SLT);
7970
7971         case KEY_le:
7972             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7973                 return REPORT(0);
7974             Rop(OP_SLE);
7975
7976         case KEY_localtime:
7977             UNI(OP_LOCALTIME);
7978
7979         case KEY_log:
7980             UNI(OP_LOG);
7981
7982         case KEY_link:
7983             LOP(OP_LINK,XTERM);
7984
7985         case KEY_listen:
7986             LOP(OP_LISTEN,XTERM);
7987
7988         case KEY_lock:
7989             UNI(OP_LOCK);
7990
7991         case KEY_lstat:
7992             UNI(OP_LSTAT);
7993
7994         case KEY_m:
7995             s = scan_pat(s,OP_MATCH);
7996             TERM(sublex_start());
7997
7998         case KEY_map:
7999             LOP(OP_MAPSTART, XREF);
8000
8001         case KEY_mkdir:
8002             LOP(OP_MKDIR,XTERM);
8003
8004         case KEY_msgctl:
8005             LOP(OP_MSGCTL,XTERM);
8006
8007         case KEY_msgget:
8008             LOP(OP_MSGGET,XTERM);
8009
8010         case KEY_msgrcv:
8011             LOP(OP_MSGRCV,XTERM);
8012
8013         case KEY_msgsnd:
8014             LOP(OP_MSGSND,XTERM);
8015
8016         case KEY_our:
8017         case KEY_my:
8018         case KEY_state:
8019             if (PL_in_my) {
8020                 PL_bufptr = s;
8021                 yyerror(Perl_form(aTHX_
8022                                   "Can't redeclare \"%s\" in \"%s\"",
8023                                    tmp      == KEY_my    ? "my" :
8024                                    tmp      == KEY_state ? "state" : "our",
8025                                    PL_in_my == KEY_my    ? "my" :
8026                                    PL_in_my == KEY_state ? "state" : "our"));
8027             }
8028             PL_in_my = (U16)tmp;
8029             s = skipspace(s);
8030             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8031                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8032                 if (len == 3 && strEQs(PL_tokenbuf, "sub"))
8033                     goto really_sub;
8034                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8035                 if (!PL_in_my_stash) {
8036                     char tmpbuf[1024];
8037                     int len;
8038                     PL_bufptr = s;
8039                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8040                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
8041                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8042                 }
8043             }
8044             else if (*s == '\\') {
8045                 if (!FEATURE_MYREF_IS_ENABLED)
8046                     Perl_croak(aTHX_ "The experimental declared_refs "
8047                                      "feature is not enabled");
8048                 Perl_ck_warner_d(aTHX_
8049                      packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
8050                     "Declaring references is experimental");
8051             }
8052             OPERATOR(MY);
8053
8054         case KEY_next:
8055             LOOPX(OP_NEXT);
8056
8057         case KEY_ne:
8058             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8059                 return REPORT(0);
8060             Eop(OP_SNE);
8061
8062         case KEY_no:
8063             s = tokenize_use(0, s);
8064             TOKEN(USE);
8065
8066         case KEY_not:
8067             if (*s == '(' || (s = skipspace(s), *s == '('))
8068                 FUN1(OP_NOT);
8069             else {
8070                 if (!PL_lex_allbrackets
8071                     && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8072                 {
8073                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8074                 }
8075                 OPERATOR(NOTOP);
8076             }
8077
8078         case KEY_open:
8079             s = skipspace(s);
8080             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8081                 const char *t;
8082                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8083                               &len);
8084                 for (t=d; isSPACE(*t);)
8085                     t++;
8086                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8087                     /* [perl #16184] */
8088                     && !(t[0] == '=' && t[1] == '>')
8089                     && !(t[0] == ':' && t[1] == ':')
8090                     && !keyword(s, d-s, 0)
8091                 ) {
8092                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8093                        "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8094                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8095                 }
8096             }
8097             LOP(OP_OPEN,XTERM);
8098
8099         case KEY_or:
8100             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8101                 return REPORT(0);
8102             pl_yylval.ival = OP_OR;
8103             OPERATOR(OROP);
8104
8105         case KEY_ord:
8106             UNI(OP_ORD);
8107
8108         case KEY_oct:
8109             UNI(OP_OCT);
8110
8111         case KEY_opendir:
8112             LOP(OP_OPEN_DIR,XTERM);
8113
8114         case KEY_print:
8115             checkcomma(s,PL_tokenbuf,"filehandle");
8116             LOP(OP_PRINT,XREF);
8117
8118         case KEY_printf:
8119             checkcomma(s,PL_tokenbuf,"filehandle");
8120             LOP(OP_PRTF,XREF);
8121
8122         case KEY_prototype:
8123             UNI(OP_PROTOTYPE);
8124
8125         case KEY_push:
8126             LOP(OP_PUSH,XTERM);
8127
8128         case KEY_pop:
8129             UNIDOR(OP_POP);
8130
8131         case KEY_pos:
8132             UNIDOR(OP_POS);
8133
8134         case KEY_pack:
8135             LOP(OP_PACK,XTERM);
8136
8137         case KEY_package:
8138             s = force_word(s,BAREWORD,FALSE,TRUE);
8139             s = skipspace(s);
8140             s = force_strict_version(s);
8141             PREBLOCK(PACKAGE);
8142
8143         case KEY_pipe:
8144             LOP(OP_PIPE_OP,XTERM);
8145
8146         case KEY_q:
8147             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8148             if (!s)
8149                 missingterm(NULL);
8150             COPLINE_SET_FROM_MULTI_END;
8151             pl_yylval.ival = OP_CONST;
8152             TERM(sublex_start());
8153
8154         case KEY_quotemeta:
8155             UNI(OP_QUOTEMETA);
8156
8157         case KEY_qw: {
8158             OP *words = NULL;
8159             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8160             if (!s)
8161                 missingterm(NULL);
8162             COPLINE_SET_FROM_MULTI_END;
8163             PL_expect = XOPERATOR;
8164             if (SvCUR(PL_lex_stuff)) {
8165                 int warned_comma = !ckWARN(WARN_QW);
8166                 int warned_comment = warned_comma;
8167                 d = SvPV_force(PL_lex_stuff, len);
8168                 while (len) {
8169                     for (; isSPACE(*d) && len; --len, ++d)
8170                         /**/;
8171                     if (len) {
8172                         SV *sv;
8173                         const char *b = d;
8174                         if (!warned_comma || !warned_comment) {
8175                             for (; !isSPACE(*d) && len; --len, ++d) {
8176                                 if (!warned_comma && *d == ',') {
8177                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8178                                         "Possible attempt to separate words with commas");
8179                                     ++warned_comma;
8180                                 }
8181                                 else if (!warned_comment && *d == '#') {
8182                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8183                                         "Possible attempt to put comments in qw() list");
8184                                     ++warned_comment;
8185                                 }
8186                             }
8187                         }
8188                         else {
8189                             for (; !isSPACE(*d) && len; --len, ++d)
8190                                 /**/;
8191                         }
8192                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8193                         words = op_append_elem(OP_LIST, words,
8194                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8195                     }
8196                 }
8197             }
8198             if (!words)
8199                 words = newNULLLIST();
8200             SvREFCNT_dec_NN(PL_lex_stuff);
8201             PL_lex_stuff = NULL;
8202             PL_expect = XOPERATOR;
8203             pl_yylval.opval = sawparens(words);
8204             TOKEN(QWLIST);
8205         }
8206
8207         case KEY_qq:
8208             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8209             if (!s)
8210                 missingterm(NULL);
8211             pl_yylval.ival = OP_STRINGIFY;
8212             if (SvIVX(PL_lex_stuff) == '\'')
8213                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8214             TERM(sublex_start());
8215
8216         case KEY_qr:
8217             s = scan_pat(s,OP_QR);
8218             TERM(sublex_start());
8219
8220         case KEY_qx:
8221             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8222             if (!s)
8223                 missingterm(NULL);
8224             pl_yylval.ival = OP_BACKTICK;
8225             TERM(sublex_start());
8226
8227         case KEY_return:
8228             OLDLOP(OP_RETURN);
8229
8230         case KEY_require:
8231             s = skipspace(s);
8232             if (isDIGIT(*s)) {
8233                 s = force_version(s, FALSE);
8234             }
8235             else if (*s != 'v' || !isDIGIT(s[1])
8236                     || (s = force_version(s, TRUE), *s == 'v'))
8237             {
8238                 *PL_tokenbuf = '\0';
8239                 s = force_word(s,BAREWORD,TRUE,TRUE);
8240                 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
8241                                            PL_tokenbuf + sizeof(PL_tokenbuf),
8242                                            UTF))
8243                 {
8244                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8245                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8246                 }
8247                 else if (*s == '<')
8248                     yyerror("<> at require-statement should be quotes");
8249             }
8250             if (orig_keyword == KEY_require) {
8251                 orig_keyword = 0;
8252                 pl_yylval.ival = 1;
8253             }
8254             else
8255                 pl_yylval.ival = 0;
8256             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8257             PL_bufptr = s;
8258             PL_last_uni = PL_oldbufptr;
8259             PL_last_lop_op = OP_REQUIRE;
8260             s = skipspace(s);
8261             return REPORT( (int)REQUIRE );
8262
8263         case KEY_reset:
8264             UNI(OP_RESET);
8265
8266         case KEY_redo:
8267             LOOPX(OP_REDO);
8268
8269         case KEY_rename:
8270             LOP(OP_RENAME,XTERM);
8271
8272         case KEY_rand:
8273             UNI(OP_RAND);
8274
8275         case KEY_rmdir:
8276             UNI(OP_RMDIR);
8277
8278         case KEY_rindex:
8279             LOP(OP_RINDEX,XTERM);
8280
8281         case KEY_read:
8282             LOP(OP_READ,XTERM);
8283
8284         case KEY_readdir:
8285             UNI(OP_READDIR);
8286
8287         case KEY_readline:
8288             UNIDOR(OP_READLINE);
8289
8290         case KEY_readpipe:
8291             UNIDOR(OP_BACKTICK);
8292
8293         case KEY_rewinddir:
8294             UNI(OP_REWINDDIR);
8295
8296         case KEY_recv:
8297             LOP(OP_RECV,XTERM);
8298
8299         case KEY_reverse:
8300             LOP(OP_REVERSE,XTERM);
8301
8302         case KEY_readlink:
8303             UNIDOR(OP_READLINK);
8304
8305         case KEY_ref:
8306             UNI(OP_REF);
8307
8308         case KEY_s:
8309             s = scan_subst(s);
8310             if (pl_yylval.opval)
8311                 TERM(sublex_start());
8312             else
8313                 TOKEN(1);       /* force error */
8314
8315         case KEY_say:
8316             checkcomma(s,PL_tokenbuf,"filehandle");
8317             LOP(OP_SAY,XREF);
8318
8319         case KEY_chomp:
8320             UNI(OP_CHOMP);
8321
8322         case KEY_scalar:
8323             UNI(OP_SCALAR);
8324
8325         case KEY_select:
8326             LOP(OP_SELECT,XTERM);
8327
8328         case KEY_seek:
8329             LOP(OP_SEEK,XTERM);
8330
8331         case KEY_semctl:
8332             LOP(OP_SEMCTL,XTERM);
8333
8334         case KEY_semget:
8335             LOP(OP_SEMGET,XTERM);
8336
8337         case KEY_semop:
8338             LOP(OP_SEMOP,XTERM);
8339
8340         case KEY_send:
8341             LOP(OP_SEND,XTERM);
8342
8343         case KEY_setpgrp:
8344             LOP(OP_SETPGRP,XTERM);
8345
8346         case KEY_setpriority:
8347             LOP(OP_SETPRIORITY,XTERM);
8348
8349         case KEY_sethostent:
8350             UNI(OP_SHOSTENT);
8351
8352         case KEY_setnetent:
8353             UNI(OP_SNETENT);
8354
8355         case KEY_setservent:
8356             UNI(OP_SSERVENT);
8357
8358         case KEY_setprotoent:
8359             UNI(OP_SPROTOENT);
8360
8361         case KEY_setpwent:
8362             FUN0(OP_SPWENT);
8363
8364         case KEY_setgrent:
8365             FUN0(OP_SGRENT);
8366
8367         case KEY_seekdir:
8368             LOP(OP_SEEKDIR,XTERM);
8369
8370         case KEY_setsockopt:
8371             LOP(OP_SSOCKOPT,XTERM);
8372
8373         case KEY_shift:
8374             UNIDOR(OP_SHIFT);
8375
8376         case KEY_shmctl:
8377             LOP(OP_SHMCTL,XTERM);
8378
8379         case KEY_shmget:
8380             LOP(OP_SHMGET,XTERM);
8381
8382         case KEY_shmread:
8383             LOP(OP_SHMREAD,XTERM);
8384
8385         case KEY_shmwrite:
8386             LOP(OP_SHMWRITE,XTERM);
8387
8388         case KEY_shutdown:
8389             LOP(OP_SHUTDOWN,XTERM);
8390
8391         case KEY_sin:
8392             UNI(OP_SIN);
8393
8394         case KEY_sleep:
8395             UNI(OP_SLEEP);
8396
8397         case KEY_socket:
8398             LOP(OP_SOCKET,XTERM);
8399
8400         case KEY_socketpair:
8401             LOP(OP_SOCKPAIR,XTERM);
8402
8403         case KEY_sort:
8404             checkcomma(s,PL_tokenbuf,"subroutine name");
8405             s = skipspace(s);
8406             PL_expect = XTERM;
8407             s = force_word(s,BAREWORD,TRUE,TRUE);
8408             LOP(OP_SORT,XREF);
8409
8410         case KEY_split:
8411             LOP(OP_SPLIT,XTERM);
8412
8413         case KEY_sprintf:
8414             LOP(OP_SPRINTF,XTERM);
8415
8416         case KEY_splice:
8417             LOP(OP_SPLICE,XTERM);
8418
8419         case KEY_sqrt:
8420             UNI(OP_SQRT);
8421
8422         case KEY_srand:
8423             UNI(OP_SRAND);
8424
8425         case KEY_stat:
8426             UNI(OP_STAT);
8427
8428         case KEY_study:
8429             UNI(OP_STUDY);
8430
8431         case KEY_substr:
8432             LOP(OP_SUBSTR,XTERM);
8433
8434         case KEY_format:
8435         case KEY_sub:
8436           really_sub:
8437             {
8438                 char * const tmpbuf = PL_tokenbuf + 1;
8439                 expectation attrful;
8440                 bool have_name, have_proto;
8441                 const int key = tmp;
8442                 SV *format_name = NULL;
8443
8444                 SSize_t off = s-SvPVX(PL_linestr);
8445                 s = skipspace(s);
8446                 d = SvPVX(PL_linestr)+off;
8447
8448                 if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
8449                     || *s == '\''
8450                     || (*s == ':' && s[1] == ':'))
8451                 {
8452
8453                     PL_expect = XBLOCK;
8454                     attrful = XATTRBLOCK;
8455                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8456                                   &len);
8457                     if (key == KEY_format)
8458                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8459                     *PL_tokenbuf = '&';
8460                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8461                      || pad_findmy_pvn(
8462                             PL_tokenbuf, len + 1, 0
8463                         ) != NOT_IN_PAD)
8464                         sv_setpvn(PL_subname, tmpbuf, len);
8465                     else {
8466                         sv_setsv(PL_subname,PL_curstname);
8467                         sv_catpvs(PL_subname,"::");
8468                         sv_catpvn(PL_subname,tmpbuf,len);
8469                     }
8470                     if (SvUTF8(PL_linestr))
8471                         SvUTF8_on(PL_subname);
8472                     have_name = TRUE;
8473
8474
8475                     s = skipspace(d);
8476                 }
8477                 else {
8478                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8479                     {
8480                         *d = '\0';
8481                         /* diag_listed_as: Missing name in "%s sub" */
8482                         Perl_croak(aTHX_
8483                                   "Missing name in \"%s\"", PL_bufptr);
8484                     }
8485                     PL_expect = XTERMBLOCK;
8486                     attrful = XATTRTERM;
8487                     sv_setpvs(PL_subname,"?");
8488                     have_name = FALSE;
8489                 }
8490
8491                 if (key == KEY_format) {
8492                     if (format_name) {
8493                         NEXTVAL_NEXTTOKE.opval
8494                             = newSVOP(OP_CONST,0, format_name);
8495                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8496                         force_next(BAREWORD);
8497                     }
8498                     PREBLOCK(FORMAT);
8499                 }
8500
8501                 /* Look for a prototype */
8502                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8503                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8504                     COPLINE_SET_FROM_MULTI_END;
8505                     if (!s)
8506                         Perl_croak(aTHX_ "Prototype not terminated");
8507                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8508                     have_proto = TRUE;
8509
8510                     s = skipspace(s);
8511                 }
8512                 else
8513                     have_proto = FALSE;
8514
8515                 if (*s == ':' && s[1] != ':')
8516                     PL_expect = attrful;
8517                 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8518                     assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8519                            key == KEY_DESTROY || key == KEY_BEGIN ||
8520                            key == KEY_UNITCHECK || key == KEY_CHECK ||
8521                            key == KEY_INIT || key == KEY_END ||
8522                            key == KEY_my || key == KEY_state ||
8523                            key == KEY_our);
8524                     if (!have_name)
8525                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8526                     else if (*s != ';' && *s != '}')
8527                         Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8528                 }
8529
8530                 if (have_proto) {
8531                     NEXTVAL_NEXTTOKE.opval =
8532                         newSVOP(OP_CONST, 0, PL_lex_stuff);
8533                     PL_lex_stuff = NULL;
8534                     force_next(THING);
8535                 }
8536                 if (!have_name) {
8537                     if (PL_curstash)
8538                         sv_setpvs(PL_subname, "__ANON__");
8539                     else
8540                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8541                     TOKEN(ANONSUB);
8542                 }
8543                 force_ident_maybe_lex('&');
8544                 TOKEN(SUB);
8545             }
8546
8547         case KEY_system:
8548             LOP(OP_SYSTEM,XREF);
8549
8550         case KEY_symlink:
8551             LOP(OP_SYMLINK,XTERM);
8552
8553         case KEY_syscall:
8554             LOP(OP_SYSCALL,XTERM);
8555
8556         case KEY_sysopen:
8557             LOP(OP_SYSOPEN,XTERM);
8558
8559         case KEY_sysseek:
8560             LOP(OP_SYSSEEK,XTERM);
8561
8562         case KEY_sysread:
8563             LOP(OP_SYSREAD,XTERM);
8564
8565         case KEY_syswrite:
8566             LOP(OP_SYSWRITE,XTERM);
8567
8568         case KEY_tr:
8569         case KEY_y:
8570             s = scan_trans(s);
8571             TERM(sublex_start());
8572
8573         case KEY_tell:
8574             UNI(OP_TELL);
8575
8576         case KEY_telldir:
8577             UNI(OP_TELLDIR);
8578
8579         case KEY_tie:
8580             LOP(OP_TIE,XTERM);
8581
8582         case KEY_tied:
8583             UNI(OP_TIED);
8584
8585         case KEY_time:
8586             FUN0(OP_TIME);
8587
8588         case KEY_times:
8589             FUN0(OP_TMS);
8590
8591         case KEY_truncate:
8592             LOP(OP_TRUNCATE,XTERM);
8593
8594         case KEY_uc:
8595             UNI(OP_UC);
8596
8597         case KEY_ucfirst:
8598             UNI(OP_UCFIRST);
8599
8600         case KEY_untie:
8601             UNI(OP_UNTIE);
8602
8603         case KEY_until:
8604             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8605                 return REPORT(0);
8606             pl_yylval.ival = CopLINE(PL_curcop);
8607             OPERATOR(UNTIL);
8608
8609         case KEY_unless:
8610             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8611                 return REPORT(0);
8612             pl_yylval.ival = CopLINE(PL_curcop);
8613             OPERATOR(UNLESS);
8614
8615         case KEY_unlink:
8616             LOP(OP_UNLINK,XTERM);
8617
8618         case KEY_undef:
8619             UNIDOR(OP_UNDEF);
8620
8621         case KEY_unpack:
8622             LOP(OP_UNPACK,XTERM);
8623
8624         case KEY_utime:
8625             LOP(OP_UTIME,XTERM);
8626
8627         case KEY_umask:
8628             UNIDOR(OP_UMASK);
8629
8630         case KEY_unshift:
8631             LOP(OP_UNSHIFT,XTERM);
8632
8633         case KEY_use:
8634             s = tokenize_use(1, s);
8635             TOKEN(USE);
8636
8637         case KEY_values:
8638             UNI(OP_VALUES);
8639
8640         case KEY_vec:
8641             LOP(OP_VEC,XTERM);
8642
8643         case KEY_when:
8644             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8645                 return REPORT(0);
8646             pl_yylval.ival = CopLINE(PL_curcop);
8647             Perl_ck_warner_d(aTHX_
8648                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8649                 "when is experimental");
8650             OPERATOR(WHEN);
8651
8652         case KEY_while:
8653             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8654                 return REPORT(0);
8655             pl_yylval.ival = CopLINE(PL_curcop);
8656             OPERATOR(WHILE);
8657
8658         case KEY_warn:
8659             PL_hints |= HINT_BLOCK_SCOPE;
8660             LOP(OP_WARN,XTERM);
8661
8662         case KEY_wait:
8663             FUN0(OP_WAIT);
8664
8665         case KEY_waitpid:
8666             LOP(OP_WAITPID,XTERM);
8667
8668         case KEY_wantarray:
8669             FUN0(OP_WANTARRAY);
8670
8671         case KEY_write:
8672             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8673              * we use the same number on EBCDIC */
8674             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8675             UNI(OP_ENTERWRITE);
8676
8677         case KEY_x:
8678             if (PL_expect == XOPERATOR) {
8679                 if (*s == '=' && !PL_lex_allbrackets
8680                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8681                 {
8682                     return REPORT(0);
8683                 }
8684                 Mop(OP_REPEAT);
8685             }
8686             check_uni();
8687             goto just_a_word;
8688
8689         case KEY_xor:
8690             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8691                 return REPORT(0);
8692             pl_yylval.ival = OP_XOR;
8693             OPERATOR(OROP);
8694         }
8695     }}
8696 }
8697
8698 /*
8699   S_pending_ident
8700
8701   Looks up an identifier in the pad or in a package
8702
8703   is_sig indicates that this is a subroutine signature variable
8704   rather than a plain pad var.
8705
8706   Returns:
8707     PRIVATEREF if this is a lexical name.
8708     BAREWORD   if this belongs to a package.
8709
8710   Structure:
8711       if we're in a my declaration
8712           croak if they tried to say my($foo::bar)
8713           build the ops for a my() declaration
8714       if it's an access to a my() variable
8715           build ops for access to a my() variable
8716       if in a dq string, and they've said @foo and we can't find @foo
8717           warn
8718       build ops for a bareword
8719 */
8720
8721 static int
8722 S_pending_ident(pTHX)
8723 {
8724     PADOFFSET tmp = 0;
8725     const char pit = (char)pl_yylval.ival;
8726     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8727     /* All routes through this function want to know if there is a colon.  */
8728     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8729
8730     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8731           "### Pending identifier '%s'\n", PL_tokenbuf); });
8732
8733     /* if we're in a my(), we can't allow dynamics here.
8734        $foo'bar has already been turned into $foo::bar, so
8735        just check for colons.
8736
8737        if it's a legal name, the OP is a PADANY.
8738     */
8739     if (PL_in_my) {
8740         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8741             if (has_colon)
8742                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8743                                   "variable %s in \"our\"",
8744                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8745             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8746         }
8747         else {
8748             OP *o;
8749             if (has_colon) {
8750                 /* "my" variable %s can't be in a package */
8751                 /* PL_no_myglob is constant */
8752                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8753                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8754                             PL_in_my == KEY_my ? "my" : "state",
8755                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8756                             PL_tokenbuf),
8757                             UTF ? SVf_UTF8 : 0);
8758                 GCC_DIAG_RESTORE;
8759             }
8760
8761             if (PL_in_my == KEY_sigvar) {
8762                 /* A signature 'padop' needs in addition, an op_first to
8763                  * point to a child sigdefelem, and an extra field to hold
8764                  * the signature index. We can achieve both by using an
8765                  * UNOP_AUX and (ab)using the op_aux field to hold the
8766                  * index. If we ever need more fields, use a real malloced
8767                  * aux strut instead.
8768                  */
8769                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
8770                                     INT2PTR(UNOP_AUX_item *,
8771                                         (PL_parser->sig_elems)));
8772                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
8773                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
8774                                   :                         OPpARGELEM_HV);
8775             }
8776             else
8777                 o = newOP(OP_PADANY, 0);
8778             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8779                                                         UTF ? SVf_UTF8 : 0);
8780             if (PL_in_my == KEY_sigvar)
8781                 PL_in_my = 0;
8782
8783             pl_yylval.opval = o;
8784             return PRIVATEREF;
8785         }
8786     }
8787
8788     /*
8789        build the ops for accesses to a my() variable.
8790     */
8791
8792     if (!has_colon) {
8793         if (!PL_in_my)
8794             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8795                                  0);
8796         if (tmp != NOT_IN_PAD) {
8797             /* might be an "our" variable" */
8798             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8799                 /* build ops for a bareword */
8800                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8801                 HEK * const stashname = HvNAME_HEK(stash);
8802                 SV *  const sym = newSVhek(stashname);
8803                 sv_catpvs(sym, "::");
8804                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8805                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
8806                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8807                 if (pit != '&')
8808                   gv_fetchsv(sym,
8809                     GV_ADDMULTI,
8810                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8811                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8812                      : SVt_PVHV));
8813                 return BAREWORD;
8814             }
8815
8816             pl_yylval.opval = newOP(OP_PADANY, 0);
8817             pl_yylval.opval->op_targ = tmp;
8818             return PRIVATEREF;
8819         }
8820     }
8821
8822     /*
8823        Whine if they've said @foo or @foo{key} in a doublequoted string,
8824        and @foo (or %foo) isn't a variable we can find in the symbol
8825        table.
8826     */
8827     if (ckWARN(WARN_AMBIGUOUS)
8828         && pit == '@'
8829         && PL_lex_state != LEX_NORMAL
8830         && !PL_lex_brackets)
8831     {
8832         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8833                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
8834                                          SVt_PVAV);
8835         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8836            )
8837         {
8838             /* Downgraded from fatal to warning 20000522 mjd */
8839             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8840                         "Possible unintended interpolation of %" UTF8f
8841                         " in string",
8842                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8843         }
8844     }
8845
8846     /* build ops for a bareword */
8847     pl_yylval.opval = newSVOP(OP_CONST, 0,
8848                                    newSVpvn_flags(PL_tokenbuf + 1,
8849                                                       tokenbuf_len - 1,
8850                                                       UTF ? SVf_UTF8 : 0 ));
8851     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8852     if (pit != '&')
8853         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8854                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8855                      | ( UTF ? SVf_UTF8 : 0 ),
8856                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8857                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8858                       : SVt_PVHV));
8859     return BAREWORD;
8860 }
8861
8862 STATIC void
8863 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8864 {
8865     PERL_ARGS_ASSERT_CHECKCOMMA;
8866
8867     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8868         if (ckWARN(WARN_SYNTAX)) {
8869             int level = 1;
8870             const char *w;
8871             for (w = s+2; *w && level; w++) {
8872                 if (*w == '(')
8873                     ++level;
8874                 else if (*w == ')')
8875                     --level;
8876             }
8877             while (isSPACE(*w))
8878                 ++w;
8879             /* the list of chars below is for end of statements or
8880              * block / parens, boolean operators (&&, ||, //) and branch
8881              * constructs (or, and, if, until, unless, while, err, for).
8882              * Not a very solid hack... */
8883             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8884                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8885                             "%s (...) interpreted as function",name);
8886         }
8887     }
8888     while (s < PL_bufend && isSPACE(*s))
8889         s++;
8890     if (*s == '(')
8891         s++;
8892     while (s < PL_bufend && isSPACE(*s))
8893         s++;
8894     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8895         const char * const w = s;
8896         s += UTF ? UTF8SKIP(s) : 1;
8897         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
8898             s += UTF ? UTF8SKIP(s) : 1;
8899         while (s < PL_bufend && isSPACE(*s))
8900             s++;
8901         if (*s == ',') {
8902             GV* gv;
8903             PADOFFSET off;
8904             if (keyword(w, s - w, 0))
8905                 return;
8906
8907             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8908             if (gv && GvCVu(gv))
8909                 return;
8910             if (s - w <= 254) {
8911                 char tmpbuf[256];
8912                 Copy(w, tmpbuf+1, s - w, char);
8913                 *tmpbuf = '&';
8914                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8915                 if (off != NOT_IN_PAD) return;
8916             }
8917             Perl_croak(aTHX_ "No comma allowed after %s", what);
8918         }
8919     }
8920 }
8921
8922 /* S_new_constant(): do any overload::constant lookup.
8923
8924    Either returns sv, or mortalizes/frees sv and returns a new SV*.
8925    Best used as sv=new_constant(..., sv, ...).
8926    If s, pv are NULL, calls subroutine with one argument,
8927    and <type> is used with error messages only.
8928    <type> is assumed to be well formed UTF-8 */
8929
8930 STATIC SV *
8931 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8932                SV *sv, SV *pv, const char *type, STRLEN typelen)
8933 {
8934     dSP;
8935     HV * table = GvHV(PL_hintgv);                /* ^H */
8936     SV *res;
8937     SV *errsv = NULL;
8938     SV **cvp;
8939     SV *cv, *typesv;
8940     const char *why1 = "", *why2 = "", *why3 = "";
8941
8942     PERL_ARGS_ASSERT_NEW_CONSTANT;
8943     /* We assume that this is true: */
8944     if (*key == 'c') { assert (strEQ(key, "charnames")); }
8945     assert(type || s);
8946
8947     /* charnames doesn't work well if there have been errors found */
8948     if (PL_error_count > 0 && *key == 'c')
8949     {
8950         SvREFCNT_dec_NN(sv);
8951         return &PL_sv_undef;
8952     }
8953
8954     sv_2mortal(sv);                     /* Parent created it permanently */
8955     if (!table
8956         || ! (PL_hints & HINT_LOCALIZE_HH)
8957         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8958         || ! SvOK(*cvp))
8959     {
8960         char *msg;
8961
8962         /* Here haven't found what we're looking for.  If it is charnames,
8963          * perhaps it needs to be loaded.  Try doing that before giving up */
8964         if (*key == 'c') {
8965             Perl_load_module(aTHX_
8966                             0,
8967                             newSVpvs("_charnames"),
8968                              /* version parameter; no need to specify it, as if
8969                               * we get too early a version, will fail anyway,
8970                               * not being able to find '_charnames' */
8971                             NULL,
8972                             newSVpvs(":full"),
8973                             newSVpvs(":short"),
8974                             NULL);
8975             assert(sp == PL_stack_sp);
8976             table = GvHV(PL_hintgv);
8977             if (table
8978                 && (PL_hints & HINT_LOCALIZE_HH)
8979                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8980                 && SvOK(*cvp))
8981             {
8982                 goto now_ok;
8983             }
8984         }
8985         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8986             msg = Perl_form(aTHX_
8987                                "Constant(%.*s) unknown",
8988                                 (int)(type ? typelen : len),
8989                                 (type ? type: s));
8990         }
8991         else {
8992             why1 = "$^H{";
8993             why2 = key;
8994             why3 = "} is not defined";
8995         report:
8996             if (*key == 'c') {
8997                 msg = Perl_form(aTHX_
8998                             /* The +3 is for '\N{'; -4 for that, plus '}' */
8999                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9000                       );
9001             }
9002             else {
9003                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9004                                     (int)(type ? typelen : len),
9005                                     (type ? type: s), why1, why2, why3);
9006             }
9007         }
9008         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9009         return SvREFCNT_inc_simple_NN(sv);
9010     }
9011   now_ok:
9012     cv = *cvp;
9013     if (!pv && s)
9014         pv = newSVpvn_flags(s, len, SVs_TEMP);
9015     if (type && pv)
9016         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9017     else
9018         typesv = &PL_sv_undef;
9019
9020     PUSHSTACKi(PERLSI_OVERLOAD);
9021     ENTER ;
9022     SAVETMPS;
9023
9024     PUSHMARK(SP) ;
9025     EXTEND(sp, 3);
9026     if (pv)
9027         PUSHs(pv);
9028     PUSHs(sv);
9029     if (pv)
9030         PUSHs(typesv);
9031     PUTBACK;
9032     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9033
9034     SPAGAIN ;
9035
9036     /* Check the eval first */
9037     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9038         STRLEN errlen;
9039         const char * errstr;
9040         sv_catpvs(errsv, "Propagated");
9041         errstr = SvPV_const(errsv, errlen);
9042         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9043         (void)POPs;
9044         res = SvREFCNT_inc_simple_NN(sv);
9045     }
9046     else {
9047         res = POPs;
9048         SvREFCNT_inc_simple_void_NN(res);
9049     }
9050
9051     PUTBACK ;
9052     FREETMPS ;
9053     LEAVE ;
9054     POPSTACK;
9055
9056     if (!SvOK(res)) {
9057         why1 = "Call to &{$^H{";
9058         why2 = key;
9059         why3 = "}} did not return a defined value";
9060         sv = res;
9061         (void)sv_2mortal(sv);
9062         goto report;
9063     }
9064
9065     return res;
9066 }
9067
9068 PERL_STATIC_INLINE void
9069 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9070                     bool is_utf8, bool check_dollar)
9071 {
9072     PERL_ARGS_ASSERT_PARSE_IDENT;
9073
9074     while (*s < PL_bufend) {
9075         if (*d >= e)
9076             Perl_croak(aTHX_ "%s", ident_too_long);
9077         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9078              /* The UTF-8 case must come first, otherwise things
9079              * like c\N{COMBINING TILDE} would start failing, as the
9080              * isWORDCHAR_A case below would gobble the 'c' up.
9081              */
9082
9083             char *t = *s + UTF8SKIP(*s);
9084             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9085                 t += UTF8SKIP(t);
9086             }
9087             if (*d + (t - *s) > e)
9088                 Perl_croak(aTHX_ "%s", ident_too_long);
9089             Copy(*s, *d, t - *s, char);
9090             *d += t - *s;
9091             *s = t;
9092         }
9093         else if ( isWORDCHAR_A(**s) ) {
9094             do {
9095                 *(*d)++ = *(*s)++;
9096             } while (isWORDCHAR_A(**s) && *d < e);
9097         }
9098         else if (   allow_package
9099                  && **s == '\''
9100                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9101         {
9102             *(*d)++ = ':';
9103             *(*d)++ = ':';
9104             (*s)++;
9105         }
9106         else if (allow_package && **s == ':' && (*s)[1] == ':'
9107            /* Disallow things like Foo::$bar. For the curious, this is
9108             * the code path that triggers the "Bad name after" warning
9109             * when looking for barewords.
9110             */
9111            && !(check_dollar && (*s)[2] == '$')) {
9112             *(*d)++ = *(*s)++;
9113             *(*d)++ = *(*s)++;
9114         }
9115         else
9116             break;
9117     }
9118     return;
9119 }
9120
9121 /* Returns a NUL terminated string, with the length of the string written to
9122    *slp
9123    */
9124 STATIC char *
9125 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9126 {
9127     char *d = dest;
9128     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9129     bool is_utf8 = cBOOL(UTF);
9130
9131     PERL_ARGS_ASSERT_SCAN_WORD;
9132
9133     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
9134     *d = '\0';
9135     *slp = d - dest;
9136     return s;
9137 }
9138
9139 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9140  * iff Unicode semantics are to be used.  The legal ones are any of:
9141  *  a) all ASCII characters except:
9142  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9143  *          2) '{'
9144  *     The final case currently doesn't get this far in the program, so we
9145  *     don't test for it.  If that were to change, it would be ok to allow it.
9146  *  b) When not under Unicode rules, any upper Latin1 character
9147  *  c) Otherwise, when unicode rules are used, all XIDS characters.
9148  *
9149  *      Because all ASCII characters have the same representation whether
9150  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9151  *      '{' without knowing if is UTF-8 or not. */
9152 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
9153     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
9154                          ? isIDFIRST_utf8_safe(s, e)                        \
9155                          : (isGRAPH_L1(*s)                                  \
9156                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9157
9158 STATIC char *
9159 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9160 {
9161     I32 herelines = PL_parser->herelines;
9162     SSize_t bracket = -1;
9163     char funny = *s++;
9164     char *d = dest;
9165     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9166     bool is_utf8 = cBOOL(UTF);
9167     I32 orig_copline = 0, tmp_copline = 0;
9168
9169     PERL_ARGS_ASSERT_SCAN_IDENT;
9170
9171     if (isSPACE(*s) || !*s)
9172         s = skipspace(s);
9173     if (isDIGIT(*s)) {
9174         while (isDIGIT(*s)) {
9175             if (d >= e)
9176                 Perl_croak(aTHX_ "%s", ident_too_long);
9177             *d++ = *s++;
9178         }
9179     }
9180     else {  /* See if it is a "normal" identifier */
9181         parse_ident(&s, &d, e, 1, is_utf8, FALSE);
9182     }
9183     *d = '\0';
9184     d = dest;
9185     if (*d) {
9186         /* Either a digit variable, or parse_ident() found an identifier
9187            (anything valid as a bareword), so job done and return.  */
9188         if (PL_lex_state != LEX_NORMAL)
9189             PL_lex_state = LEX_INTERPENDMAYBE;
9190         return s;
9191     }
9192
9193     /* Here, it is not a run-of-the-mill identifier name */
9194
9195     if (*s == '$' && s[1]
9196         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9197             || isDIGIT_A((U8)s[1])
9198             || s[1] == '$'
9199             || s[1] == '{'
9200             || strEQs(s+1,"::")) )
9201     {
9202         /* Dereferencing a value in a scalar variable.
9203            The alternatives are different syntaxes for a scalar variable.
9204            Using ' as a leading package separator isn't allowed. :: is.   */
9205         return s;
9206     }
9207     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9208     if (*s == '{') {
9209         bracket = s - SvPVX(PL_linestr);
9210         s++;
9211         orig_copline = CopLINE(PL_curcop);
9212         if (s < PL_bufend && isSPACE(*s)) {
9213             s = skipspace(s);
9214         }
9215     }
9216     if ((s <= PL_bufend - (is_utf8)
9217                           ? UTF8SKIP(s)
9218                           : 1)
9219         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9220     {
9221         if (is_utf8) {
9222             const STRLEN skip = UTF8SKIP(s);
9223             STRLEN i;
9224             d[skip] = '\0';
9225             for ( i = 0; i < skip; i++ )
9226                 d[i] = *s++;
9227         }
9228         else {
9229             *d = *s++;
9230             d[1] = '\0';
9231         }
9232     }
9233     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9234     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9235         *d = toCTRL(*s);
9236         s++;
9237     }
9238     /* Warn about ambiguous code after unary operators if {...} notation isn't
9239        used.  There's no difference in ambiguity; it's merely a heuristic
9240        about when not to warn.  */
9241     else if (ck_uni && bracket == -1)
9242         check_uni();
9243     if (bracket != -1) {
9244         bool skip;
9245         char *s2;
9246         /* If we were processing {...} notation then...  */
9247         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) {
9248             /* if it starts as a valid identifier, assume that it is one.
9249                (the later check for } being at the expected point will trap
9250                cases where this doesn't pan out.)  */
9251             d += is_utf8 ? UTF8SKIP(d) : 1;
9252             parse_ident(&s, &d, e, 1, is_utf8, TRUE);
9253             *d = '\0';
9254             tmp_copline = CopLINE(PL_curcop);
9255             if (s < PL_bufend && isSPACE(*s)) {
9256                 s = skipspace(s);
9257             }
9258             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9259                 /* ${foo[0]} and ${foo{bar}} notation.  */
9260                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9261                     const char * const brack =
9262                         (const char *)
9263                         ((*s == '[') ? "[...]" : "{...}");
9264                     orig_copline = CopLINE(PL_curcop);
9265                     CopLINE_set(PL_curcop, tmp_copline);
9266    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9267                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9268                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9269                         funny, dest, brack, funny, dest, brack);
9270                     CopLINE_set(PL_curcop, orig_copline);
9271                 }
9272                 bracket++;
9273                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9274                 PL_lex_allbrackets++;
9275                 return s;
9276             }
9277         }
9278         /* Handle extended ${^Foo} variables
9279          * 1999-02-27 mjd-perl-patch@plover.com */
9280         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9281                  && isWORDCHAR(*s))
9282         {
9283             d++;
9284             while (isWORDCHAR(*s) && d < e) {
9285                 *d++ = *s++;
9286             }
9287             if (d >= e)
9288                 Perl_croak(aTHX_ "%s", ident_too_long);
9289             *d = '\0';
9290         }
9291
9292         if ( !tmp_copline )
9293             tmp_copline = CopLINE(PL_curcop);
9294         if ((skip = s < PL_bufend && isSPACE(*s)))
9295             /* Avoid incrementing line numbers or resetting PL_linestart,
9296                in case we have to back up.  */
9297             s2 = peekspace(s);
9298         else
9299             s2 = s;
9300
9301         /* Expect to find a closing } after consuming any trailing whitespace.
9302          */
9303         if (*s2 == '}') {
9304             /* Now increment line numbers if applicable.  */
9305             if (skip)
9306                 s = skipspace(s);
9307             s++;
9308             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9309                 PL_lex_state = LEX_INTERPEND;
9310                 PL_expect = XREF;
9311             }
9312             if (PL_lex_state == LEX_NORMAL) {
9313                 if (ckWARN(WARN_AMBIGUOUS)
9314                     && (keyword(dest, d - dest, 0)
9315                         || get_cvn_flags(dest, d - dest, is_utf8
9316                            ? SVf_UTF8
9317                            : 0)))
9318                 {
9319                     SV *tmp = newSVpvn_flags( dest, d - dest,
9320                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9321                     if (funny == '#')
9322                         funny = '@';
9323                     orig_copline = CopLINE(PL_curcop);
9324                     CopLINE_set(PL_curcop, tmp_copline);
9325                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9326                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9327                         funny, SVfARG(tmp), funny, SVfARG(tmp));
9328                     CopLINE_set(PL_curcop, orig_copline);
9329                 }
9330             }
9331         }
9332         else {
9333             /* Didn't find the closing } at the point we expected, so restore
9334                state such that the next thing to process is the opening { and */
9335             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9336             CopLINE_set(PL_curcop, orig_copline);
9337             PL_parser->herelines = herelines;
9338             *dest = '\0';
9339         }
9340     }
9341     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9342         PL_lex_state = LEX_INTERPEND;
9343     return s;
9344 }
9345
9346 static bool
9347 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9348
9349     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9350      * found in the parse starting at 's', based on the subset that are valid
9351      * in this context input to this routine in 'valid_flags'. Advances s.
9352      * Returns TRUE if the input should be treated as a valid flag, so the next
9353      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9354      * upon first call on the current regex.  This routine will set it to any
9355      * charset modifier found.  The caller shouldn't change it.  This way,
9356      * another charset modifier encountered in the parse can be detected as an
9357      * error, as we have decided to allow only one */
9358
9359     const char c = **s;
9360     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9361
9362     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9363         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9364             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9365                        UTF ? SVf_UTF8 : 0);
9366             (*s) += charlen;
9367             /* Pretend that it worked, so will continue processing before
9368              * dieing */
9369             return TRUE;
9370         }
9371         return FALSE;
9372     }
9373
9374     switch (c) {
9375
9376         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9377         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9378         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9379         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9380         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9381         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9382         case LOCALE_PAT_MOD:
9383             if (*charset) {
9384                 goto multiple_charsets;
9385             }
9386             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9387             *charset = c;
9388             break;
9389         case UNICODE_PAT_MOD:
9390             if (*charset) {
9391                 goto multiple_charsets;
9392             }
9393             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9394             *charset = c;
9395             break;
9396         case ASCII_RESTRICT_PAT_MOD:
9397             if (! *charset) {
9398                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9399             }
9400             else {
9401
9402                 /* Error if previous modifier wasn't an 'a', but if it was, see
9403                  * if, and accept, a second occurrence (only) */
9404                 if (*charset != 'a'
9405                     || get_regex_charset(*pmfl)
9406                         != REGEX_ASCII_RESTRICTED_CHARSET)
9407                 {
9408                         goto multiple_charsets;
9409                 }
9410                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9411             }
9412             *charset = c;
9413             break;
9414         case DEPENDS_PAT_MOD:
9415             if (*charset) {
9416                 goto multiple_charsets;
9417             }
9418             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9419             *charset = c;
9420             break;
9421     }
9422
9423     (*s)++;
9424     return TRUE;
9425
9426     multiple_charsets:
9427         if (*charset != c) {
9428             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9429         }
9430         else if (c == 'a') {
9431   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9432             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9433         }
9434         else {
9435             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9436         }
9437
9438         /* Pretend that it worked, so will continue processing before dieing */
9439         (*s)++;
9440         return TRUE;
9441 }
9442
9443 STATIC char *
9444 S_scan_pat(pTHX_ char *start, I32 type)
9445 {
9446     PMOP *pm;
9447     char *s;
9448     const char * const valid_flags =
9449         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9450     char charset = '\0';    /* character set modifier */
9451     unsigned int x_mod_count = 0;
9452
9453     PERL_ARGS_ASSERT_SCAN_PAT;
9454
9455     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9456     if (!s)
9457         Perl_croak(aTHX_ "Search pattern not terminated");
9458
9459     pm = (PMOP*)newPMOP(type, 0);
9460     if (PL_multi_open == '?') {
9461         /* This is the only point in the code that sets PMf_ONCE:  */
9462         pm->op_pmflags |= PMf_ONCE;
9463
9464         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9465            allows us to restrict the list needed by reset to just the ??
9466            matches.  */
9467         assert(type != OP_TRANS);
9468         if (PL_curstash) {
9469             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9470             U32 elements;
9471             if (!mg) {
9472                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9473                                  0);
9474             }
9475             elements = mg->mg_len / sizeof(PMOP**);
9476             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9477             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9478             mg->mg_len = elements * sizeof(PMOP**);
9479             PmopSTASH_set(pm,PL_curstash);
9480         }
9481     }
9482
9483     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9484      * anon CV. False positives like qr/[(?{]/ are harmless */
9485
9486     if (type == OP_QR) {
9487         STRLEN len;
9488         char *e, *p = SvPV(PL_lex_stuff, len);
9489         e = p + len;
9490         for (; p < e; p++) {
9491             if (p[0] == '(' && p[1] == '?'
9492                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9493             {
9494                 pm->op_pmflags |= PMf_HAS_CV;
9495                 break;
9496             }
9497         }
9498         pm->op_pmflags |= PMf_IS_QR;
9499     }
9500
9501     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9502                                 &s, &charset, &x_mod_count))
9503     {};
9504     /* issue a warning if /c is specified,but /g is not */
9505     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9506     {
9507         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9508                        "Use of /c modifier is meaningless without /g" );
9509     }
9510
9511     PL_lex_op = (OP*)pm;
9512     pl_yylval.ival = OP_MATCH;
9513     return s;
9514 }
9515
9516 STATIC char *
9517 S_scan_subst(pTHX_ char *start)
9518 {
9519     char *s;
9520     PMOP *pm;
9521     I32 first_start;
9522     line_t first_line;
9523     line_t linediff = 0;
9524     I32 es = 0;
9525     char charset = '\0';    /* character set modifier */
9526     unsigned int x_mod_count = 0;
9527     char *t;
9528
9529     PERL_ARGS_ASSERT_SCAN_SUBST;
9530
9531     pl_yylval.ival = OP_NULL;
9532
9533     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9534
9535     if (!s)
9536         Perl_croak(aTHX_ "Substitution pattern not terminated");
9537
9538     s = t;
9539
9540     first_start = PL_multi_start;
9541     first_line = CopLINE(PL_curcop);
9542     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9543     if (!s) {
9544         SvREFCNT_dec_NN(PL_lex_stuff);
9545         PL_lex_stuff = NULL;
9546         Perl_croak(aTHX_ "Substitution replacement not terminated");
9547     }
9548     PL_multi_start = first_start;       /* so whole substitution is taken together */
9549
9550     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9551
9552
9553     while (*s) {
9554         if (*s == EXEC_PAT_MOD) {
9555             s++;
9556             es++;
9557         }
9558         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9559                                   &s, &charset, &x_mod_count))
9560         {
9561             break;
9562         }
9563     }
9564
9565     if ((pm->op_pmflags & PMf_CONTINUE)) {
9566         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9567     }
9568
9569     if (es) {
9570         SV * const repl = newSVpvs("");
9571
9572         PL_multi_end = 0;
9573         pm->op_pmflags |= PMf_EVAL;
9574         while (es-- > 0) {
9575             if (es)
9576                 sv_catpvs(repl, "eval ");
9577             else
9578                 sv_catpvs(repl, "do ");
9579         }
9580         sv_catpvs(repl, "{");
9581         sv_catsv(repl, PL_parser->lex_sub_repl);
9582         sv_catpvs(repl, "}");
9583         SvREFCNT_dec(PL_parser->lex_sub_repl);
9584         PL_parser->lex_sub_repl = repl;
9585         es = 1;
9586     }
9587
9588
9589     linediff = CopLINE(PL_curcop) - first_line;
9590     if (linediff)
9591         CopLINE_set(PL_curcop, first_line);
9592
9593     if (linediff || es) {
9594         /* the IVX field indicates that the replacement string is a s///e;
9595          * the NVX field indicates how many src code lines the replacement
9596          * spreads over */
9597         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9598         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
9599         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
9600                                                                     cBOOL(es);
9601     }
9602
9603     PL_lex_op = (OP*)pm;
9604     pl_yylval.ival = OP_SUBST;
9605     return s;
9606 }
9607
9608 STATIC char *
9609 S_scan_trans(pTHX_ char *start)
9610 {
9611     char* s;
9612     OP *o;
9613     U8 squash;
9614     U8 del;
9615     U8 complement;
9616     bool nondestruct = 0;
9617     char *t;
9618
9619     PERL_ARGS_ASSERT_SCAN_TRANS;
9620
9621     pl_yylval.ival = OP_NULL;
9622
9623     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9624     if (!s)
9625         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9626
9627     s = t;
9628
9629     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9630     if (!s) {
9631         SvREFCNT_dec_NN(PL_lex_stuff);
9632         PL_lex_stuff = NULL;
9633         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9634     }
9635
9636     complement = del = squash = 0;
9637     while (1) {
9638         switch (*s) {
9639         case 'c':
9640             complement = OPpTRANS_COMPLEMENT;
9641             break;
9642         case 'd':
9643             del = OPpTRANS_DELETE;
9644             break;
9645         case 's':
9646             squash = OPpTRANS_SQUASH;
9647             break;
9648         case 'r':
9649             nondestruct = 1;
9650             break;
9651         default:
9652             goto no_more;
9653         }
9654         s++;
9655     }
9656   no_more:
9657
9658     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9659     o->op_private &= ~OPpTRANS_ALL;
9660     o->op_private |= del|squash|complement|
9661       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9662       (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF   : 0);
9663
9664     PL_lex_op = o;
9665     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9666
9667
9668     return s;
9669 }
9670
9671 /* scan_heredoc
9672    Takes a pointer to the first < in <<FOO.
9673    Returns a pointer to the byte following <<FOO.
9674
9675    This function scans a heredoc, which involves different methods
9676    depending on whether we are in a string eval, quoted construct, etc.
9677    This is because PL_linestr could containing a single line of input, or
9678    a whole string being evalled, or the contents of the current quote-
9679    like operator.
9680
9681    The two basic methods are:
9682     - Steal lines from the input stream
9683     - Scan the heredoc in PL_linestr and remove it therefrom
9684
9685    In a file scope or filtered eval, the first method is used; in a
9686    string eval, the second.
9687
9688    In a quote-like operator, we have to choose between the two,
9689    depending on where we can find a newline.  We peek into outer lex-
9690    ing scopes until we find one with a newline in it.  If we reach the
9691    outermost lexing scope and it is a file, we use the stream method.
9692    Otherwise it is treated as an eval.
9693 */
9694
9695 STATIC char *
9696 S_scan_heredoc(pTHX_ char *s)
9697 {
9698     I32 op_type = OP_SCALAR;
9699     I32 len;
9700     SV *tmpstr;
9701     char term;
9702     char *d;
9703     char *e;
9704     char *peek;
9705     char *indent = 0;
9706     I32 indent_len = 0;
9707     bool indented = FALSE;
9708     const bool infile = PL_rsfp || PL_parser->filtered;
9709     const line_t origline = CopLINE(PL_curcop);
9710     LEXSHARED *shared = PL_parser->lex_shared;
9711
9712     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9713
9714     s += 2;
9715     d = PL_tokenbuf + 1;
9716     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9717     *PL_tokenbuf = '\n';
9718     peek = s;
9719     if (*peek == '~') {
9720         indented = TRUE;
9721         peek++; s++;
9722     }
9723     while (SPACE_OR_TAB(*peek))
9724         peek++;
9725     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9726         s = peek;
9727         term = *s++;
9728         s = delimcpy(d, e, s, PL_bufend, term, &len);
9729         if (s == PL_bufend)
9730             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9731         d += len;
9732         s++;
9733     }
9734     else {
9735         if (*s == '\\')
9736             /* <<\FOO is equivalent to <<'FOO' */
9737             s++, term = '\'';
9738         else
9739             term = '"';
9740         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9741             deprecate("bare << to mean <<\"\"");
9742         peek = s;
9743         while (
9744                isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
9745         {
9746             peek += UTF ? UTF8SKIP(peek) : 1;
9747         }
9748         len = (peek - s >= e - d) ? (e - d) : (peek - s);
9749         Copy(s, d, len, char);
9750         s += len;
9751         d += len;
9752     }
9753     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9754         Perl_croak(aTHX_ "Delimiter for here document is too long");
9755     *d++ = '\n';
9756     *d = '\0';
9757     len = d - PL_tokenbuf;
9758
9759 #ifndef PERL_STRICT_CR
9760     d = strchr(s, '\r');
9761     if (d) {
9762         char * const olds = s;
9763         s = d;
9764         while (s < PL_bufend) {
9765             if (*s == '\r') {
9766                 *d++ = '\n';
9767                 if (*++s == '\n')
9768                     s++;
9769             }
9770             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9771                 *d++ = *s++;
9772                 s++;
9773             }
9774             else
9775                 *d++ = *s++;
9776         }
9777         *d = '\0';
9778         PL_bufend = d;
9779         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9780         s = olds;
9781     }
9782 #endif
9783
9784     tmpstr = newSV_type(SVt_PVIV);
9785     SvGROW(tmpstr, 80);
9786     if (term == '\'') {
9787         op_type = OP_CONST;
9788         SvIV_set(tmpstr, -1);
9789     }
9790     else if (term == '`') {
9791         op_type = OP_BACKTICK;
9792         SvIV_set(tmpstr, '\\');
9793     }
9794
9795     PL_multi_start = origline + 1 + PL_parser->herelines;
9796     PL_multi_open = PL_multi_close = '<';
9797     /* inside a string eval or quote-like operator */
9798     if (!infile || PL_lex_inwhat) {
9799         SV *linestr;
9800         char *bufend;
9801         char * const olds = s;
9802         PERL_CONTEXT * const cx = CX_CUR();
9803         /* These two fields are not set until an inner lexing scope is
9804            entered.  But we need them set here. */
9805         shared->ls_bufptr  = s;
9806         shared->ls_linestr = PL_linestr;
9807         if (PL_lex_inwhat)
9808           /* Look for a newline.  If the current buffer does not have one,
9809              peek into the line buffer of the parent lexing scope, going
9810              up as many levels as necessary to find one with a newline
9811              after bufptr.
9812            */
9813           while (!(s = (char *)memchr(
9814                     (void *)shared->ls_bufptr, '\n',
9815                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9816                 ))) {
9817             shared = shared->ls_prev;
9818             /* shared is only null if we have gone beyond the outermost
9819                lexing scope.  In a file, we will have broken out of the
9820                loop in the previous iteration.  In an eval, the string buf-
9821                fer ends with "\n;", so the while condition above will have
9822                evaluated to false.  So shared can never be null.  Or so you
9823                might think.  Odd syntax errors like s;@{<<; can gobble up
9824                the implicit semicolon at the end of a flie, causing the
9825                file handle to be closed even when we are not in a string
9826                eval.  So shared may be null in that case.
9827                (Closing '}' here to balance the earlier open brace for
9828                editors that look for matched pairs.) */
9829             if (UNLIKELY(!shared))
9830                 goto interminable;
9831             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9832                most lexing scope.  In a file, shared->ls_linestr at that
9833                level is just one line, so there is no body to steal. */
9834             if (infile && !shared->ls_prev) {
9835                 s = olds;
9836                 goto streaming;
9837             }
9838           }
9839         else {  /* eval or we've already hit EOF */
9840             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9841             if (!s)
9842                 goto interminable;
9843         }
9844         linestr = shared->ls_linestr;
9845         bufend = SvEND(linestr);
9846         d = s;
9847         if (indented) {
9848             char *myolds = s;
9849
9850             while (s < bufend - len + 1) {
9851                 if (*s++ == '\n')
9852                     ++PL_parser->herelines;
9853
9854                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
9855                     char *backup = s;
9856                     indent_len = 0;
9857
9858                     /* Only valid if it's preceded by whitespace only */
9859                     while (backup != myolds && --backup >= myolds) {
9860                         if (! SPACE_OR_TAB(*backup)) {
9861                             break;
9862                         }
9863
9864                         indent_len++;
9865                     }
9866
9867                     /* No whitespace or all! */
9868                     if (backup == s || *backup == '\n') {
9869                         Newxz(indent, indent_len + 1, char);
9870                         memcpy(indent, backup + 1, indent_len);
9871                         s--; /* before our delimiter */
9872                         PL_parser->herelines--; /* this line doesn't count */
9873                         break;
9874                     }
9875                 }
9876             }
9877         } else {
9878             while (s < bufend - len + 1
9879                    && memNE(s,PL_tokenbuf,len) )
9880             {
9881                 if (*s++ == '\n')
9882                     ++PL_parser->herelines;
9883             }
9884         }
9885
9886         if (s >= bufend - len + 1) {
9887             goto interminable;
9888         }
9889         sv_setpvn(tmpstr,d+1,s-d);
9890         s += len - 1;
9891         /* the preceding stmt passes a newline */
9892         PL_parser->herelines++;
9893
9894         /* s now points to the newline after the heredoc terminator.
9895            d points to the newline before the body of the heredoc.
9896          */
9897
9898         /* We are going to modify linestr in place here, so set
9899            aside copies of the string if necessary for re-evals or
9900            (caller $n)[6]. */
9901         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9902            check shared->re_eval_str. */
9903         if (shared->re_eval_start || shared->re_eval_str) {
9904             /* Set aside the rest of the regexp */
9905             if (!shared->re_eval_str)
9906                 shared->re_eval_str =
9907                        newSVpvn(shared->re_eval_start,
9908                                 bufend - shared->re_eval_start);
9909             shared->re_eval_start -= s-d;
9910         }
9911         if (cxstack_ix >= 0
9912             && CxTYPE(cx) == CXt_EVAL
9913             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9914             && cx->blk_eval.cur_text == linestr)
9915         {
9916             cx->blk_eval.cur_text = newSVsv(linestr);
9917             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
9918         }
9919         /* Copy everything from s onwards back to d. */
9920         Move(s,d,bufend-s + 1,char);
9921         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9922         /* Setting PL_bufend only applies when we have not dug deeper
9923            into other scopes, because sublex_done sets PL_bufend to
9924            SvEND(PL_linestr). */
9925         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9926         s = olds;
9927     }
9928     else
9929     {
9930       SV *linestr_save;
9931       char *oldbufptr_save;
9932       char *oldoldbufptr_save;
9933      streaming:
9934       SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
9935       term = PL_tokenbuf[1];
9936       len--;
9937       linestr_save = PL_linestr; /* must restore this afterwards */
9938       d = s;                     /* and this */
9939       oldbufptr_save = PL_oldbufptr;
9940       oldoldbufptr_save = PL_oldoldbufptr;
9941       PL_linestr = newSVpvs("");
9942       PL_bufend = SvPVX(PL_linestr);
9943       while (1) {
9944         PL_bufptr = PL_bufend;
9945         CopLINE_set(PL_curcop,
9946                     origline + 1 + PL_parser->herelines);
9947         if (!lex_next_chunk(LEX_NO_TERM)
9948          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9949             /* Simply freeing linestr_save might seem simpler here, as it
9950                does not matter what PL_linestr points to, since we are
9951                about to croak; but in a quote-like op, linestr_save
9952                will have been prospectively freed already, via
9953                SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9954                restore PL_linestr. */
9955             SvREFCNT_dec_NN(PL_linestr);
9956             PL_linestr = linestr_save;
9957             PL_oldbufptr = oldbufptr_save;
9958             PL_oldoldbufptr = oldoldbufptr_save;
9959             goto interminable;
9960         }
9961         CopLINE_set(PL_curcop, origline);
9962         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9963             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9964             /* ^That should be enough to avoid this needing to grow:  */
9965             sv_catpvs(PL_linestr, "\n\0");
9966             assert(s == SvPVX(PL_linestr));
9967             PL_bufend = SvEND(PL_linestr);
9968         }
9969         s = PL_bufptr;
9970         PL_parser->herelines++;
9971         PL_last_lop = PL_last_uni = NULL;
9972 #ifndef PERL_STRICT_CR
9973         if (PL_bufend - PL_linestart >= 2) {
9974             if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
9975                 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9976             {
9977                 PL_bufend[-2] = '\n';
9978                 PL_bufend--;
9979                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9980             }
9981             else if (PL_bufend[-1] == '\r')
9982                 PL_bufend[-1] = '\n';
9983         }
9984         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9985             PL_bufend[-1] = '\n';
9986 #endif
9987         if (indented && (PL_bufend-s) >= len) {
9988             char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
9989
9990             if (found) {
9991                 char *backup = found;
9992                 indent_len = 0;
9993
9994                 /* Only valid if it's preceded by whitespace only */
9995                 while (backup != s && --backup >= s) {
9996                     if (! SPACE_OR_TAB(*backup)) {
9997                         break;
9998                     }
9999                     indent_len++;
10000                 }
10001
10002                 /* All whitespace or none! */
10003                 if (backup == found || SPACE_OR_TAB(*backup)) {
10004                     Newxz(indent, indent_len + 1, char);
10005                     memcpy(indent, backup, indent_len);
10006                     SvREFCNT_dec(PL_linestr);
10007                     PL_linestr = linestr_save;
10008                     PL_linestart = SvPVX(linestr_save);
10009                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10010                     PL_oldbufptr = oldbufptr_save;
10011                     PL_oldoldbufptr = oldoldbufptr_save;
10012                     s = d;
10013                     break;
10014                 }
10015             }
10016
10017             /* Didn't find it */
10018             sv_catsv(tmpstr,PL_linestr);
10019         } else {
10020             if (*s == term && PL_bufend-s >= len
10021                 && memEQ(s,PL_tokenbuf + 1,len))
10022             {
10023                 SvREFCNT_dec(PL_linestr);
10024                 PL_linestr = linestr_save;
10025                 PL_linestart = SvPVX(linestr_save);
10026                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10027                 PL_oldbufptr = oldbufptr_save;
10028                 PL_oldoldbufptr = oldoldbufptr_save;
10029                 s = d;
10030                 break;
10031             } else {
10032                 sv_catsv(tmpstr,PL_linestr);
10033             }
10034         }
10035       }
10036     }
10037     PL_multi_end = origline + PL_parser->herelines;
10038     if (indented && indent) {
10039         STRLEN linecount = 1;
10040         STRLEN herelen = SvCUR(tmpstr);
10041         char *ss = SvPVX(tmpstr);
10042         char *se = ss + herelen;
10043         SV *newstr = newSV(herelen+1);
10044         SvPOK_on(newstr);
10045
10046         /* Trim leading whitespace */
10047         while (ss < se) {
10048             /* newline only? Copy and move on */
10049             if (*ss == '\n') {
10050                 sv_catpv(newstr,"\n");
10051                 ss++;
10052                 linecount++;
10053
10054             /* Found our indentation? Strip it */
10055             } else if (se - ss >= indent_len
10056                        && memEQ(ss, indent, indent_len))
10057             {
10058                 STRLEN le = 0;
10059
10060                 ss += indent_len;
10061
10062                 while ((ss + le) < se && *(ss + le) != '\n')
10063                     le++;
10064
10065                 sv_catpvn(newstr, ss, le);
10066
10067                 ss += le;
10068
10069             /* Line doesn't begin with our indentation? Croak */
10070             } else {
10071                 Perl_croak(aTHX_
10072                     "Indentation on line %d of here-doc doesn't match delimiter",
10073                     (int)linecount
10074                 );
10075             }
10076         }
10077         /* avoid sv_setsv() as we dont wan't to COW here */
10078         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10079         Safefree(indent);
10080         SvREFCNT_dec_NN(newstr);
10081     }
10082     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10083         SvPV_shrink_to_cur(tmpstr);
10084     }
10085     if (!IN_BYTES) {
10086         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10087             SvUTF8_on(tmpstr);
10088     }
10089     PL_lex_stuff = tmpstr;
10090     pl_yylval.ival = op_type;
10091     return s;
10092
10093   interminable:
10094     SvREFCNT_dec(tmpstr);
10095     CopLINE_set(PL_curcop, origline);
10096     missingterm(PL_tokenbuf + 1);
10097 }
10098
10099 /* scan_inputsymbol
10100    takes: position of first '<' in input buffer
10101    returns: position of first char following the matching '>' in
10102             input buffer
10103    side-effects: pl_yylval and lex_op are set.
10104
10105    This code handles:
10106
10107    <>           read from ARGV
10108    <<>>         read from ARGV without magic open
10109    <FH>         read from filehandle
10110    <pkg::FH>    read from package qualified filehandle
10111    <pkg'FH>     read from package qualified filehandle
10112    <$fh>        read from filehandle in $fh
10113    <*.h>        filename glob
10114
10115 */
10116
10117 STATIC char *
10118 S_scan_inputsymbol(pTHX_ char *start)
10119 {
10120     char *s = start;            /* current position in buffer */
10121     char *end;
10122     I32 len;
10123     bool nomagicopen = FALSE;
10124     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10125     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10126
10127     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10128
10129     end = strchr(s, '\n');
10130     if (!end)
10131         end = PL_bufend;
10132     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10133         nomagicopen = TRUE;
10134         *d = '\0';
10135         len = 0;
10136         s += 3;
10137     }
10138     else
10139         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
10140
10141     /* die if we didn't have space for the contents of the <>,
10142        or if it didn't end, or if we see a newline
10143     */
10144
10145     if (len >= (I32)sizeof PL_tokenbuf)
10146         Perl_croak(aTHX_ "Excessively long <> operator");
10147     if (s >= end)
10148         Perl_croak(aTHX_ "Unterminated <> operator");
10149
10150     s++;
10151
10152     /* check for <$fh>
10153        Remember, only scalar variables are interpreted as filehandles by
10154        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10155        treated as a glob() call.
10156        This code makes use of the fact that except for the $ at the front,
10157        a scalar variable and a filehandle look the same.
10158     */
10159     if (*d == '$' && d[1]) d++;
10160
10161     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10162     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10163         d += UTF ? UTF8SKIP(d) : 1;
10164     }
10165
10166     /* If we've tried to read what we allow filehandles to look like, and
10167        there's still text left, then it must be a glob() and not a getline.
10168        Use scan_str to pull out the stuff between the <> and treat it
10169        as nothing more than a string.
10170     */
10171
10172     if (d - PL_tokenbuf != len) {
10173         pl_yylval.ival = OP_GLOB;
10174         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10175         if (!s)
10176            Perl_croak(aTHX_ "Glob not terminated");
10177         return s;
10178     }
10179     else {
10180         bool readline_overriden = FALSE;
10181         GV *gv_readline;
10182         /* we're in a filehandle read situation */
10183         d = PL_tokenbuf;
10184
10185         /* turn <> into <ARGV> */
10186         if (!len)
10187             Copy("ARGV",d,5,char);
10188
10189         /* Check whether readline() is overriden */
10190         if ((gv_readline = gv_override("readline",8)))
10191             readline_overriden = TRUE;
10192
10193         /* if <$fh>, create the ops to turn the variable into a
10194            filehandle
10195         */
10196         if (*d == '$') {
10197             /* try to find it in the pad for this block, otherwise find
10198                add symbol table ops
10199             */
10200             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10201             if (tmp != NOT_IN_PAD) {
10202                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10203                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10204                     HEK * const stashname = HvNAME_HEK(stash);
10205                     SV * const sym = sv_2mortal(newSVhek(stashname));
10206                     sv_catpvs(sym, "::");
10207                     sv_catpv(sym, d+1);
10208                     d = SvPVX(sym);
10209                     goto intro_sym;
10210                 }
10211                 else {
10212                     OP * const o = newOP(OP_PADSV, 0);
10213                     o->op_targ = tmp;
10214                     PL_lex_op = readline_overriden
10215                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10216                                 op_append_elem(OP_LIST, o,
10217                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10218                         : newUNOP(OP_READLINE, 0, o);
10219                 }
10220             }
10221             else {
10222                 GV *gv;
10223                 ++d;
10224               intro_sym:
10225                 gv = gv_fetchpv(d,
10226                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10227                                 SVt_PV);
10228                 PL_lex_op = readline_overriden
10229                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10230                             op_append_elem(OP_LIST,
10231                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10232                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10233                     : newUNOP(OP_READLINE, 0,
10234                             newUNOP(OP_RV2SV, 0,
10235                                 newGVOP(OP_GV, 0, gv)));
10236             }
10237             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10238             pl_yylval.ival = OP_NULL;
10239         }
10240
10241         /* If it's none of the above, it must be a literal filehandle
10242            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10243         else {
10244             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10245             PL_lex_op = readline_overriden
10246                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10247                         op_append_elem(OP_LIST,
10248                             newGVOP(OP_GV, 0, gv),
10249                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10250                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10251             pl_yylval.ival = OP_NULL;
10252         }
10253     }
10254
10255     return s;
10256 }
10257
10258
10259 /* scan_str
10260    takes:
10261         start                   position in buffer
10262         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
10263                                 only if they are of the open/close form
10264         keep_delims             preserve the delimiters around the string
10265         re_reparse              compiling a run-time /(?{})/:
10266                                    collapse // to /,  and skip encoding src
10267         delimp                  if non-null, this is set to the position of
10268                                 the closing delimiter, or just after it if
10269                                 the closing and opening delimiters differ
10270                                 (i.e., the opening delimiter of a substitu-
10271                                 tion replacement)
10272    returns: position to continue reading from buffer
10273    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10274         updates the read buffer.
10275
10276    This subroutine pulls a string out of the input.  It is called for:
10277         q               single quotes           q(literal text)
10278         '               single quotes           'literal text'
10279         qq              double quotes           qq(interpolate $here please)
10280         "               double quotes           "interpolate $here please"
10281         qx              backticks               qx(/bin/ls -l)
10282         `               backticks               `/bin/ls -l`
10283         qw              quote words             @EXPORT_OK = qw( func() $spam )
10284         m//             regexp match            m/this/
10285         s///            regexp substitute       s/this/that/
10286         tr///           string transliterate    tr/this/that/
10287         y///            string transliterate    y/this/that/
10288         ($*@)           sub prototypes          sub foo ($)
10289         (stuff)         sub attr parameters     sub foo : attr(stuff)
10290         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10291
10292    In most of these cases (all but <>, patterns and transliterate)
10293    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10294    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10295    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10296    calls scan_str().
10297
10298    It skips whitespace before the string starts, and treats the first
10299    character as the delimiter.  If the delimiter is one of ([{< then
10300    the corresponding "close" character )]}> is used as the closing
10301    delimiter.  It allows quoting of delimiters, and if the string has
10302    balanced delimiters ([{<>}]) it allows nesting.
10303
10304    On success, the SV with the resulting string is put into lex_stuff or,
10305    if that is already non-NULL, into lex_repl. The second case occurs only
10306    when parsing the RHS of the special constructs s/// and tr/// (y///).
10307    For convenience, the terminating delimiter character is stuffed into
10308    SvIVX of the SV.
10309 */
10310
10311 STATIC char *
10312 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10313                  char **delimp
10314     )
10315 {
10316     SV *sv;                     /* scalar value: string */
10317     const char *tmps;           /* temp string, used for delimiter matching */
10318     char *s = start;            /* current position in the buffer */
10319     char term;                  /* terminating character */
10320     char *to;                   /* current position in the sv's data */
10321     I32 brackets = 1;           /* bracket nesting level */
10322     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10323     IV termcode;                /* terminating char. code */
10324     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10325     STRLEN termlen;             /* length of terminating string */
10326     line_t herelines;
10327
10328     /* The delimiters that have a mirror-image closing one */
10329     const char * opening_delims = "([{<";
10330     const char * closing_delims = ")]}>";
10331
10332     const char * non_grapheme_msg = "Use of unassigned code point or"
10333                                     " non-standalone grapheme for a delimiter"
10334                                     " will be a fatal error starting in Perl"
10335                                     " v5.30";
10336     /* The only non-UTF character that isn't a stand alone grapheme is
10337      * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
10338     bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
10339
10340     PERL_ARGS_ASSERT_SCAN_STR;
10341
10342     /* skip space before the delimiter */
10343     if (isSPACE(*s)) {
10344         s = skipspace(s);
10345     }
10346
10347     /* mark where we are, in case we need to report errors */
10348     CLINE;
10349
10350     /* after skipping whitespace, the next character is the terminator */
10351     term = *s;
10352     if (!UTF || UTF8_IS_INVARIANT(term)) {
10353         termcode = termstr[0] = term;
10354         termlen = 1;
10355     }
10356     else {
10357         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10358         if (check_grapheme) {
10359             if (   UNLIKELY(UNICODE_IS_SUPER(termcode))
10360                 || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
10361             {
10362                 /* These are considered graphemes, and since the ending
10363                  * delimiter will be the same, we don't have to check the other
10364                  * end */
10365                 check_grapheme = FALSE;
10366             }
10367             else if (UNLIKELY(! _is_grapheme((U8 *) start,
10368                                              (U8 *) s,
10369                                              (U8 *) PL_bufend,
10370                                              termcode)))
10371             {
10372                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
10373
10374                 /* Don't have to check the other end, as have already warned at
10375                  * this one */
10376                 check_grapheme = FALSE;
10377             }
10378         }
10379
10380         Copy(s, termstr, termlen, U8);
10381     }
10382
10383     /* mark where we are */
10384     PL_multi_start = CopLINE(PL_curcop);
10385     PL_multi_open = termcode;
10386     herelines = PL_parser->herelines;
10387
10388     /* If the delimiter has a mirror-image closing one, get it */
10389     if (term && (tmps = strchr(opening_delims, term))) {
10390         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
10391     }
10392
10393     PL_multi_close = termcode;
10394
10395     if (PL_multi_open == PL_multi_close) {
10396         keep_bracketed_quoted = FALSE;
10397     }
10398
10399     /* create a new SV to hold the contents.  79 is the SV's initial length.
10400        What a random number. */
10401     sv = newSV_type(SVt_PVIV);
10402     SvGROW(sv, 80);
10403     SvIV_set(sv, termcode);
10404     (void)SvPOK_only(sv);               /* validate pointer */
10405
10406     /* move past delimiter and try to read a complete string */
10407     if (keep_delims)
10408         sv_catpvn(sv, s, termlen);
10409     s += termlen;
10410     for (;;) {
10411         /* extend sv if need be */
10412         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10413         /* set 'to' to the next character in the sv's string */
10414         to = SvPVX(sv)+SvCUR(sv);
10415
10416         /* if open delimiter is the close delimiter read unbridle */
10417         if (PL_multi_open == PL_multi_close) {
10418             for (; s < PL_bufend; s++,to++) {
10419                 /* embedded newlines increment the current line number */
10420                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10421                     COPLINE_INC_WITH_HERELINES;
10422                 /* handle quoted delimiters */
10423                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10424                     if (!keep_bracketed_quoted
10425                         && (s[1] == term
10426                             || (re_reparse && s[1] == '\\'))
10427                     )
10428                         s++;
10429                     else /* any other quotes are simply copied straight through */
10430                         *to++ = *s++;
10431                 }
10432                 /* terminate when run out of buffer (the for() condition), or
10433                    have found the terminator */
10434                 else if (*s == term) {
10435                     if (termlen == 1)
10436                         break;
10437                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10438                         if (   check_grapheme
10439                             && UNLIKELY(! _is_grapheme((U8 *) start,
10440                                                               (U8 *) s,
10441                                                               (U8 *) PL_bufend,
10442                                                               termcode)))
10443                         {
10444                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10445                                         "%s", non_grapheme_msg);
10446                         }
10447                         break;
10448                 }
10449                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10450                     has_utf8 = TRUE;
10451                 *to = *s;
10452             }
10453         }
10454
10455         /* if the terminator isn't the same as the start character (e.g.,
10456            matched brackets), we have to allow more in the quoting, and
10457            be prepared for nested brackets.
10458         */
10459         else {
10460             /* read until we run out of string, or we find the terminator */
10461             for (; s < PL_bufend; s++,to++) {
10462                 /* embedded newlines increment the line count */
10463                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10464                     COPLINE_INC_WITH_HERELINES;
10465                 /* backslashes can escape the open or closing characters */
10466                 if (*s == '\\' && s+1 < PL_bufend) {
10467                     if (!keep_bracketed_quoted
10468                        && ( ((UV)s[1] == PL_multi_open)
10469                          || ((UV)s[1] == PL_multi_close) ))
10470                     {
10471                         s++;
10472                     }
10473                     else
10474                         *to++ = *s++;
10475                 }
10476                 /* allow nested opens and closes */
10477                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10478                     break;
10479                 else if ((UV)*s == PL_multi_open)
10480                     brackets++;
10481                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10482                     has_utf8 = TRUE;
10483                 *to = *s;
10484             }
10485         }
10486         /* terminate the copied string and update the sv's end-of-string */
10487         *to = '\0';
10488         SvCUR_set(sv, to - SvPVX_const(sv));
10489
10490         /*
10491          * this next chunk reads more into the buffer if we're not done yet
10492          */
10493
10494         if (s < PL_bufend)
10495             break;              /* handle case where we are done yet :-) */
10496
10497 #ifndef PERL_STRICT_CR
10498         if (to - SvPVX_const(sv) >= 2) {
10499             if (   (to[-2] == '\r' && to[-1] == '\n')
10500                 || (to[-2] == '\n' && to[-1] == '\r'))
10501             {
10502                 to[-2] = '\n';
10503                 to--;
10504                 SvCUR_set(sv, to - SvPVX_const(sv));
10505             }
10506             else if (to[-1] == '\r')
10507                 to[-1] = '\n';
10508         }
10509         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10510             to[-1] = '\n';
10511 #endif
10512
10513         /* if we're out of file, or a read fails, bail and reset the current
10514            line marker so we can report where the unterminated string began
10515         */
10516         COPLINE_INC_WITH_HERELINES;
10517         PL_bufptr = PL_bufend;
10518         if (!lex_next_chunk(0)) {
10519             sv_free(sv);
10520             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10521             return NULL;
10522         }
10523         s = start = PL_bufptr;
10524     }
10525
10526     /* at this point, we have successfully read the delimited string */
10527
10528     if (keep_delims)
10529             sv_catpvn(sv, s, termlen);
10530     s += termlen;
10531
10532     if (has_utf8)
10533         SvUTF8_on(sv);
10534
10535     PL_multi_end = CopLINE(PL_curcop);
10536     CopLINE_set(PL_curcop, PL_multi_start);
10537     PL_parser->herelines = herelines;
10538
10539     /* if we allocated too much space, give some back */
10540     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10541         SvLEN_set(sv, SvCUR(sv) + 1);
10542         SvPV_renew(sv, SvLEN(sv));
10543     }
10544
10545     /* decide whether this is the first or second quoted string we've read
10546        for this op
10547     */
10548
10549     if (PL_lex_stuff)
10550         PL_parser->lex_sub_repl = sv;
10551     else
10552         PL_lex_stuff = sv;
10553     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10554     return s;
10555 }
10556
10557 /*
10558   scan_num
10559   takes: pointer to position in buffer
10560   returns: pointer to new position in buffer
10561   side-effects: builds ops for the constant in pl_yylval.op
10562
10563   Read a number in any of the formats that Perl accepts:
10564
10565   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10566   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10567   0b[01](_?[01])*                                       binary integers
10568   0[0-7](_?[0-7])*                                      octal integers
10569   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10570   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10571
10572   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10573   thing it reads.
10574
10575   If it reads a number without a decimal point or an exponent, it will
10576   try converting the number to an integer and see if it can do so
10577   without loss of precision.
10578 */
10579
10580 char *
10581 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10582 {
10583     const char *s = start;      /* current position in buffer */
10584     char *d;                    /* destination in temp buffer */
10585     char *e;                    /* end of temp buffer */
10586     NV nv;                              /* number read, as a double */
10587     SV *sv = NULL;                      /* place to put the converted number */
10588     bool floatit;                       /* boolean: int or float? */
10589     const char *lastub = NULL;          /* position of last underbar */
10590     static const char* const number_too_long = "Number too long";
10591     /* Hexadecimal floating point.
10592      *
10593      * In many places (where we have quads and NV is IEEE 754 double)
10594      * we can fit the mantissa bits of a NV into an unsigned quad.
10595      * (Note that UVs might not be quads even when we have quads.)
10596      * This will not work everywhere, though (either no quads, or
10597      * using long doubles), in which case we have to resort to NV,
10598      * which will probably mean horrible loss of precision due to
10599      * multiple fp operations. */
10600     bool hexfp = FALSE;
10601     int total_bits = 0;
10602     int significant_bits = 0;
10603 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10604 #  define HEXFP_UQUAD
10605     Uquad_t hexfp_uquad = 0;
10606     int hexfp_frac_bits = 0;
10607 #else
10608 #  define HEXFP_NV
10609     NV hexfp_nv = 0.0;
10610 #endif
10611     NV hexfp_mult = 1.0;
10612     UV high_non_zero = 0; /* highest digit */
10613     int non_zero_integer_digits = 0;
10614
10615     PERL_ARGS_ASSERT_SCAN_NUM;
10616
10617     /* We use the first character to decide what type of number this is */
10618
10619     switch (*s) {
10620     default:
10621         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10622
10623     /* if it starts with a 0, it could be an octal number, a decimal in
10624        0.13 disguise, or a hexadecimal number, or a binary number. */
10625     case '0':
10626         {
10627           /* variables:
10628              u          holds the "number so far"
10629              shift      the power of 2 of the base
10630                         (hex == 4, octal == 3, binary == 1)
10631              overflowed was the number more than we can hold?
10632
10633              Shift is used when we add a digit.  It also serves as an "are
10634              we in octal/hex/binary?" indicator to disallow hex characters
10635              when in octal mode.
10636            */
10637             NV n = 0.0;
10638             UV u = 0;
10639             I32 shift;
10640             bool overflowed = FALSE;
10641             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10642             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10643             static const char* const bases[5] =
10644               { "", "binary", "", "octal", "hexadecimal" };
10645             static const char* const Bases[5] =
10646               { "", "Binary", "", "Octal", "Hexadecimal" };
10647             static const char* const maxima[5] =
10648               { "",
10649                 "0b11111111111111111111111111111111",
10650                 "",
10651                 "037777777777",
10652                 "0xffffffff" };
10653             const char *base, *Base, *max;
10654
10655             /* check for hex */
10656             if (isALPHA_FOLD_EQ(s[1], 'x')) {
10657                 shift = 4;
10658                 s += 2;
10659                 just_zero = FALSE;
10660             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10661                 shift = 1;
10662                 s += 2;
10663                 just_zero = FALSE;
10664             }
10665             /* check for a decimal in disguise */
10666             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10667                 goto decimal;
10668             /* so it must be octal */
10669             else {
10670                 shift = 3;
10671                 s++;
10672             }
10673
10674             if (*s == '_') {
10675                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10676                                "Misplaced _ in number");
10677                lastub = s++;
10678             }
10679
10680             base = bases[shift];
10681             Base = Bases[shift];
10682             max  = maxima[shift];
10683
10684             /* read the rest of the number */
10685             for (;;) {
10686                 /* x is used in the overflow test,
10687                    b is the digit we're adding on. */
10688                 UV x, b;
10689
10690                 switch (*s) {
10691
10692                 /* if we don't mention it, we're done */
10693                 default:
10694                     goto out;
10695
10696                 /* _ are ignored -- but warned about if consecutive */
10697                 case '_':
10698                     if (lastub && s == lastub + 1)
10699                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10700                                        "Misplaced _ in number");
10701                     lastub = s++;
10702                     break;
10703
10704                 /* 8 and 9 are not octal */
10705                 case '8': case '9':
10706                     if (shift == 3)
10707                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10708                     /* FALLTHROUGH */
10709
10710                 /* octal digits */
10711                 case '2': case '3': case '4':
10712                 case '5': case '6': case '7':
10713                     if (shift == 1)
10714                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10715                     /* FALLTHROUGH */
10716
10717                 case '0': case '1':
10718                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10719                     goto digit;
10720
10721                 /* hex digits */
10722                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10723                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10724                     /* make sure they said 0x */
10725                     if (shift != 4)
10726                         goto out;
10727                     b = (*s++ & 7) + 9;
10728
10729                     /* Prepare to put the digit we have onto the end
10730                        of the number so far.  We check for overflows.
10731                     */
10732
10733                   digit:
10734                     just_zero = FALSE;
10735                     if (!overflowed) {
10736                         x = u << shift; /* make room for the digit */
10737
10738                         total_bits += shift;
10739
10740                         if ((x >> shift) != u
10741                             && !(PL_hints & HINT_NEW_BINARY)) {
10742                             overflowed = TRUE;
10743                             n = (NV) u;
10744                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10745                                              "Integer overflow in %s number",
10746                                              base);
10747                         } else
10748                             u = x | b;          /* add the digit to the end */
10749                     }
10750                     if (overflowed) {
10751                         n *= nvshift[shift];
10752                         /* If an NV has not enough bits in its
10753                          * mantissa to represent an UV this summing of
10754                          * small low-order numbers is a waste of time
10755                          * (because the NV cannot preserve the
10756                          * low-order bits anyway): we could just
10757                          * remember when did we overflow and in the
10758                          * end just multiply n by the right
10759                          * amount. */
10760                         n += (NV) b;
10761                     }
10762
10763                     if (high_non_zero == 0 && b > 0)
10764                         high_non_zero = b;
10765
10766                     if (high_non_zero)
10767                         non_zero_integer_digits++;
10768
10769                     /* this could be hexfp, but peek ahead
10770                      * to avoid matching ".." */
10771                     if (UNLIKELY(HEXFP_PEEK(s))) {
10772                         goto out;
10773                     }
10774
10775                     break;
10776                 }
10777             }
10778
10779           /* if we get here, we had success: make a scalar value from
10780              the number.
10781           */
10782           out:
10783
10784             /* final misplaced underbar check */
10785             if (s[-1] == '_') {
10786                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10787             }
10788
10789             if (UNLIKELY(HEXFP_PEEK(s))) {
10790                 /* Do sloppy (on the underbars) but quick detection
10791                  * (and value construction) for hexfp, the decimal
10792                  * detection will shortly be more thorough with the
10793                  * underbar checks. */
10794                 const char* h = s;
10795                 significant_bits = non_zero_integer_digits * shift;
10796 #ifdef HEXFP_UQUAD
10797                 hexfp_uquad = u;
10798 #else /* HEXFP_NV */
10799                 hexfp_nv = u;
10800 #endif
10801                 /* Ignore the leading zero bits of
10802                  * the high (first) non-zero digit. */
10803                 if (high_non_zero) {
10804                     if (high_non_zero < 0x8)
10805                         significant_bits--;
10806                     if (high_non_zero < 0x4)
10807                         significant_bits--;
10808                     if (high_non_zero < 0x2)
10809                         significant_bits--;
10810                 }
10811
10812                 if (*h == '.') {
10813 #ifdef HEXFP_NV
10814                     NV nv_mult = 1.0;
10815 #endif
10816                     bool accumulate = TRUE;
10817                     for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10818                         if (isXDIGIT(*h)) {
10819                             U8 b = XDIGIT_VALUE(*h);
10820                             significant_bits += shift;
10821 #ifdef HEXFP_UQUAD
10822                             if (accumulate) {
10823                                 if (significant_bits < NV_MANT_DIG) {
10824                                     /* We are in the long "run" of xdigits,
10825                                      * accumulate the full four bits. */
10826                                     hexfp_uquad <<= shift;
10827                                     hexfp_uquad |= b;
10828                                     hexfp_frac_bits += shift;
10829                                 } else {
10830                                     /* We are at a hexdigit either at,
10831                                      * or straddling, the edge of mantissa.
10832                                      * We will try grabbing as many as
10833                                      * possible bits. */
10834                                     int tail =
10835                                       significant_bits - NV_MANT_DIG;
10836                                     if (tail <= 0)
10837                                        tail += shift;
10838                                     hexfp_uquad <<= tail;
10839                                     hexfp_uquad |= b >> (shift - tail);
10840                                     hexfp_frac_bits += tail;
10841
10842                                     /* Ignore the trailing zero bits
10843                                      * of the last non-zero xdigit.
10844                                      *
10845                                      * The assumption here is that if
10846                                      * one has input of e.g. the xdigit
10847                                      * eight (0x8), there is only one
10848                                      * bit being input, not the full
10849                                      * four bits.  Conversely, if one
10850                                      * specifies a zero xdigit, the
10851                                      * assumption is that one really
10852                                      * wants all those bits to be zero. */
10853                                     if (b) {
10854                                         if ((b & 0x1) == 0x0) {
10855                                             significant_bits--;
10856                                             if ((b & 0x2) == 0x0) {
10857                                                 significant_bits--;
10858                                                 if ((b & 0x4) == 0x0) {
10859                                                     significant_bits--;
10860                                                 }
10861                                             }
10862                                         }
10863                                     }
10864
10865                                     accumulate = FALSE;
10866                                 }
10867                             } else {
10868                                 /* Keep skipping the xdigits, and
10869                                  * accumulating the significant bits,
10870                                  * but do not shift the uquad
10871                                  * (which would catastrophically drop
10872                                  * high-order bits) or accumulate the
10873                                  * xdigits anymore. */
10874                             }
10875 #else /* HEXFP_NV */
10876                             if (accumulate) {
10877                                 nv_mult /= 16.0;
10878                                 if (nv_mult > 0.0)
10879                                     hexfp_nv += b * nv_mult;
10880                                 else
10881                                     accumulate = FALSE;
10882                             }
10883 #endif
10884                         }
10885                         if (significant_bits >= NV_MANT_DIG)
10886                             accumulate = FALSE;
10887                     }
10888                 }
10889
10890                 if ((total_bits > 0 || significant_bits > 0) &&
10891                     isALPHA_FOLD_EQ(*h, 'p')) {
10892                     bool negexp = FALSE;
10893                     h++;
10894                     if (*h == '+')
10895                         h++;
10896                     else if (*h == '-') {
10897                         negexp = TRUE;
10898                         h++;
10899                     }
10900                     if (isDIGIT(*h)) {
10901                         I32 hexfp_exp = 0;
10902                         while (isDIGIT(*h) || *h == '_') {
10903                             if (isDIGIT(*h)) {
10904                                 hexfp_exp *= 10;
10905                                 hexfp_exp += *h - '0';
10906 #ifdef NV_MIN_EXP
10907                                 if (negexp
10908                                     && -hexfp_exp < NV_MIN_EXP - 1) {
10909                                     /* NOTE: this means that the exponent
10910                                      * underflow warning happens for
10911                                      * the IEEE 754 subnormals (denormals),
10912                                      * because DBL_MIN_EXP etc are the lowest
10913                                      * possible binary (or, rather, DBL_RADIX-base)
10914                                      * exponent for normals, not subnormals.
10915                                      *
10916                                      * This may or may not be a good thing. */
10917                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10918                                                    "Hexadecimal float: exponent underflow");
10919                                     break;
10920                                 }
10921 #endif
10922 #ifdef NV_MAX_EXP
10923                                 if (!negexp
10924                                     && hexfp_exp > NV_MAX_EXP - 1) {
10925                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10926                                                    "Hexadecimal float: exponent overflow");
10927                                     break;
10928                                 }
10929 #endif
10930                             }
10931                             h++;
10932                         }
10933                         if (negexp)
10934                             hexfp_exp = -hexfp_exp;
10935 #ifdef HEXFP_UQUAD
10936                         hexfp_exp -= hexfp_frac_bits;
10937 #endif
10938                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
10939                         hexfp = TRUE;
10940                         goto decimal;
10941                     }
10942                 }
10943             }
10944
10945             if (overflowed) {
10946                 if (n > 4294967295.0)
10947                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10948                                    "%s number > %s non-portable",
10949                                    Base, max);
10950                 sv = newSVnv(n);
10951             }
10952             else {
10953 #if UVSIZE > 4
10954                 if (u > 0xffffffff)
10955                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10956                                    "%s number > %s non-portable",
10957                                    Base, max);
10958 #endif
10959                 sv = newSVuv(u);
10960             }
10961             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10962                 sv = new_constant(start, s - start, "integer",
10963                                   sv, NULL, NULL, 0);
10964             else if (PL_hints & HINT_NEW_BINARY)
10965                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10966         }
10967         break;
10968
10969     /*
10970       handle decimal numbers.
10971       we're also sent here when we read a 0 as the first digit
10972     */
10973     case '1': case '2': case '3': case '4': case '5':
10974     case '6': case '7': case '8': case '9': case '.':
10975       decimal:
10976         d = PL_tokenbuf;
10977         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10978         floatit = FALSE;
10979         if (hexfp) {
10980             floatit = TRUE;
10981             *d++ = '0';
10982             *d++ = 'x';
10983             s = start + 2;
10984         }
10985
10986         /* read next group of digits and _ and copy into d */
10987         while (isDIGIT(*s)
10988                || *s == '_'
10989                || UNLIKELY(hexfp && isXDIGIT(*s)))
10990         {
10991             /* skip underscores, checking for misplaced ones
10992                if -w is on
10993             */
10994             if (*s == '_') {
10995                 if (lastub && s == lastub + 1)
10996                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10997                                    "Misplaced _ in number");
10998                 lastub = s++;
10999             }
11000             else {
11001                 /* check for end of fixed-length buffer */
11002                 if (d >= e)
11003                     Perl_croak(aTHX_ "%s", number_too_long);
11004                 /* if we're ok, copy the character */
11005                 *d++ = *s++;
11006             }
11007         }
11008
11009         /* final misplaced underbar check */
11010         if (lastub && s == lastub + 1) {
11011             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11012         }
11013
11014         /* read a decimal portion if there is one.  avoid
11015            3..5 being interpreted as the number 3. followed
11016            by .5
11017         */
11018         if (*s == '.' && s[1] != '.') {
11019             floatit = TRUE;
11020             *d++ = *s++;
11021
11022             if (*s == '_') {
11023                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11024                                "Misplaced _ in number");
11025                 lastub = s;
11026             }
11027
11028             /* copy, ignoring underbars, until we run out of digits.
11029             */
11030             for (; isDIGIT(*s)
11031                    || *s == '_'
11032                    || UNLIKELY(hexfp && isXDIGIT(*s));
11033                  s++)
11034             {
11035                 /* fixed length buffer check */
11036                 if (d >= e)
11037                     Perl_croak(aTHX_ "%s", number_too_long);
11038                 if (*s == '_') {
11039                    if (lastub && s == lastub + 1)
11040                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11041                                       "Misplaced _ in number");
11042                    lastub = s;
11043                 }
11044                 else
11045                     *d++ = *s;
11046             }
11047             /* fractional part ending in underbar? */
11048             if (s[-1] == '_') {
11049                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11050                                "Misplaced _ in number");
11051             }
11052             if (*s == '.' && isDIGIT(s[1])) {
11053                 /* oops, it's really a v-string, but without the "v" */
11054                 s = start;
11055                 goto vstring;
11056             }
11057         }
11058
11059         /* read exponent part, if present */
11060         if ((isALPHA_FOLD_EQ(*s, 'e')
11061               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11062             && strchr("+-0123456789_", s[1]))
11063         {
11064             floatit = TRUE;
11065
11066             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11067                ditto for p (hexfloats) */
11068             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11069                 /* At least some Mach atof()s don't grok 'E' */
11070                 *d++ = 'e';
11071             }
11072             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11073                 *d++ = 'p';
11074             }
11075
11076             s++;
11077
11078
11079             /* stray preinitial _ */
11080             if (*s == '_') {
11081                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11082                                "Misplaced _ in number");
11083                 lastub = s++;
11084             }
11085
11086             /* allow positive or negative exponent */
11087             if (*s == '+' || *s == '-')
11088                 *d++ = *s++;
11089
11090             /* stray initial _ */
11091             if (*s == '_') {
11092                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11093                                "Misplaced _ in number");
11094                 lastub = s++;
11095             }
11096
11097             /* read digits of exponent */
11098             while (isDIGIT(*s) || *s == '_') {
11099                 if (isDIGIT(*s)) {
11100                     if (d >= e)
11101                         Perl_croak(aTHX_ "%s", number_too_long);
11102                     *d++ = *s++;
11103                 }
11104                 else {
11105                    if (((lastub && s == lastub + 1)
11106                         || (!isDIGIT(s[1]) && s[1] != '_')))
11107                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11108                                       "Misplaced _ in number");
11109                    lastub = s++;
11110                 }
11111             }
11112         }
11113
11114
11115         /*
11116            We try to do an integer conversion first if no characters
11117            indicating "float" have been found.
11118          */
11119
11120         if (!floatit) {
11121             UV uv;
11122             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11123
11124             if (flags == IS_NUMBER_IN_UV) {
11125               if (uv <= IV_MAX)
11126                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11127               else
11128                 sv = newSVuv(uv);
11129             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11130               if (uv <= (UV) IV_MIN)
11131                 sv = newSViv(-(IV)uv);
11132               else
11133                 floatit = TRUE;
11134             } else
11135               floatit = TRUE;
11136         }
11137         if (floatit) {
11138             STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
11139             /* terminate the string */
11140             *d = '\0';
11141             if (UNLIKELY(hexfp)) {
11142 #  ifdef NV_MANT_DIG
11143                 if (significant_bits > NV_MANT_DIG)
11144                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11145                                    "Hexadecimal float: mantissa overflow");
11146 #  endif
11147 #ifdef HEXFP_UQUAD
11148                 nv = hexfp_uquad * hexfp_mult;
11149 #else /* HEXFP_NV */
11150                 nv = hexfp_nv * hexfp_mult;
11151 #endif
11152             } else {
11153                 nv = Atof(PL_tokenbuf);
11154             }
11155             RESTORE_LC_NUMERIC_UNDERLYING();
11156             sv = newSVnv(nv);
11157         }
11158
11159         if ( floatit
11160              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11161             const char *const key = floatit ? "float" : "integer";
11162             const STRLEN keylen = floatit ? 5 : 7;
11163             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11164                                 key, keylen, sv, NULL, NULL, 0);
11165         }
11166         break;
11167
11168     /* if it starts with a v, it could be a v-string */
11169     case 'v':
11170     vstring:
11171                 sv = newSV(5); /* preallocate storage space */
11172                 ENTER_with_name("scan_vstring");
11173                 SAVEFREESV(sv);
11174                 s = scan_vstring(s, PL_bufend, sv);
11175                 SvREFCNT_inc_simple_void_NN(sv);
11176                 LEAVE_with_name("scan_vstring");
11177         break;
11178     }
11179
11180     /* make the op for the constant and return */
11181
11182     if (sv)
11183         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11184     else
11185         lvalp->opval = NULL;
11186
11187     return (char *)s;
11188 }
11189
11190 STATIC char *
11191 S_scan_formline(pTHX_ char *s)
11192 {
11193     char *eol;
11194     char *t;
11195     SV * const stuff = newSVpvs("");
11196     bool needargs = FALSE;
11197     bool eofmt = FALSE;
11198
11199     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11200
11201     while (!needargs) {
11202         if (*s == '.') {
11203             t = s+1;
11204 #ifdef PERL_STRICT_CR
11205             while (SPACE_OR_TAB(*t))
11206                 t++;
11207 #else
11208             while (SPACE_OR_TAB(*t) || *t == '\r')
11209                 t++;
11210 #endif
11211             if (*t == '\n' || t == PL_bufend) {
11212                 eofmt = TRUE;
11213                 break;
11214             }
11215         }
11216         eol = (char *) memchr(s,'\n',PL_bufend-s);
11217         if (!eol++)
11218                 eol = PL_bufend;
11219         if (*s != '#') {
11220             for (t = s; t < eol; t++) {
11221                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11222                     needargs = FALSE;
11223                     goto enough;        /* ~~ must be first line in formline */
11224                 }
11225                 if (*t == '@' || *t == '^')
11226                     needargs = TRUE;
11227             }
11228             if (eol > s) {
11229                 sv_catpvn(stuff, s, eol-s);
11230 #ifndef PERL_STRICT_CR
11231                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11232                     char *end = SvPVX(stuff) + SvCUR(stuff);
11233                     end[-2] = '\n';
11234                     end[-1] = '\0';
11235                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11236                 }
11237 #endif
11238             }
11239             else
11240               break;
11241         }
11242         s = (char*)eol;
11243         if ((PL_rsfp || PL_parser->filtered)
11244          && PL_parser->form_lex_state == LEX_NORMAL) {
11245             bool got_some;
11246             PL_bufptr = PL_bufend;
11247             COPLINE_INC_WITH_HERELINES;
11248             got_some = lex_next_chunk(0);
11249             CopLINE_dec(PL_curcop);
11250             s = PL_bufptr;
11251             if (!got_some)
11252                 break;
11253         }
11254         incline(s);
11255     }
11256   enough:
11257     if (!SvCUR(stuff) || needargs)
11258         PL_lex_state = PL_parser->form_lex_state;
11259     if (SvCUR(stuff)) {
11260         PL_expect = XSTATE;
11261         if (needargs) {
11262             const char *s2 = s;
11263             while (isSPACE(*s2) && *s2 != '\n')
11264                 s2++;
11265             if (*s2 == '{') {
11266                 PL_expect = XTERMBLOCK;
11267                 NEXTVAL_NEXTTOKE.ival = 0;
11268                 force_next(DO);
11269             }
11270             NEXTVAL_NEXTTOKE.ival = 0;
11271             force_next(FORMLBRACK);
11272         }
11273         if (!IN_BYTES) {
11274             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11275                 SvUTF8_on(stuff);
11276         }
11277         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11278         force_next(THING);
11279     }
11280     else {
11281         SvREFCNT_dec(stuff);
11282         if (eofmt)
11283             PL_lex_formbrack = 0;
11284     }
11285     return s;
11286 }
11287
11288 I32
11289 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11290 {
11291     const I32 oldsavestack_ix = PL_savestack_ix;
11292     CV* const outsidecv = PL_compcv;
11293
11294     SAVEI32(PL_subline);
11295     save_item(PL_subname);
11296     SAVESPTR(PL_compcv);
11297
11298     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11299     CvFLAGS(PL_compcv) |= flags;
11300
11301     PL_subline = CopLINE(PL_curcop);
11302     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11303     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11304     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11305     if (outsidecv && CvPADLIST(outsidecv))
11306         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11307
11308     return oldsavestack_ix;
11309 }
11310
11311 static int
11312 S_yywarn(pTHX_ const char *const s, U32 flags)
11313 {
11314     PERL_ARGS_ASSERT_YYWARN;
11315
11316     PL_in_eval |= EVAL_WARNONLY;
11317     yyerror_pv(s, flags);
11318     return 0;
11319 }
11320
11321 int
11322 Perl_yyerror(pTHX_ const char *const s)
11323 {
11324     PERL_ARGS_ASSERT_YYERROR;
11325     return yyerror_pvn(s, strlen(s), 0);
11326 }
11327
11328 int
11329 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11330 {
11331     PERL_ARGS_ASSERT_YYERROR_PV;
11332     return yyerror_pvn(s, strlen(s), flags);
11333 }
11334
11335 int
11336 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11337 {
11338     const char *context = NULL;
11339     int contlen = -1;
11340     SV *msg;
11341     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11342     int yychar  = PL_parser->yychar;
11343
11344     PERL_ARGS_ASSERT_YYERROR_PVN;
11345
11346     if (!yychar || (yychar == ';' && !PL_rsfp))
11347         sv_catpvs(where_sv, "at EOF");
11348     else if (   PL_oldoldbufptr
11349              && PL_bufptr > PL_oldoldbufptr
11350              && PL_bufptr - PL_oldoldbufptr < 200
11351              && PL_oldoldbufptr != PL_oldbufptr
11352              && PL_oldbufptr != PL_bufptr)
11353     {
11354         /*
11355                 Only for NetWare:
11356                 The code below is removed for NetWare because it abends/crashes on NetWare
11357                 when the script has error such as not having the closing quotes like:
11358                     if ($var eq "value)
11359                 Checking of white spaces is anyway done in NetWare code.
11360         */
11361 #ifndef NETWARE
11362         while (isSPACE(*PL_oldoldbufptr))
11363             PL_oldoldbufptr++;
11364 #endif
11365         context = PL_oldoldbufptr;
11366         contlen = PL_bufptr - PL_oldoldbufptr;
11367     }
11368     else if (  PL_oldbufptr
11369             && PL_bufptr > PL_oldbufptr
11370             && PL_bufptr - PL_oldbufptr < 200
11371             && PL_oldbufptr != PL_bufptr) {
11372         /*
11373                 Only for NetWare:
11374                 The code below is removed for NetWare because it abends/crashes on NetWare
11375                 when the script has error such as not having the closing quotes like:
11376                     if ($var eq "value)
11377                 Checking of white spaces is anyway done in NetWare code.
11378         */
11379 #ifndef NETWARE
11380         while (isSPACE(*PL_oldbufptr))
11381             PL_oldbufptr++;
11382 #endif
11383         context = PL_oldbufptr;
11384         contlen = PL_bufptr - PL_oldbufptr;
11385     }
11386     else if (yychar > 255)
11387         sv_catpvs(where_sv, "next token ???");
11388     else if (yychar == YYEMPTY) {
11389         if (PL_lex_state == LEX_NORMAL)
11390             sv_catpvs(where_sv, "at end of line");
11391         else if (PL_lex_inpat)
11392             sv_catpvs(where_sv, "within pattern");
11393         else
11394             sv_catpvs(where_sv, "within string");
11395     }
11396     else {
11397         sv_catpvs(where_sv, "next char ");
11398         if (yychar < 32)
11399             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11400         else if (isPRINT_LC(yychar)) {
11401             const char string = yychar;
11402             sv_catpvn(where_sv, &string, 1);
11403         }
11404         else
11405             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11406     }
11407     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11408     Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11409         OutCopFILE(PL_curcop),
11410         (IV)(PL_parser->preambling == NOLINE
11411                ? CopLINE(PL_curcop)
11412                : PL_parser->preambling));
11413     if (context)
11414         Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11415                              UTF8fARG(UTF, contlen, context));
11416     else
11417         Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11418     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11419         Perl_sv_catpvf(aTHX_ msg,
11420         "  (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n",
11421                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11422         PL_multi_end = 0;
11423     }
11424     if (PL_in_eval & EVAL_WARNONLY) {
11425         PL_in_eval &= ~EVAL_WARNONLY;
11426         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11427     }
11428     else
11429         qerror(msg);
11430     if (PL_error_count >= 10) {
11431         SV * errsv;
11432         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11433             Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n",
11434                        SVfARG(errsv), OutCopFILE(PL_curcop));
11435         else
11436             Perl_croak(aTHX_ "%s has too many errors.\n",
11437             OutCopFILE(PL_curcop));
11438     }
11439     PL_in_my = 0;
11440     PL_in_my_stash = NULL;
11441     return 0;
11442 }
11443
11444 STATIC char*
11445 S_swallow_bom(pTHX_ U8 *s)
11446 {
11447     const STRLEN slen = SvCUR(PL_linestr);
11448
11449     PERL_ARGS_ASSERT_SWALLOW_BOM;
11450
11451     switch (s[0]) {
11452     case 0xFF:
11453         if (s[1] == 0xFE) {
11454             /* UTF-16 little-endian? (or UTF-32LE?) */
11455             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11456                 /* diag_listed_as: Unsupported script encoding %s */
11457                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11458 #ifndef PERL_NO_UTF16_FILTER
11459             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11460             s += 2;
11461             if (PL_bufend > (char*)s) {
11462                 s = add_utf16_textfilter(s, TRUE);
11463             }
11464 #else
11465             /* diag_listed_as: Unsupported script encoding %s */
11466             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11467 #endif
11468         }
11469         break;
11470     case 0xFE:
11471         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11472 #ifndef PERL_NO_UTF16_FILTER
11473             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11474             s += 2;
11475             if (PL_bufend > (char *)s) {
11476                 s = add_utf16_textfilter(s, FALSE);
11477             }
11478 #else
11479             /* diag_listed_as: Unsupported script encoding %s */
11480             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11481 #endif
11482         }
11483         break;
11484     case BOM_UTF8_FIRST_BYTE: {
11485         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11486         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11487             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11488             s += len + 1;                      /* UTF-8 */
11489         }
11490         break;
11491     }
11492     case 0:
11493         if (slen > 3) {
11494              if (s[1] == 0) {
11495                   if (s[2] == 0xFE && s[3] == 0xFF) {
11496                        /* UTF-32 big-endian */
11497                        /* diag_listed_as: Unsupported script encoding %s */
11498                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11499                   }
11500              }
11501              else if (s[2] == 0 && s[3] != 0) {
11502                   /* Leading bytes
11503                    * 00 xx 00 xx
11504                    * are a good indicator of UTF-16BE. */
11505 #ifndef PERL_NO_UTF16_FILTER
11506                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11507                   s = add_utf16_textfilter(s, FALSE);
11508 #else
11509                   /* diag_listed_as: Unsupported script encoding %s */
11510                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11511 #endif
11512              }
11513         }
11514         break;
11515
11516     default:
11517          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11518                   /* Leading bytes
11519                    * xx 00 xx 00
11520                    * are a good indicator of UTF-16LE. */
11521 #ifndef PERL_NO_UTF16_FILTER
11522               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11523               s = add_utf16_textfilter(s, TRUE);
11524 #else
11525               /* diag_listed_as: Unsupported script encoding %s */
11526               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11527 #endif
11528          }
11529     }
11530     return (char*)s;
11531 }
11532
11533
11534 #ifndef PERL_NO_UTF16_FILTER
11535 static I32
11536 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11537 {
11538     SV *const filter = FILTER_DATA(idx);
11539     /* We re-use this each time round, throwing the contents away before we
11540        return.  */
11541     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11542     SV *const utf8_buffer = filter;
11543     IV status = IoPAGE(filter);
11544     const bool reverse = cBOOL(IoLINES(filter));
11545     I32 retval;
11546
11547     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11548
11549     /* As we're automatically added, at the lowest level, and hence only called
11550        from this file, we can be sure that we're not called in block mode. Hence
11551        don't bother writing code to deal with block mode.  */
11552     if (maxlen) {
11553         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11554     }
11555     if (status < 0) {
11556         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
11557     }
11558     DEBUG_P(PerlIO_printf(Perl_debug_log,
11559                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11560                           FPTR2DPTR(void *, S_utf16_textfilter),
11561                           reverse ? 'l' : 'b', idx, maxlen, status,
11562                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11563
11564     while (1) {
11565         STRLEN chars;
11566         STRLEN have;
11567         I32 newlen;
11568         U8 *end;
11569         /* First, look in our buffer of existing UTF-8 data:  */
11570         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11571
11572         if (nl) {
11573             ++nl;
11574         } else if (status == 0) {
11575             /* EOF */
11576             IoPAGE(filter) = 0;
11577             nl = SvEND(utf8_buffer);
11578         }
11579         if (nl) {
11580             STRLEN got = nl - SvPVX(utf8_buffer);
11581             /* Did we have anything to append?  */
11582             retval = got != 0;
11583             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11584             /* Everything else in this code works just fine if SVp_POK isn't
11585                set.  This, however, needs it, and we need it to work, else
11586                we loop infinitely because the buffer is never consumed.  */
11587             sv_chop(utf8_buffer, nl);
11588             break;
11589         }
11590
11591         /* OK, not a complete line there, so need to read some more UTF-16.
11592            Read an extra octect if the buffer currently has an odd number. */
11593         while (1) {
11594             if (status <= 0)
11595                 break;
11596             if (SvCUR(utf16_buffer) >= 2) {
11597                 /* Location of the high octet of the last complete code point.
11598                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11599                    *coupled* with all the benefits of partial reads and
11600                    endianness.  */
11601                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11602                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11603
11604                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11605                     break;
11606                 }
11607
11608                 /* We have the first half of a surrogate. Read more.  */
11609                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11610             }
11611
11612             status = FILTER_READ(idx + 1, utf16_buffer,
11613                                  160 + (SvCUR(utf16_buffer) & 1));
11614             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
11615             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11616             if (status < 0) {
11617                 /* Error */
11618                 IoPAGE(filter) = status;
11619                 return status;
11620             }
11621         }
11622
11623         chars = SvCUR(utf16_buffer) >> 1;
11624         have = SvCUR(utf8_buffer);
11625         SvGROW(utf8_buffer, have + chars * 3 + 1);
11626
11627         if (reverse) {
11628             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11629                                          (U8*)SvPVX_const(utf8_buffer) + have,
11630                                          chars * 2, &newlen);
11631         } else {
11632             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11633                                 (U8*)SvPVX_const(utf8_buffer) + have,
11634                                 chars * 2, &newlen);
11635         }
11636         SvCUR_set(utf8_buffer, have + newlen);
11637         *end = '\0';
11638
11639         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11640            it's private to us, and utf16_to_utf8{,reversed} take a
11641            (pointer,length) pair, rather than a NUL-terminated string.  */
11642         if(SvCUR(utf16_buffer) & 1) {
11643             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11644             SvCUR_set(utf16_buffer, 1);
11645         } else {
11646             SvCUR_set(utf16_buffer, 0);
11647         }
11648     }
11649     DEBUG_P(PerlIO_printf(Perl_debug_log,
11650                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11651                           status,
11652                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11653     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11654     return retval;
11655 }
11656
11657 static U8 *
11658 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11659 {
11660     SV *filter = filter_add(S_utf16_textfilter, NULL);
11661
11662     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11663
11664     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11665     SvPVCLEAR(filter);
11666     IoLINES(filter) = reversed;
11667     IoPAGE(filter) = 1; /* Not EOF */
11668
11669     /* Sadly, we have to return a valid pointer, come what may, so we have to
11670        ignore any error return from this.  */
11671     SvCUR_set(PL_linestr, 0);
11672     if (FILTER_READ(0, PL_linestr, 0)) {
11673         SvUTF8_on(PL_linestr);
11674     } else {
11675         SvUTF8_on(PL_linestr);
11676     }
11677     PL_bufend = SvEND(PL_linestr);
11678     return (U8*)SvPVX(PL_linestr);
11679 }
11680 #endif
11681
11682 /*
11683 Returns a pointer to the next character after the parsed
11684 vstring, as well as updating the passed in sv.
11685
11686 Function must be called like
11687
11688         sv = sv_2mortal(newSV(5));
11689         s = scan_vstring(s,e,sv);
11690
11691 where s and e are the start and end of the string.
11692 The sv should already be large enough to store the vstring
11693 passed in, for performance reasons.
11694
11695 This function may croak if fatal warnings are enabled in the
11696 calling scope, hence the sv_2mortal in the example (to prevent
11697 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11698 sv_2mortal.
11699
11700 */
11701
11702 char *
11703 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11704 {
11705     const char *pos = s;
11706     const char *start = s;
11707
11708     PERL_ARGS_ASSERT_SCAN_VSTRING;
11709
11710     if (*pos == 'v') pos++;  /* get past 'v' */
11711     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11712         pos++;
11713     if ( *pos != '.') {
11714         /* this may not be a v-string if followed by => */
11715         const char *next = pos;
11716         while (next < e && isSPACE(*next))
11717             ++next;
11718         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11719             /* return string not v-string */
11720             sv_setpvn(sv,(char *)s,pos-s);
11721             return (char *)pos;
11722         }
11723     }
11724
11725     if (!isALPHA(*pos)) {
11726         U8 tmpbuf[UTF8_MAXBYTES+1];
11727
11728         if (*s == 'v')
11729             s++;  /* get past 'v' */
11730
11731         SvPVCLEAR(sv);
11732
11733         for (;;) {
11734             /* this is atoi() that tolerates underscores */
11735             U8 *tmpend;
11736             UV rev = 0;
11737             const char *end = pos;
11738             UV mult = 1;
11739             while (--end >= s) {
11740                 if (*end != '_') {
11741                     const UV orev = rev;
11742                     rev += (*end - '0') * mult;
11743                     mult *= 10;
11744                     if (orev > rev)
11745                         /* diag_listed_as: Integer overflow in %s number */
11746                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11747                                          "Integer overflow in decimal number");
11748                 }
11749             }
11750
11751             /* Append native character for the rev point */
11752             tmpend = uvchr_to_utf8(tmpbuf, rev);
11753             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11754             if (!UVCHR_IS_INVARIANT(rev))
11755                  SvUTF8_on(sv);
11756             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11757                  s = ++pos;
11758             else {
11759                  s = pos;
11760                  break;
11761             }
11762             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11763                  pos++;
11764         }
11765         SvPOK_on(sv);
11766         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11767         SvRMAGICAL_on(sv);
11768     }
11769     return (char *)s;
11770 }
11771
11772 int
11773 Perl_keyword_plugin_standard(pTHX_
11774         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11775 {
11776     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11777     PERL_UNUSED_CONTEXT;
11778     PERL_UNUSED_ARG(keyword_ptr);
11779     PERL_UNUSED_ARG(keyword_len);
11780     PERL_UNUSED_ARG(op_ptr);
11781     return KEYWORD_PLUGIN_DECLINE;
11782 }
11783
11784 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11785 static void
11786 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11787 {
11788     SAVEI32(PL_lex_brackets);
11789     if (PL_lex_brackets > 100)
11790         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11791     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11792     SAVEI32(PL_lex_allbrackets);
11793     PL_lex_allbrackets = 0;
11794     SAVEI8(PL_lex_fakeeof);
11795     PL_lex_fakeeof = (U8)fakeeof;
11796     if(yyparse(gramtype) && !PL_parser->error_count)
11797         qerror(Perl_mess(aTHX_ "Parse error"));
11798 }
11799
11800 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11801 static OP *
11802 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11803 {
11804     OP *o;
11805     ENTER;
11806     SAVEVPTR(PL_eval_root);
11807     PL_eval_root = NULL;
11808     parse_recdescent(gramtype, fakeeof);
11809     o = PL_eval_root;
11810     LEAVE;
11811     return o;
11812 }
11813
11814 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11815 static OP *
11816 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11817 {
11818     OP *exprop;
11819     if (flags & ~PARSE_OPTIONAL)
11820         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11821     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11822     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11823         if (!PL_parser->error_count)
11824             qerror(Perl_mess(aTHX_ "Parse error"));
11825         exprop = newOP(OP_NULL, 0);
11826     }
11827     return exprop;
11828 }
11829
11830 /*
11831 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11832
11833 Parse a Perl arithmetic expression.  This may contain operators of precedence
11834 down to the bit shift operators.  The expression must be followed (and thus
11835 terminated) either by a comparison or lower-precedence operator or by
11836 something that would normally terminate an expression such as semicolon.
11837 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11838 otherwise it is mandatory.  It is up to the caller to ensure that the
11839 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11840 the source of the code to be parsed and the lexical context for the
11841 expression.
11842
11843 The op tree representing the expression is returned.  If an optional
11844 expression is absent, a null pointer is returned, otherwise the pointer
11845 will be non-null.
11846
11847 If an error occurs in parsing or compilation, in most cases a valid op
11848 tree is returned anyway.  The error is reflected in the parser state,
11849 normally resulting in a single exception at the top level of parsing
11850 which covers all the compilation errors that occurred.  Some compilation
11851 errors, however, will throw an exception immediately.
11852
11853 =cut
11854 */
11855
11856 OP *
11857 Perl_parse_arithexpr(pTHX_ U32 flags)
11858 {
11859     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11860 }
11861
11862 /*
11863 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11864
11865 Parse a Perl term expression.  This may contain operators of precedence
11866 down to the assignment operators.  The expression must be followed (and thus
11867 terminated) either by a comma or lower-precedence operator or by
11868 something that would normally terminate an expression such as semicolon.
11869 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11870 otherwise it is mandatory.  It is up to the caller to ensure that the
11871 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11872 the source of the code to be parsed and the lexical context for the
11873 expression.
11874
11875 The op tree representing the expression is returned.  If an optional
11876 expression is absent, a null pointer is returned, otherwise the pointer
11877 will be non-null.
11878
11879 If an error occurs in parsing or compilation, in most cases a valid op
11880 tree is returned anyway.  The error is reflected in the parser state,
11881 normally resulting in a single exception at the top level of parsing
11882 which covers all the compilation errors that occurred.  Some compilation
11883 errors, however, will throw an exception immediately.
11884
11885 =cut
11886 */
11887
11888 OP *
11889 Perl_parse_termexpr(pTHX_ U32 flags)
11890 {
11891     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11892 }
11893
11894 /*
11895 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11896
11897 Parse a Perl list expression.  This may contain operators of precedence
11898 down to the comma operator.  The expression must be followed (and thus
11899 terminated) either by a low-precedence logic operator such as C<or> or by
11900 something that would normally terminate an expression such as semicolon.
11901 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11902 otherwise it is mandatory.  It is up to the caller to ensure that the
11903 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11904 the source of the code to be parsed and the lexical context for the
11905 expression.
11906
11907 The op tree representing the expression is returned.  If an optional
11908 expression is absent, a null pointer is returned, otherwise the pointer
11909 will be non-null.
11910
11911 If an error occurs in parsing or compilation, in most cases a valid op
11912 tree is returned anyway.  The error is reflected in the parser state,
11913 normally resulting in a single exception at the top level of parsing
11914 which covers all the compilation errors that occurred.  Some compilation
11915 errors, however, will throw an exception immediately.
11916
11917 =cut
11918 */
11919
11920 OP *
11921 Perl_parse_listexpr(pTHX_ U32 flags)
11922 {
11923     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11924 }
11925
11926 /*
11927 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11928
11929 Parse a single complete Perl expression.  This allows the full
11930 expression grammar, including the lowest-precedence operators such
11931 as C<or>.  The expression must be followed (and thus terminated) by a
11932 token that an expression would normally be terminated by: end-of-file,
11933 closing bracketing punctuation, semicolon, or one of the keywords that
11934 signals a postfix expression-statement modifier.  If C<flags> has the
11935 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
11936 mandatory.  It is up to the caller to ensure that the dynamic parser
11937 state (L</PL_parser> et al) is correctly set to reflect the source of
11938 the code to be parsed and the lexical context for the expression.
11939
11940 The op tree representing the expression is returned.  If an optional
11941 expression is absent, a null pointer is returned, otherwise the pointer
11942 will be non-null.
11943
11944 If an error occurs in parsing or compilation, in most cases a valid op
11945 tree is returned anyway.  The error is reflected in the parser state,
11946 normally resulting in a single exception at the top level of parsing
11947 which covers all the compilation errors that occurred.  Some compilation
11948 errors, however, will throw an exception immediately.
11949
11950 =cut
11951 */
11952
11953 OP *
11954 Perl_parse_fullexpr(pTHX_ U32 flags)
11955 {
11956     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11957 }
11958
11959 /*
11960 =for apidoc Amx|OP *|parse_block|U32 flags
11961
11962 Parse a single complete Perl code block.  This consists of an opening
11963 brace, a sequence of statements, and a closing brace.  The block
11964 constitutes a lexical scope, so C<my> variables and various compile-time
11965 effects can be contained within it.  It is up to the caller to ensure
11966 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11967 reflect the source of the code to be parsed and the lexical context for
11968 the statement.
11969
11970 The op tree representing the code block is returned.  This is always a
11971 real op, never a null pointer.  It will normally be a C<lineseq> list,
11972 including C<nextstate> or equivalent ops.  No ops to construct any kind
11973 of runtime scope are included by virtue of it being a block.
11974
11975 If an error occurs in parsing or compilation, in most cases a valid op
11976 tree (most likely null) is returned anyway.  The error is reflected in
11977 the parser state, normally resulting in a single exception at the top
11978 level of parsing which covers all the compilation errors that occurred.
11979 Some compilation errors, however, will throw an exception immediately.
11980
11981 The C<flags> parameter is reserved for future use, and must always
11982 be zero.
11983
11984 =cut
11985 */
11986
11987 OP *
11988 Perl_parse_block(pTHX_ U32 flags)
11989 {
11990     if (flags)
11991         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11992     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11993 }
11994
11995 /*
11996 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11997
11998 Parse a single unadorned Perl statement.  This may be a normal imperative
11999 statement or a declaration that has compile-time effect.  It does not
12000 include any label or other affixture.  It is up to the caller to ensure
12001 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12002 reflect the source of the code to be parsed and the lexical context for
12003 the statement.
12004
12005 The op tree representing the statement is returned.  This may be a
12006 null pointer if the statement is null, for example if it was actually
12007 a subroutine definition (which has compile-time side effects).  If not
12008 null, it will be ops directly implementing the statement, suitable to
12009 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12010 equivalent op (except for those embedded in a scope contained entirely
12011 within the statement).
12012
12013 If an error occurs in parsing or compilation, in most cases a valid op
12014 tree (most likely null) is returned anyway.  The error is reflected in
12015 the parser state, normally resulting in a single exception at the top
12016 level of parsing which covers all the compilation errors that occurred.
12017 Some compilation errors, however, will throw an exception immediately.
12018
12019 The C<flags> parameter is reserved for future use, and must always
12020 be zero.
12021
12022 =cut
12023 */
12024
12025 OP *
12026 Perl_parse_barestmt(pTHX_ U32 flags)
12027 {
12028     if (flags)
12029         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12030     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12031 }
12032
12033 /*
12034 =for apidoc Amx|SV *|parse_label|U32 flags
12035
12036 Parse a single label, possibly optional, of the type that may prefix a
12037 Perl statement.  It is up to the caller to ensure that the dynamic parser
12038 state (L</PL_parser> et al) is correctly set to reflect the source of
12039 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12040 label is optional, otherwise it is mandatory.
12041
12042 The name of the label is returned in the form of a fresh scalar.  If an
12043 optional label is absent, a null pointer is returned.
12044
12045 If an error occurs in parsing, which can only occur if the label is
12046 mandatory, a valid label is returned anyway.  The error is reflected in
12047 the parser state, normally resulting in a single exception at the top
12048 level of parsing which covers all the compilation errors that occurred.
12049
12050 =cut
12051 */
12052
12053 SV *
12054 Perl_parse_label(pTHX_ U32 flags)
12055 {
12056     if (flags & ~PARSE_OPTIONAL)
12057         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12058     if (PL_nexttoke) {
12059         PL_parser->yychar = yylex();
12060         if (PL_parser->yychar == LABEL) {
12061             char * const lpv = pl_yylval.pval;
12062             STRLEN llen = strlen(lpv);
12063             PL_parser->yychar = YYEMPTY;
12064             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12065         } else {
12066             yyunlex();
12067             goto no_label;
12068         }
12069     } else {
12070         char *s, *t;
12071         STRLEN wlen, bufptr_pos;
12072         lex_read_space(0);
12073         t = s = PL_bufptr;
12074         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12075             goto no_label;
12076         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12077         if (word_takes_any_delimiter(s, wlen))
12078             goto no_label;
12079         bufptr_pos = s - SvPVX(PL_linestr);
12080         PL_bufptr = t;
12081         lex_read_space(LEX_KEEP_PREVIOUS);
12082         t = PL_bufptr;
12083         s = SvPVX(PL_linestr) + bufptr_pos;
12084         if (t[0] == ':' && t[1] != ':') {
12085             PL_oldoldbufptr = PL_oldbufptr;
12086             PL_oldbufptr = s;
12087             PL_bufptr = t+1;
12088             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12089         } else {
12090             PL_bufptr = s;
12091             no_label:
12092             if (flags & PARSE_OPTIONAL) {
12093                 return NULL;
12094             } else {
12095                 qerror(Perl_mess(aTHX_ "Parse error"));
12096                 return newSVpvs("x");
12097             }
12098         }
12099     }
12100 }
12101
12102 /*
12103 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12104
12105 Parse a single complete Perl statement.  This may be a normal imperative
12106 statement or a declaration that has compile-time effect, and may include
12107 optional labels.  It is up to the caller to ensure that the dynamic
12108 parser state (L</PL_parser> et al) is correctly set to reflect the source
12109 of the code to be parsed and the lexical context for the statement.
12110
12111 The op tree representing the statement is returned.  This may be a
12112 null pointer if the statement is null, for example if it was actually
12113 a subroutine definition (which has compile-time side effects).  If not
12114 null, it will be the result of a L</newSTATEOP> call, normally including
12115 a C<nextstate> or equivalent op.
12116
12117 If an error occurs in parsing or compilation, in most cases a valid op
12118 tree (most likely null) is returned anyway.  The error is reflected in
12119 the parser state, normally resulting in a single exception at the top
12120 level of parsing which covers all the compilation errors that occurred.
12121 Some compilation errors, however, will throw an exception immediately.
12122
12123 The C<flags> parameter is reserved for future use, and must always
12124 be zero.
12125
12126 =cut
12127 */
12128
12129 OP *
12130 Perl_parse_fullstmt(pTHX_ U32 flags)
12131 {
12132     if (flags)
12133         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12134     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12135 }
12136
12137 /*
12138 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12139
12140 Parse a sequence of zero or more Perl statements.  These may be normal
12141 imperative statements, including optional labels, or declarations
12142 that have compile-time effect, or any mixture thereof.  The statement
12143 sequence ends when a closing brace or end-of-file is encountered in a
12144 place where a new statement could have validly started.  It is up to
12145 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12146 is correctly set to reflect the source of the code to be parsed and the
12147 lexical context for the statements.
12148
12149 The op tree representing the statement sequence is returned.  This may
12150 be a null pointer if the statements were all null, for example if there
12151 were no statements or if there were only subroutine definitions (which
12152 have compile-time side effects).  If not null, it will be a C<lineseq>
12153 list, normally including C<nextstate> or equivalent ops.
12154
12155 If an error occurs in parsing or compilation, in most cases a valid op
12156 tree is returned anyway.  The error is reflected in the parser state,
12157 normally resulting in a single exception at the top level of parsing
12158 which covers all the compilation errors that occurred.  Some compilation
12159 errors, however, will throw an exception immediately.
12160
12161 The C<flags> parameter is reserved for future use, and must always
12162 be zero.
12163
12164 =cut
12165 */
12166
12167 OP *
12168 Perl_parse_stmtseq(pTHX_ U32 flags)
12169 {
12170     OP *stmtseqop;
12171     I32 c;
12172     if (flags)
12173         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12174     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12175     c = lex_peek_unichar(0);
12176     if (c != -1 && c != /*{*/'}')
12177         qerror(Perl_mess(aTHX_ "Parse error"));
12178     return stmtseqop;
12179 }
12180
12181 /*
12182  * ex: set ts=8 sts=4 sw=4 et:
12183  */