toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 static const char* const ident_too_long = "Identifier too long";
 114
 115 #ifdef PERL_MAD
 116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 118 #else
 119 #  define CURMAD(slot,sv)
 120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 121 #endif
 122
 123 #define XENUMMASK  0x3f
 124 #define XFAKEEOF   0x40
 125 #define XFAKEBRACK 0x80
 126
 127 #ifdef USE_UTF8_SCRIPTS
 128 #   define UTF (!IN_BYTES)
 129 #else
 130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 131 #endif
 132
 133 /* The maximum number of characters preceding the unrecognized one to display */
 134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) isBLANK_A(c)
 141
 142 /* LEX_* are values for PL_lex_state, the state of the lexer.
 143  * They are arranged oddly so that the guard on the switch statement
 144  * can get by with a single comparison (if the compiler is smart enough).
 145  *
 146  * These values refer to the various states within a sublex parse,
 147  * i.e. within a double quotish string
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #ifdef ff_next
 186 #undef ff_next
 187 #endif
 188
 189 #include "keywords.h"
 190
 191 /* CLINE is a macro that ensures PL_copline has a sane value */
 192
 193 #ifdef CLINE
 194 #undef CLINE
 195 #endif
 196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 197
 198 #ifdef PERL_MAD
 199 #  define SKIPSPACE0(s) skipspace0(s)
 200 #  define SKIPSPACE1(s) skipspace1(s)
 201 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 202 #  define PEEKSPACE(s) skipspace2(s,0)
 203 #else
 204 #  define SKIPSPACE0(s) skipspace(s)
 205 #  define SKIPSPACE1(s) skipspace(s)
 206 #  define SKIPSPACE2(s,tsv) skipspace(s)
 207 #  define PEEKSPACE(s) skipspace(s)
 208 #endif
 209
 210 /*
 211  * Convenience functions to return different tokens and prime the
 212  * lexer for the next token.  They all take an argument.
 213  *
 214  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 215  * OPERATOR     : generic operator
 216  * AOPERATOR    : assignment operator
 217  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 218  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 219  * PREREF       : *EXPR where EXPR is not a simple identifier
 220  * TERM         : expression term
 221  * LOOPX        : loop exiting command (goto, last, dump, etc)
 222  * FTST         : file test operator
 223  * FUN0         : zero-argument function
 224  * FUN0OP       : zero-argument function, with its op created in this file
 225  * FUN1         : not used, except for not, which isn't a UNIOP
 226  * BOop         : bitwise or or xor
 227  * BAop         : bitwise and
 228  * SHop         : shift operator
 229  * PWop         : power operator
 230  * PMop         : pattern-matching operator
 231  * Aop          : addition-level operator
 232  * Mop          : multiplication-level operator
 233  * Eop          : equality-testing operator
 234  * Rop          : relational operator <= != gt
 235  *
 236  * Also see LOP and lop() below.
 237  */
 238
 239 #ifdef DEBUGGING /* Serve -DT. */
 240 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 241 #else
 242 #   define REPORT(retval) (retval)
 243 #endif
 244
 245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 253 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 254 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 255 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI3(f,x,have_x) { \
 273         pl_yylval.ival = f; \
 274         if (have_x) PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI3(f,XTERM,1)
 284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 285 #define UNIPROTO(f,optional) { \
 286         if (optional) PL_last_uni = PL_oldbufptr; \
 287         OPERATOR(f); \
 288         }
 289
 290 #define UNIBRACK(f) UNI3(f,0,0)
 291
 292 /* grandfather return to old style */
 293 #define OLDLOP(f) \
 294         do { \
 295             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 296                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 297             pl_yylval.ival = (f); \
 298             PL_expect = XTERM; \
 299             PL_bufptr = s; \
 300             return (int)LSTOP; \
 301         } while(0)
 302
 303 #define COPLINE_INC_WITH_HERELINES                  \
 304     STMT_START {                                     \
 305         CopLINE_inc(PL_curcop);                       \
 306         if (PL_parser->lex_shared->herelines)          \
 307             CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
 308             PL_parser->lex_shared->herelines = 0;                    \
 309     } STMT_END
 310
 311
 312 #ifdef DEBUGGING
 313
 314 /* how to interpret the pl_yylval associated with the token */
 315 enum token_type {
 316     TOKENTYPE_NONE,
 317     TOKENTYPE_IVAL,
 318     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 319     TOKENTYPE_PVAL,
 320     TOKENTYPE_OPVAL
 321 };
 322
 323 static struct debug_tokens {
 324     const int token;
 325     enum token_type type;
 326     const char *name;
 327 } const debug_tokens[] =
 328 {
 329     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 330     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 331     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 332     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 333     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 334     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 335     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 336     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 337     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 338     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 339     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 340     { DO,               TOKENTYPE_NONE,         "DO" },
 341     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 342     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 343     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 344     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 345     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 346     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 347     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 348     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 349     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 350     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 351     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 352     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 353     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 354     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 355     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 356     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 357     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 358     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 359     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 360     { IF,               TOKENTYPE_IVAL,         "IF" },
 361     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 362     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 363     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 364     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 365     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 366     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 367     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 368     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 369     { MY,               TOKENTYPE_IVAL,         "MY" },
 370     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 371     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 372     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 373     { OROR,             TOKENTYPE_NONE,         "OROR" },
 374     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 375     { PEG,              TOKENTYPE_NONE,         "PEG" },
 376     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 377     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 378     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 379     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 380     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 381     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 382     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 383     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 384     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 385     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 386     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 387     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 388     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 389     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 390     { SUB,              TOKENTYPE_NONE,         "SUB" },
 391     { THING,            TOKENTYPE_OPVAL,        "THING" },
 392     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 393     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 394     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 395     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 396     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 397     { USE,              TOKENTYPE_IVAL,         "USE" },
 398     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 399     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 400     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 401     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 402     { 0,                TOKENTYPE_NONE,         NULL }
 403 };
 404
 405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 406
 407 STATIC int
 408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 409 {
 410     dVAR;
 411
 412     PERL_ARGS_ASSERT_TOKEREPORT;
 413
 414     if (DEBUG_T_TEST) {
 415         const char *name = NULL;
 416         enum token_type type = TOKENTYPE_NONE;
 417         const struct debug_tokens *p;
 418         SV* const report = newSVpvs("<== ");
 419
 420         for (p = debug_tokens; p->token; p++) {
 421             if (p->token == (int)rv) {
 422                 name = p->name;
 423                 type = p->type;
 424                 break;
 425             }
 426         }
 427         if (name)
 428             Perl_sv_catpv(aTHX_ report, name);
 429         else if ((char)rv > ' ' && (char)rv <= '~')
 430         {
 431             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 432             if ((char)rv == 'p')
 433                 sv_catpvs(report, " (pending identifier)");
 434         }
 435         else if (!rv)
 436             sv_catpvs(report, "EOF");
 437         else
 438             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 439         switch (type) {
 440         case TOKENTYPE_NONE:
 441             break;
 442         case TOKENTYPE_IVAL:
 443             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 444             break;
 445         case TOKENTYPE_OPNUM:
 446             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 447                                     PL_op_name[lvalp->ival]);
 448             break;
 449         case TOKENTYPE_PVAL:
 450             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 451             break;
 452         case TOKENTYPE_OPVAL:
 453             if (lvalp->opval) {
 454                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 455                                     PL_op_name[lvalp->opval->op_type]);
 456                 if (lvalp->opval->op_type == OP_CONST) {
 457                     Perl_sv_catpvf(aTHX_ report, " %s",
 458                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 459                 }
 460
 461             }
 462             else
 463                 sv_catpvs(report, "(opval=null)");
 464             break;
 465         }
 466         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 467     };
 468     return (int)rv;
 469 }
 470
 471
 472 /* print the buffer with suitable escapes */
 473
 474 STATIC void
 475 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 476 {
 477     SV* const tmp = newSVpvs("");
 478
 479     PERL_ARGS_ASSERT_PRINTBUF;
 480
 481     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 482     SvREFCNT_dec(tmp);
 483 }
 484
 485 #endif
 486
 487 static int
 488 S_deprecate_commaless_var_list(pTHX) {
 489     PL_expect = XTERM;
 490     deprecate("comma-less variable list");
 491     return REPORT(','); /* grandfather non-comma-format format */
 492 }
 493
 494 /*
 495  * S_ao
 496  *
 497  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 498  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 499  */
 500
 501 STATIC int
 502 S_ao(pTHX_ int toketype)
 503 {
 504     dVAR;
 505     if (*PL_bufptr == '=') {
 506         PL_bufptr++;
 507         if (toketype == ANDAND)
 508             pl_yylval.ival = OP_ANDASSIGN;
 509         else if (toketype == OROR)
 510             pl_yylval.ival = OP_ORASSIGN;
 511         else if (toketype == DORDOR)
 512             pl_yylval.ival = OP_DORASSIGN;
 513         toketype = ASSIGNOP;
 514     }
 515     return toketype;
 516 }
 517
 518 /*
 519  * S_no_op
 520  * When Perl expects an operator and finds something else, no_op
 521  * prints the warning.  It always prints "<something> found where
 522  * operator expected.  It prints "Missing semicolon on previous line?"
 523  * if the surprise occurs at the start of the line.  "do you need to
 524  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 525  * where the compiler doesn't know if foo is a method call or a function.
 526  * It prints "Missing operator before end of line" if there's nothing
 527  * after the missing operator, or "... before <...>" if there is something
 528  * after the missing operator.
 529  */
 530
 531 STATIC void
 532 S_no_op(pTHX_ const char *const what, char *s)
 533 {
 534     dVAR;
 535     char * const oldbp = PL_bufptr;
 536     const bool is_first = (PL_oldbufptr == PL_linestart);
 537
 538     PERL_ARGS_ASSERT_NO_OP;
 539
 540     if (!s)
 541         s = oldbp;
 542     else
 543         PL_bufptr = s;
 544     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 545     if (ckWARN_d(WARN_SYNTAX)) {
 546         if (is_first)
 547             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 548                     "\t(Missing semicolon on previous line?)\n");
 549         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 550             const char *t;
 551             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 552                                                             t += UTF ? UTF8SKIP(t) : 1)
 553                 NOOP;
 554             if (t < PL_bufptr && isSPACE(*t))
 555                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 556                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 557                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 558         }
 559         else {
 560             assert(s >= oldbp);
 561             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 562                     "\t(Missing operator before %"UTF8f"?)\n",
 563                      UTF8fARG(UTF, s - oldbp, oldbp));
 564         }
 565     }
 566     PL_bufptr = oldbp;
 567 }
 568
 569 /*
 570  * S_missingterm
 571  * Complain about missing quote/regexp/heredoc terminator.
 572  * If it's called with NULL then it cauterizes the line buffer.
 573  * If we're in a delimited string and the delimiter is a control
 574  * character, it's reformatted into a two-char sequence like ^C.
 575  * This is fatal.
 576  */
 577
 578 STATIC void
 579 S_missingterm(pTHX_ char *s)
 580 {
 581     dVAR;
 582     char tmpbuf[3];
 583     char q;
 584     if (s) {
 585         char * const nl = strrchr(s,'\n');
 586         if (nl)
 587             *nl = '\0';
 588     }
 589     else if (isCNTRL(PL_multi_close)) {
 590         *tmpbuf = '^';
 591         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 592         tmpbuf[2] = '\0';
 593         s = tmpbuf;
 594     }
 595     else {
 596         *tmpbuf = (char)PL_multi_close;
 597         tmpbuf[1] = '\0';
 598         s = tmpbuf;
 599     }
 600     q = strchr(s,'"') ? '\'' : '"';
 601     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 602 }
 603
 604 #include "feature.h"
 605
 606 /*
 607  * Check whether the named feature is enabled.
 608  */
 609 bool
 610 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 611 {
 612     dVAR;
 613     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 614
 615     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 616
 617     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 618
 619     if (namelen > MAX_FEATURE_LEN)
 620         return FALSE;
 621     memcpy(&he_name[8], name, namelen);
 622
 623     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 624                                      REFCOUNTED_HE_EXISTS));
 625 }
 626
 627 /*
 628  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 629  * utf16-to-utf8-reversed.
 630  */
 631
 632 #ifdef PERL_CR_FILTER
 633 static void
 634 strip_return(SV *sv)
 635 {
 636     const char *s = SvPVX_const(sv);
 637     const char * const e = s + SvCUR(sv);
 638
 639     PERL_ARGS_ASSERT_STRIP_RETURN;
 640
 641     /* outer loop optimized to do nothing if there are no CR-LFs */
 642     while (s < e) {
 643         if (*s++ == '\r' && *s == '\n') {
 644             /* hit a CR-LF, need to copy the rest */
 645             char *d = s - 1;
 646             *d++ = *s++;
 647             while (s < e) {
 648                 if (*s == '\r' && s[1] == '\n')
 649                     s++;
 650                 *d++ = *s++;
 651             }
 652             SvCUR(sv) -= s - d;
 653             return;
 654         }
 655     }
 656 }
 657
 658 STATIC I32
 659 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 660 {
 661     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 662     if (count > 0 && !maxlen)
 663         strip_return(sv);
 664     return count;
 665 }
 666 #endif
 667
 668 /*
 669 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 670
 671 Creates and initialises a new lexer/parser state object, supplying
 672 a context in which to lex and parse from a new source of Perl code.
 673 A pointer to the new state object is placed in L</PL_parser>.  An entry
 674 is made on the save stack so that upon unwinding the new state object
 675 will be destroyed and the former value of L</PL_parser> will be restored.
 676 Nothing else need be done to clean up the parsing context.
 677
 678 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 679 non-null, provides a string (in SV form) containing code to be parsed.
 680 A copy of the string is made, so subsequent modification of I<line>
 681 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 682 from which code will be read to be parsed.  If both are non-null, the
 683 code in I<line> comes first and must consist of complete lines of input,
 684 and I<rsfp> supplies the remainder of the source.
 685
 686 The I<flags> parameter is reserved for future use.  Currently it is only
 687 used by perl internally, so extensions should always pass zero.
 688
 689 =cut
 690 */
 691
 692 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 693    can share filters with the current parser.
 694    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 695    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 696    destruction. This is used to handle the case of defaulting to reading the
 697    script from the standard input because no filename was given on the command
 698    line (without getting confused by situation where STDIN has been closed, so
 699    the script handle is opened on fd 0)  */
 700
 701 void
 702 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 703 {
 704     dVAR;
 705     const char *s = NULL;
 706     yy_parser *parser, *oparser;
 707     if (flags && flags & ~LEX_START_FLAGS)
 708         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 709
 710     /* create and initialise a parser */
 711
 712     Newxz(parser, 1, yy_parser);
 713     parser->old_parser = oparser = PL_parser;
 714     PL_parser = parser;
 715
 716     parser->stack = NULL;
 717     parser->ps = NULL;
 718     parser->stack_size = 0;
 719
 720     /* on scope exit, free this parser and restore any outer one */
 721     SAVEPARSER(parser);
 722     parser->saved_curcop = PL_curcop;
 723
 724     /* initialise lexer state */
 725
 726 #ifdef PERL_MAD
 727     parser->curforce = -1;
 728 #else
 729     parser->nexttoke = 0;
 730 #endif
 731     parser->error_count = oparser ? oparser->error_count : 0;
 732     parser->copline = NOLINE;
 733     parser->lex_state = LEX_NORMAL;
 734     parser->expect = XSTATE;
 735     parser->rsfp = rsfp;
 736     parser->rsfp_filters =
 737       !(flags & LEX_START_SAME_FILTER) || !oparser
 738         ? NULL
 739         : MUTABLE_AV(SvREFCNT_inc(
 740             oparser->rsfp_filters
 741              ? oparser->rsfp_filters
 742              : (oparser->rsfp_filters = newAV())
 743           ));
 744
 745     Newx(parser->lex_brackstack, 120, char);
 746     Newx(parser->lex_casestack, 12, char);
 747     *parser->lex_casestack = '\0';
 748     Newxz(parser->lex_shared, 1, LEXSHARED);
 749
 750     if (line) {
 751         STRLEN len;
 752         s = SvPV_const(line, len);
 753         parser->linestr = flags & LEX_START_COPIED
 754                             ? SvREFCNT_inc_simple_NN(line)
 755                             : newSVpvn_flags(s, len, SvUTF8(line));
 756         sv_catpvs(parser->linestr, "\n;");
 757     } else {
 758         parser->linestr = newSVpvs("\n;");
 759     }
 760     parser->oldoldbufptr =
 761         parser->oldbufptr =
 762         parser->bufptr =
 763         parser->linestart = SvPVX(parser->linestr);
 764     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 765     parser->last_lop = parser->last_uni = NULL;
 766     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 767                                  |LEX_DONT_CLOSE_RSFP);
 768
 769     parser->in_pod = parser->filtered = 0;
 770 }
 771
 772
 773 /* delete a parser object */
 774
 775 void
 776 Perl_parser_free(pTHX_  const yy_parser *parser)
 777 {
 778     PERL_ARGS_ASSERT_PARSER_FREE;
 779
 780     PL_curcop = parser->saved_curcop;
 781     SvREFCNT_dec(parser->linestr);
 782
 783     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 784         PerlIO_clearerr(parser->rsfp);
 785     else if (parser->rsfp && (!parser->old_parser ||
 786                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 787         PerlIO_close(parser->rsfp);
 788     SvREFCNT_dec(parser->rsfp_filters);
 789     SvREFCNT_dec(parser->lex_stuff);
 790     SvREFCNT_dec(parser->sublex_info.repl);
 791
 792     Safefree(parser->lex_brackstack);
 793     Safefree(parser->lex_casestack);
 794     Safefree(parser->lex_shared);
 795     PL_parser = parser->old_parser;
 796     Safefree(parser);
 797 }
 798
 799 void
 800 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 801 {
 802 #ifdef PERL_MAD
 803     I32 nexttoke = parser->lasttoke;
 804 #else
 805     I32 nexttoke = parser->nexttoke;
 806 #endif
 807     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 808     while (nexttoke--) {
 809 #ifdef PERL_MAD
 810         if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
 811                                 & 0xffff)
 812          && parser->nexttoke[nexttoke].next_val.opval
 813          && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
 814          && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
 815                 op_free(parser->nexttoke[nexttoke].next_val.opval);
 816                 parser->nexttoke[nexttoke].next_val.opval = NULL;
 817         }
 818 #else
 819         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 820          && parser->nextval[nexttoke].opval
 821          && parser->nextval[nexttoke].opval->op_slabbed
 822          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 823             op_free(parser->nextval[nexttoke].opval);
 824             parser->nextval[nexttoke].opval = NULL;
 825         }
 826 #endif
 827     }
 828 }
 829
 830
 831 /*
 832 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 833
 834 Buffer scalar containing the chunk currently under consideration of the
 835 text currently being lexed.  This is always a plain string scalar (for
 836 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 837 normal scalar means; instead refer to the buffer directly by the pointer
 838 variables described below.
 839
 840 The lexer maintains various C<char*> pointers to things in the
 841 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 842 reallocated, all of these pointers must be updated.  Don't attempt to
 843 do this manually, but rather use L</lex_grow_linestr> if you need to
 844 reallocate the buffer.
 845
 846 The content of the text chunk in the buffer is commonly exactly one
 847 complete line of input, up to and including a newline terminator,
 848 but there are situations where it is otherwise.  The octets of the
 849 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 850 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 851 flag on this scalar, which may disagree with it.
 852
 853 For direct examination of the buffer, the variable
 854 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 855 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 856 of these pointers is usually preferable to examination of the scalar
 857 through normal scalar means.
 858
 859 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 860
 861 Direct pointer to the end of the chunk of text currently being lexed, the
 862 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 863 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 864 always located at the end of the buffer, and does not count as part of
 865 the buffer's contents.
 866
 867 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 868
 869 Points to the current position of lexing inside the lexer buffer.
 870 Characters around this point may be freely examined, within
 871 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 872 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 873 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 874
 875 Lexing code (whether in the Perl core or not) moves this pointer past
 876 the characters that it consumes.  It is also expected to perform some
 877 bookkeeping whenever a newline character is consumed.  This movement
 878 can be more conveniently performed by the function L</lex_read_to>,
 879 which handles newlines appropriately.
 880
 881 Interpretation of the buffer's octets can be abstracted out by
 882 using the slightly higher-level functions L</lex_peek_unichar> and
 883 L</lex_read_unichar>.
 884
 885 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 886
 887 Points to the start of the current line inside the lexer buffer.
 888 This is useful for indicating at which column an error occurred, and
 889 not much else.  This must be updated by any lexing code that consumes
 890 a newline; the function L</lex_read_to> handles this detail.
 891
 892 =cut
 893 */
 894
 895 /*
 896 =for apidoc Amx|bool|lex_bufutf8
 897
 898 Indicates whether the octets in the lexer buffer
 899 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 900 of Unicode characters.  If not, they should be interpreted as Latin-1
 901 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 902
 903 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 904 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 905 encoding.
 906
 907 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 908 is significant, but not the whole story regarding the input character
 909 encoding.  Normally, when a file is being read, the scalar contains octets
 910 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 911 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 912 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 913 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 914 is in effect.  This logic may change in the future; use this function
 915 instead of implementing the logic yourself.
 916
 917 =cut
 918 */
 919
 920 bool
 921 Perl_lex_bufutf8(pTHX)
 922 {
 923     return UTF;
 924 }
 925
 926 /*
 927 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 928
 929 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 930 at least I<len> octets (including terminating NUL).  Returns a
 931 pointer to the reallocated buffer.  This is necessary before making
 932 any direct modification of the buffer that would increase its length.
 933 L</lex_stuff_pvn> provides a more convenient way to insert text into
 934 the buffer.
 935
 936 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 937 this function updates all of the lexer's variables that point directly
 938 into the buffer.
 939
 940 =cut
 941 */
 942
 943 char *
 944 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 945 {
 946     SV *linestr;
 947     char *buf;
 948     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 949     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 950     linestr = PL_parser->linestr;
 951     buf = SvPVX(linestr);
 952     if (len <= SvLEN(linestr))
 953         return buf;
 954     bufend_pos = PL_parser->bufend - buf;
 955     bufptr_pos = PL_parser->bufptr - buf;
 956     oldbufptr_pos = PL_parser->oldbufptr - buf;
 957     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 958     linestart_pos = PL_parser->linestart - buf;
 959     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 960     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 961     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 962                             PL_parser->lex_shared->re_eval_start - buf : 0;
 963
 964     buf = sv_grow(linestr, len);
 965
 966     PL_parser->bufend = buf + bufend_pos;
 967     PL_parser->bufptr = buf + bufptr_pos;
 968     PL_parser->oldbufptr = buf + oldbufptr_pos;
 969     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 970     PL_parser->linestart = buf + linestart_pos;
 971     if (PL_parser->last_uni)
 972         PL_parser->last_uni = buf + last_uni_pos;
 973     if (PL_parser->last_lop)
 974         PL_parser->last_lop = buf + last_lop_pos;
 975     if (PL_parser->lex_shared->re_eval_start)
 976         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 977     return buf;
 978 }
 979
 980 /*
 981 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 982
 983 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 984 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 985 reallocating the buffer if necessary.  This means that lexing code that
 986 runs later will see the characters as if they had appeared in the input.
 987 It is not recommended to do this as part of normal parsing, and most
 988 uses of this facility run the risk of the inserted characters being
 989 interpreted in an unintended manner.
 990
 991 The string to be inserted is represented by I<len> octets starting
 992 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 993 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 994 The characters are recoded for the lexer buffer, according to how the
 995 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 996 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 997 function is more convenient.
 998
 999 =cut
1000 */
1001
1002 void
1003 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1004 {
1005     dVAR;
1006     char *bufptr;
1007     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1008     if (flags & ~(LEX_STUFF_UTF8))
1009         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1010     if (UTF) {
1011         if (flags & LEX_STUFF_UTF8) {
1012             goto plain_copy;
1013         } else {
1014             STRLEN highhalf = 0;    /* Count of variants */
1015             const char *p, *e = pv+len;
1016             for (p = pv; p != e; p++) {
1017                 if (! UTF8_IS_INVARIANT(*p)) {
1018                     highhalf++;
1019                 }
1020             }
1021             if (!highhalf)
1022                 goto plain_copy;
1023             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1024             bufptr = PL_parser->bufptr;
1025             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1026             SvCUR_set(PL_parser->linestr,
1027                 SvCUR(PL_parser->linestr) + len+highhalf);
1028             PL_parser->bufend += len+highhalf;
1029             for (p = pv; p != e; p++) {
1030                 U8 c = (U8)*p;
1031                 if (! UTF8_IS_INVARIANT(c)) {
1032                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1033                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1034                 } else {
1035                     *bufptr++ = (char)c;
1036                 }
1037             }
1038         }
1039     } else {
1040         if (flags & LEX_STUFF_UTF8) {
1041             STRLEN highhalf = 0;
1042             const char *p, *e = pv+len;
1043             for (p = pv; p != e; p++) {
1044                 U8 c = (U8)*p;
1045                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1046                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1047                                 "non-Latin-1 character into Latin-1 input");
1048                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1049                     p++;
1050                     highhalf++;
1051                 } else if (! UTF8_IS_INVARIANT(c)) {
1052                     /* malformed UTF-8 */
1053                     ENTER;
1054                     SAVESPTR(PL_warnhook);
1055                     PL_warnhook = PERL_WARNHOOK_FATAL;
1056                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1057                     LEAVE;
1058                 }
1059             }
1060             if (!highhalf)
1061                 goto plain_copy;
1062             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1063             bufptr = PL_parser->bufptr;
1064             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1065             SvCUR_set(PL_parser->linestr,
1066                 SvCUR(PL_parser->linestr) + len-highhalf);
1067             PL_parser->bufend += len-highhalf;
1068             p = pv;
1069             while (p < e) {
1070                 if (UTF8_IS_INVARIANT(*p)) {
1071                     *bufptr++ = *p;
1072                     p++;
1073                 }
1074                 else {
1075                     assert(p < e -1 );
1076                     *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1077                     p += 2;
1078                 }
1079             }
1080         } else {
1081           plain_copy:
1082             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1083             bufptr = PL_parser->bufptr;
1084             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1085             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1086             PL_parser->bufend += len;
1087             Copy(pv, bufptr, len, char);
1088         }
1089     }
1090 }
1091
1092 /*
1093 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1094
1095 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1096 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1097 reallocating the buffer if necessary.  This means that lexing code that
1098 runs later will see the characters as if they had appeared in the input.
1099 It is not recommended to do this as part of normal parsing, and most
1100 uses of this facility run the risk of the inserted characters being
1101 interpreted in an unintended manner.
1102
1103 The string to be inserted is represented by octets starting at I<pv>
1104 and continuing to the first nul.  These octets are interpreted as either
1105 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1106 in I<flags>.  The characters are recoded for the lexer buffer, according
1107 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1108 If it is not convenient to nul-terminate a string to be inserted, the
1109 L</lex_stuff_pvn> function is more appropriate.
1110
1111 =cut
1112 */
1113
1114 void
1115 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1116 {
1117     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1118     lex_stuff_pvn(pv, strlen(pv), flags);
1119 }
1120
1121 /*
1122 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1123
1124 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1125 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1126 reallocating the buffer if necessary.  This means that lexing code that
1127 runs later will see the characters as if they had appeared in the input.
1128 It is not recommended to do this as part of normal parsing, and most
1129 uses of this facility run the risk of the inserted characters being
1130 interpreted in an unintended manner.
1131
1132 The string to be inserted is the string value of I<sv>.  The characters
1133 are recoded for the lexer buffer, according to how the buffer is currently
1134 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1135 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1136 need to construct a scalar.
1137
1138 =cut
1139 */
1140
1141 void
1142 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1143 {
1144     char *pv;
1145     STRLEN len;
1146     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1147     if (flags)
1148         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1149     pv = SvPV(sv, len);
1150     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1151 }
1152
1153 /*
1154 =for apidoc Amx|void|lex_unstuff|char *ptr
1155
1156 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1157 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1158 This hides the discarded text from any lexing code that runs later,
1159 as if the text had never appeared.
1160
1161 This is not the normal way to consume lexed text.  For that, use
1162 L</lex_read_to>.
1163
1164 =cut
1165 */
1166
1167 void
1168 Perl_lex_unstuff(pTHX_ char *ptr)
1169 {
1170     char *buf, *bufend;
1171     STRLEN unstuff_len;
1172     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1173     buf = PL_parser->bufptr;
1174     if (ptr < buf)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176     if (ptr == buf)
1177         return;
1178     bufend = PL_parser->bufend;
1179     if (ptr > bufend)
1180         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1181     unstuff_len = ptr - buf;
1182     Move(ptr, buf, bufend+1-ptr, char);
1183     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1184     PL_parser->bufend = bufend - unstuff_len;
1185 }
1186
1187 /*
1188 =for apidoc Amx|void|lex_read_to|char *ptr
1189
1190 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1191 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1192 performing the correct bookkeeping whenever a newline character is passed.
1193 This is the normal way to consume lexed text.
1194
1195 Interpretation of the buffer's octets can be abstracted out by
1196 using the slightly higher-level functions L</lex_peek_unichar> and
1197 L</lex_read_unichar>.
1198
1199 =cut
1200 */
1201
1202 void
1203 Perl_lex_read_to(pTHX_ char *ptr)
1204 {
1205     char *s;
1206     PERL_ARGS_ASSERT_LEX_READ_TO;
1207     s = PL_parser->bufptr;
1208     if (ptr < s || ptr > PL_parser->bufend)
1209         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1210     for (; s != ptr; s++)
1211         if (*s == '\n') {
1212             COPLINE_INC_WITH_HERELINES;
1213             PL_parser->linestart = s+1;
1214         }
1215     PL_parser->bufptr = ptr;
1216 }
1217
1218 /*
1219 =for apidoc Amx|void|lex_discard_to|char *ptr
1220
1221 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1222 up to I<ptr>.  The remaining content of the buffer will be moved, and
1223 all pointers into the buffer updated appropriately.  I<ptr> must not
1224 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1225 it is not permitted to discard text that has yet to be lexed.
1226
1227 Normally it is not necessarily to do this directly, because it suffices to
1228 use the implicit discarding behaviour of L</lex_next_chunk> and things
1229 based on it.  However, if a token stretches across multiple lines,
1230 and the lexing code has kept multiple lines of text in the buffer for
1231 that purpose, then after completion of the token it would be wise to
1232 explicitly discard the now-unneeded earlier lines, to avoid future
1233 multi-line tokens growing the buffer without bound.
1234
1235 =cut
1236 */
1237
1238 void
1239 Perl_lex_discard_to(pTHX_ char *ptr)
1240 {
1241     char *buf;
1242     STRLEN discard_len;
1243     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1244     buf = SvPVX(PL_parser->linestr);
1245     if (ptr < buf)
1246         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1247     if (ptr == buf)
1248         return;
1249     if (ptr > PL_parser->bufptr)
1250         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1251     discard_len = ptr - buf;
1252     if (PL_parser->oldbufptr < ptr)
1253         PL_parser->oldbufptr = ptr;
1254     if (PL_parser->oldoldbufptr < ptr)
1255         PL_parser->oldoldbufptr = ptr;
1256     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1257         PL_parser->last_uni = NULL;
1258     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1259         PL_parser->last_lop = NULL;
1260     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1261     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1262     PL_parser->bufend -= discard_len;
1263     PL_parser->bufptr -= discard_len;
1264     PL_parser->oldbufptr -= discard_len;
1265     PL_parser->oldoldbufptr -= discard_len;
1266     if (PL_parser->last_uni)
1267         PL_parser->last_uni -= discard_len;
1268     if (PL_parser->last_lop)
1269         PL_parser->last_lop -= discard_len;
1270 }
1271
1272 /*
1273 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1274
1275 Reads in the next chunk of text to be lexed, appending it to
1276 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1277 looked to the end of the current chunk and wants to know more.  It is
1278 usual, but not necessary, for lexing to have consumed the entirety of
1279 the current chunk at this time.
1280
1281 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1282 chunk (i.e., the current chunk has been entirely consumed), normally the
1283 current chunk will be discarded at the same time that the new chunk is
1284 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1285 will not be discarded.  If the current chunk has not been entirely
1286 consumed, then it will not be discarded regardless of the flag.
1287
1288 Returns true if some new text was added to the buffer, or false if the
1289 buffer has reached the end of the input text.
1290
1291 =cut
1292 */
1293
1294 #define LEX_FAKE_EOF 0x80000000
1295 #define LEX_NO_TERM  0x40000000
1296
1297 bool
1298 Perl_lex_next_chunk(pTHX_ U32 flags)
1299 {
1300     SV *linestr;
1301     char *buf;
1302     STRLEN old_bufend_pos, new_bufend_pos;
1303     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1304     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1305     bool got_some_for_debugger = 0;
1306     bool got_some;
1307     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1308         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1309     linestr = PL_parser->linestr;
1310     buf = SvPVX(linestr);
1311     if (!(flags & LEX_KEEP_PREVIOUS) &&
1312             PL_parser->bufptr == PL_parser->bufend) {
1313         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1314         linestart_pos = 0;
1315         if (PL_parser->last_uni != PL_parser->bufend)
1316             PL_parser->last_uni = NULL;
1317         if (PL_parser->last_lop != PL_parser->bufend)
1318             PL_parser->last_lop = NULL;
1319         last_uni_pos = last_lop_pos = 0;
1320         *buf = 0;
1321         SvCUR(linestr) = 0;
1322     } else {
1323         old_bufend_pos = PL_parser->bufend - buf;
1324         bufptr_pos = PL_parser->bufptr - buf;
1325         oldbufptr_pos = PL_parser->oldbufptr - buf;
1326         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1327         linestart_pos = PL_parser->linestart - buf;
1328         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1329         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1330     }
1331     if (flags & LEX_FAKE_EOF) {
1332         goto eof;
1333     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1334         got_some = 0;
1335     } else if (filter_gets(linestr, old_bufend_pos)) {
1336         got_some = 1;
1337         got_some_for_debugger = 1;
1338     } else if (flags & LEX_NO_TERM) {
1339         got_some = 0;
1340     } else {
1341         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1342             sv_setpvs(linestr, "");
1343         eof:
1344         /* End of real input.  Close filehandle (unless it was STDIN),
1345          * then add implicit termination.
1346          */
1347         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1348             PerlIO_clearerr(PL_parser->rsfp);
1349         else if (PL_parser->rsfp)
1350             (void)PerlIO_close(PL_parser->rsfp);
1351         PL_parser->rsfp = NULL;
1352         PL_parser->in_pod = PL_parser->filtered = 0;
1353 #ifdef PERL_MAD
1354         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1355             PL_faketokens = 1;
1356 #endif
1357         if (!PL_in_eval && PL_minus_p) {
1358             sv_catpvs(linestr,
1359                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1360             PL_minus_n = PL_minus_p = 0;
1361         } else if (!PL_in_eval && PL_minus_n) {
1362             sv_catpvs(linestr, /*{*/";}");
1363             PL_minus_n = 0;
1364         } else
1365             sv_catpvs(linestr, ";");
1366         got_some = 1;
1367     }
1368     buf = SvPVX(linestr);
1369     new_bufend_pos = SvCUR(linestr);
1370     PL_parser->bufend = buf + new_bufend_pos;
1371     PL_parser->bufptr = buf + bufptr_pos;
1372     PL_parser->oldbufptr = buf + oldbufptr_pos;
1373     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1374     PL_parser->linestart = buf + linestart_pos;
1375     if (PL_parser->last_uni)
1376         PL_parser->last_uni = buf + last_uni_pos;
1377     if (PL_parser->last_lop)
1378         PL_parser->last_lop = buf + last_lop_pos;
1379     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1380             PL_curstash != PL_debstash) {
1381         /* debugger active and we're not compiling the debugger code,
1382          * so store the line into the debugger's array of lines
1383          */
1384         update_debugger_info(NULL, buf+old_bufend_pos,
1385             new_bufend_pos-old_bufend_pos);
1386     }
1387     return got_some;
1388 }
1389
1390 /*
1391 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1392
1393 Looks ahead one (Unicode) character in the text currently being lexed.
1394 Returns the codepoint (unsigned integer value) of the next character,
1395 or -1 if lexing has reached the end of the input text.  To consume the
1396 peeked character, use L</lex_read_unichar>.
1397
1398 If the next character is in (or extends into) the next chunk of input
1399 text, the next chunk will be read in.  Normally the current chunk will be
1400 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1401 then the current chunk will not be discarded.
1402
1403 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1404 is encountered, an exception is generated.
1405
1406 =cut
1407 */
1408
1409 I32
1410 Perl_lex_peek_unichar(pTHX_ U32 flags)
1411 {
1412     dVAR;
1413     char *s, *bufend;
1414     if (flags & ~(LEX_KEEP_PREVIOUS))
1415         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1416     s = PL_parser->bufptr;
1417     bufend = PL_parser->bufend;
1418     if (UTF) {
1419         U8 head;
1420         I32 unichar;
1421         STRLEN len, retlen;
1422         if (s == bufend) {
1423             if (!lex_next_chunk(flags))
1424                 return -1;
1425             s = PL_parser->bufptr;
1426             bufend = PL_parser->bufend;
1427         }
1428         head = (U8)*s;
1429         if (UTF8_IS_INVARIANT(head))
1430             return head;
1431         if (UTF8_IS_START(head)) {
1432             len = UTF8SKIP(&head);
1433             while ((STRLEN)(bufend-s) < len) {
1434                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1435                     break;
1436                 s = PL_parser->bufptr;
1437                 bufend = PL_parser->bufend;
1438             }
1439         }
1440         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1441         if (retlen == (STRLEN)-1) {
1442             /* malformed UTF-8 */
1443             ENTER;
1444             SAVESPTR(PL_warnhook);
1445             PL_warnhook = PERL_WARNHOOK_FATAL;
1446             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1447             LEAVE;
1448         }
1449         return unichar;
1450     } else {
1451         if (s == bufend) {
1452             if (!lex_next_chunk(flags))
1453                 return -1;
1454             s = PL_parser->bufptr;
1455         }
1456         return (U8)*s;
1457     }
1458 }
1459
1460 /*
1461 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1462
1463 Reads the next (Unicode) character in the text currently being lexed.
1464 Returns the codepoint (unsigned integer value) of the character read,
1465 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1466 if lexing has reached the end of the input text.  To non-destructively
1467 examine the next character, use L</lex_peek_unichar> instead.
1468
1469 If the next character is in (or extends into) the next chunk of input
1470 text, the next chunk will be read in.  Normally the current chunk will be
1471 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1472 then the current chunk will not be discarded.
1473
1474 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1475 is encountered, an exception is generated.
1476
1477 =cut
1478 */
1479
1480 I32
1481 Perl_lex_read_unichar(pTHX_ U32 flags)
1482 {
1483     I32 c;
1484     if (flags & ~(LEX_KEEP_PREVIOUS))
1485         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1486     c = lex_peek_unichar(flags);
1487     if (c != -1) {
1488         if (c == '\n')
1489             COPLINE_INC_WITH_HERELINES;
1490         if (UTF)
1491             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1492         else
1493             ++(PL_parser->bufptr);
1494     }
1495     return c;
1496 }
1497
1498 /*
1499 =for apidoc Amx|void|lex_read_space|U32 flags
1500
1501 Reads optional spaces, in Perl style, in the text currently being
1502 lexed.  The spaces may include ordinary whitespace characters and
1503 Perl-style comments.  C<#line> directives are processed if encountered.
1504 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1505 at a non-space character (or the end of the input text).
1506
1507 If spaces extend into the next chunk of input text, the next chunk will
1508 be read in.  Normally the current chunk will be discarded at the same
1509 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1510 chunk will not be discarded.
1511
1512 =cut
1513 */
1514
1515 #define LEX_NO_NEXT_CHUNK 0x80000000
1516
1517 void
1518 Perl_lex_read_space(pTHX_ U32 flags)
1519 {
1520     char *s, *bufend;
1521     bool need_incline = 0;
1522     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1523         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1524 #ifdef PERL_MAD
1525     if (PL_skipwhite) {
1526         sv_free(PL_skipwhite);
1527         PL_skipwhite = NULL;
1528     }
1529     if (PL_madskills)
1530         PL_skipwhite = newSVpvs("");
1531 #endif /* PERL_MAD */
1532     s = PL_parser->bufptr;
1533     bufend = PL_parser->bufend;
1534     while (1) {
1535         char c = *s;
1536         if (c == '#') {
1537             do {
1538                 c = *++s;
1539             } while (!(c == '\n' || (c == 0 && s == bufend)));
1540         } else if (c == '\n') {
1541             s++;
1542             PL_parser->linestart = s;
1543             if (s == bufend)
1544                 need_incline = 1;
1545             else
1546                 incline(s);
1547         } else if (isSPACE(c)) {
1548             s++;
1549         } else if (c == 0 && s == bufend) {
1550             bool got_more;
1551 #ifdef PERL_MAD
1552             if (PL_madskills)
1553                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1554 #endif /* PERL_MAD */
1555             if (flags & LEX_NO_NEXT_CHUNK)
1556                 break;
1557             PL_parser->bufptr = s;
1558             COPLINE_INC_WITH_HERELINES;
1559             got_more = lex_next_chunk(flags);
1560             CopLINE_dec(PL_curcop);
1561             s = PL_parser->bufptr;
1562             bufend = PL_parser->bufend;
1563             if (!got_more)
1564                 break;
1565             if (need_incline && PL_parser->rsfp) {
1566                 incline(s);
1567                 need_incline = 0;
1568             }
1569         } else {
1570             break;
1571         }
1572     }
1573 #ifdef PERL_MAD
1574     if (PL_madskills)
1575         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1576 #endif /* PERL_MAD */
1577     PL_parser->bufptr = s;
1578 }
1579
1580 /*
1581
1582 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1583
1584 This function performs syntax checking on a prototype, C<proto>.
1585 If C<warn> is true, any illegal characters or mismatched brackets
1586 will trigger illegalproto warnings, declaring that they were
1587 detected in the prototype for C<name>.
1588
1589 The return value is C<true> if this is a valid prototype, and
1590 C<false> if it is not, regardless of whether C<warn> was C<true> or
1591 C<false>.
1592
1593 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1594
1595 =cut
1596
1597  */
1598
1599 bool
1600 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1601 {
1602     STRLEN len, origlen;
1603     char *p = proto ? SvPV(proto, len) : NULL;
1604     bool bad_proto = FALSE;
1605     bool in_brackets = FALSE;
1606     bool after_slash = FALSE;
1607     char greedy_proto = ' ';
1608     bool proto_after_greedy_proto = FALSE;
1609     bool must_be_last = FALSE;
1610     bool underscore = FALSE;
1611     bool bad_proto_after_underscore = FALSE;
1612
1613     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1614
1615     if (!proto)
1616         return TRUE;
1617
1618     origlen = len;
1619     for (; len--; p++) {
1620         if (!isSPACE(*p)) {
1621             if (must_be_last)
1622                 proto_after_greedy_proto = TRUE;
1623             if (underscore) {
1624                 if (!strchr(";@%", *p))
1625                     bad_proto_after_underscore = TRUE;
1626                 underscore = FALSE;
1627             }
1628             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1629                 bad_proto = TRUE;
1630             }
1631             else {
1632                 if (*p == '[')
1633                     in_brackets = TRUE;
1634                 else if (*p == ']')
1635                     in_brackets = FALSE;
1636                 else if ((*p == '@' || *p == '%') &&
1637                     !after_slash &&
1638                     !in_brackets ) {
1639                     must_be_last = TRUE;
1640                     greedy_proto = *p;
1641                 }
1642                 else if (*p == '_')
1643                     underscore = TRUE;
1644             }
1645             if (*p == '\\')
1646                 after_slash = TRUE;
1647             else
1648                 after_slash = FALSE;
1649         }
1650     }
1651
1652     if (warn) {
1653         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1654         p -= origlen;
1655         p = SvUTF8(proto)
1656             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1657                              origlen, UNI_DISPLAY_ISPRINT)
1658             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1659
1660         if (proto_after_greedy_proto)
1661             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1662                         "Prototype after '%c' for %"SVf" : %s",
1663                         greedy_proto, SVfARG(name), p);
1664         if (in_brackets)
1665             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1666                         "Missing ']' in prototype for %"SVf" : %s",
1667                         SVfARG(name), p);
1668         if (bad_proto)
1669             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1670                         "Illegal character in prototype for %"SVf" : %s",
1671                         SVfARG(name), p);
1672         if (bad_proto_after_underscore)
1673             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1674                         "Illegal character after '_' in prototype for %"SVf" : %s",
1675                         SVfARG(name), p);
1676     }
1677
1678     return (! (proto_after_greedy_proto || bad_proto) );
1679 }
1680
1681 /*
1682  * S_incline
1683  * This subroutine has nothing to do with tilting, whether at windmills
1684  * or pinball tables.  Its name is short for "increment line".  It
1685  * increments the current line number in CopLINE(PL_curcop) and checks
1686  * to see whether the line starts with a comment of the form
1687  *    # line 500 "foo.pm"
1688  * If so, it sets the current line number and file to the values in the comment.
1689  */
1690
1691 STATIC void
1692 S_incline(pTHX_ const char *s)
1693 {
1694     dVAR;
1695     const char *t;
1696     const char *n;
1697     const char *e;
1698     line_t line_num;
1699
1700     PERL_ARGS_ASSERT_INCLINE;
1701
1702     COPLINE_INC_WITH_HERELINES;
1703     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1704      && s+1 == PL_bufend && *s == ';') {
1705         /* fake newline in string eval */
1706         CopLINE_dec(PL_curcop);
1707         return;
1708     }
1709     if (*s++ != '#')
1710         return;
1711     while (SPACE_OR_TAB(*s))
1712         s++;
1713     if (strnEQ(s, "line", 4))
1714         s += 4;
1715     else
1716         return;
1717     if (SPACE_OR_TAB(*s))
1718         s++;
1719     else
1720         return;
1721     while (SPACE_OR_TAB(*s))
1722         s++;
1723     if (!isDIGIT(*s))
1724         return;
1725
1726     n = s;
1727     while (isDIGIT(*s))
1728         s++;
1729     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1730         return;
1731     while (SPACE_OR_TAB(*s))
1732         s++;
1733     if (*s == '"' && (t = strchr(s+1, '"'))) {
1734         s++;
1735         e = t + 1;
1736     }
1737     else {
1738         t = s;
1739         while (!isSPACE(*t))
1740             t++;
1741         e = t;
1742     }
1743     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1744         e++;
1745     if (*e != '\n' && *e != '\0')
1746         return;         /* false alarm */
1747
1748     line_num = atoi(n)-1;
1749
1750     if (t - s > 0) {
1751         const STRLEN len = t - s;
1752         SV *const temp_sv = CopFILESV(PL_curcop);
1753         const char *cf;
1754         STRLEN tmplen;
1755
1756         if (temp_sv) {
1757             cf = SvPVX(temp_sv);
1758             tmplen = SvCUR(temp_sv);
1759         } else {
1760             cf = NULL;
1761             tmplen = 0;
1762         }
1763
1764         if (!PL_rsfp && !PL_parser->filtered) {
1765             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1766              * to *{"::_<newfilename"} */
1767             /* However, the long form of evals is only turned on by the
1768                debugger - usually they're "(eval %lu)" */
1769             char smallbuf[128];
1770             char *tmpbuf;
1771             GV **gvp;
1772             STRLEN tmplen2 = len;
1773             if (tmplen + 2 <= sizeof smallbuf)
1774                 tmpbuf = smallbuf;
1775             else
1776                 Newx(tmpbuf, tmplen + 2, char);
1777             tmpbuf[0] = '_';
1778             tmpbuf[1] = '<';
1779             memcpy(tmpbuf + 2, cf, tmplen);
1780             tmplen += 2;
1781             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1782             if (gvp) {
1783                 char *tmpbuf2;
1784                 GV *gv2;
1785
1786                 if (tmplen2 + 2 <= sizeof smallbuf)
1787                     tmpbuf2 = smallbuf;
1788                 else
1789                     Newx(tmpbuf2, tmplen2 + 2, char);
1790
1791                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1792                     /* Either they malloc'd it, or we malloc'd it,
1793                        so no prefix is present in ours.  */
1794                     tmpbuf2[0] = '_';
1795                     tmpbuf2[1] = '<';
1796                 }
1797
1798                 memcpy(tmpbuf2 + 2, s, tmplen2);
1799                 tmplen2 += 2;
1800
1801                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1802                 if (!isGV(gv2)) {
1803                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1804                     /* adjust ${"::_<newfilename"} to store the new file name */
1805                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1806                     /* The line number may differ. If that is the case,
1807                        alias the saved lines that are in the array.
1808                        Otherwise alias the whole array. */
1809                     if (CopLINE(PL_curcop) == line_num) {
1810                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1811                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1812                     }
1813                     else if (GvAV(*gvp)) {
1814                         AV * const av = GvAV(*gvp);
1815                         const I32 start = CopLINE(PL_curcop)+1;
1816                         I32 items = AvFILLp(av) - start;
1817                         if (items > 0) {
1818                             AV * const av2 = GvAVn(gv2);
1819                             SV **svp = AvARRAY(av) + start;
1820                             I32 l = (I32)line_num+1;
1821                             while (items--)
1822                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1823                         }
1824                     }
1825                 }
1826
1827                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1828             }
1829             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1830         }
1831         CopFILE_free(PL_curcop);
1832         CopFILE_setn(PL_curcop, s, len);
1833     }
1834     CopLINE_set(PL_curcop, line_num);
1835 }
1836
1837 #ifdef PERL_MAD
1838 /* skip space before PL_thistoken */
1839
1840 STATIC char *
1841 S_skipspace0(pTHX_ char *s)
1842 {
1843     PERL_ARGS_ASSERT_SKIPSPACE0;
1844
1845     s = skipspace(s);
1846     if (!PL_madskills)
1847         return s;
1848     if (PL_skipwhite) {
1849         if (!PL_thiswhite)
1850             PL_thiswhite = newSVpvs("");
1851         sv_catsv(PL_thiswhite, PL_skipwhite);
1852         sv_free(PL_skipwhite);
1853         PL_skipwhite = 0;
1854     }
1855     PL_realtokenstart = s - SvPVX(PL_linestr);
1856     return s;
1857 }
1858
1859 /* skip space after PL_thistoken */
1860
1861 STATIC char *
1862 S_skipspace1(pTHX_ char *s)
1863 {
1864     const char *start = s;
1865     I32 startoff = start - SvPVX(PL_linestr);
1866
1867     PERL_ARGS_ASSERT_SKIPSPACE1;
1868
1869     s = skipspace(s);
1870     if (!PL_madskills)
1871         return s;
1872     start = SvPVX(PL_linestr) + startoff;
1873     if (!PL_thistoken && PL_realtokenstart >= 0) {
1874         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1875         PL_thistoken = newSVpvn(tstart, start - tstart);
1876     }
1877     PL_realtokenstart = -1;
1878     if (PL_skipwhite) {
1879         if (!PL_nextwhite)
1880             PL_nextwhite = newSVpvs("");
1881         sv_catsv(PL_nextwhite, PL_skipwhite);
1882         sv_free(PL_skipwhite);
1883         PL_skipwhite = 0;
1884     }
1885     return s;
1886 }
1887
1888 STATIC char *
1889 S_skipspace2(pTHX_ char *s, SV **svp)
1890 {
1891     char *start;
1892     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1893     const I32 startoff = s - SvPVX(PL_linestr);
1894
1895     PERL_ARGS_ASSERT_SKIPSPACE2;
1896
1897     s = skipspace(s);
1898     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1899     if (!PL_madskills || !svp)
1900         return s;
1901     start = SvPVX(PL_linestr) + startoff;
1902     if (!PL_thistoken && PL_realtokenstart >= 0) {
1903         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1904         PL_thistoken = newSVpvn(tstart, start - tstart);
1905         PL_realtokenstart = -1;
1906     }
1907     if (PL_skipwhite) {
1908         if (!*svp)
1909             *svp = newSVpvs("");
1910         sv_setsv(*svp, PL_skipwhite);
1911         sv_free(PL_skipwhite);
1912         PL_skipwhite = 0;
1913     }
1914
1915     return s;
1916 }
1917 #endif
1918
1919 STATIC void
1920 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1921 {
1922     AV *av = CopFILEAVx(PL_curcop);
1923     if (av) {
1924         SV * const sv = newSV_type(SVt_PVMG);
1925         if (orig_sv)
1926             sv_setsv_flags(sv, orig_sv, 0); /* no cow */
1927         else
1928             sv_setpvn(sv, buf, len);
1929         (void)SvIOK_on(sv);
1930         SvIV_set(sv, 0);
1931         av_store(av, (I32)CopLINE(PL_curcop), sv);
1932     }
1933 }
1934
1935 /*
1936  * S_skipspace
1937  * Called to gobble the appropriate amount and type of whitespace.
1938  * Skips comments as well.
1939  */
1940
1941 STATIC char *
1942 S_skipspace(pTHX_ char *s)
1943 {
1944 #ifdef PERL_MAD
1945     char *start = s;
1946 #endif /* PERL_MAD */
1947     PERL_ARGS_ASSERT_SKIPSPACE;
1948 #ifdef PERL_MAD
1949     if (PL_skipwhite) {
1950         sv_free(PL_skipwhite);
1951         PL_skipwhite = NULL;
1952     }
1953 #endif /* PERL_MAD */
1954     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1955         while (s < PL_bufend && SPACE_OR_TAB(*s))
1956             s++;
1957     } else {
1958         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1959         PL_bufptr = s;
1960         lex_read_space(LEX_KEEP_PREVIOUS |
1961                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1962                     LEX_NO_NEXT_CHUNK : 0));
1963         s = PL_bufptr;
1964         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1965         if (PL_linestart > PL_bufptr)
1966             PL_bufptr = PL_linestart;
1967         return s;
1968     }
1969 #ifdef PERL_MAD
1970     if (PL_madskills)
1971         PL_skipwhite = newSVpvn(start, s-start);
1972 #endif /* PERL_MAD */
1973     return s;
1974 }
1975
1976 /*
1977  * S_check_uni
1978  * Check the unary operators to ensure there's no ambiguity in how they're
1979  * used.  An ambiguous piece of code would be:
1980  *     rand + 5
1981  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1982  * the +5 is its argument.
1983  */
1984
1985 STATIC void
1986 S_check_uni(pTHX)
1987 {
1988     dVAR;
1989     const char *s;
1990     const char *t;
1991
1992     if (PL_oldoldbufptr != PL_last_uni)
1993         return;
1994     while (isSPACE(*PL_last_uni))
1995         PL_last_uni++;
1996     s = PL_last_uni;
1997     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1998         s++;
1999     if ((t = strchr(s, '(')) && t < PL_bufptr)
2000         return;
2001
2002     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2003                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2004                      (int)(s - PL_last_uni), PL_last_uni);
2005 }
2006
2007 /*
2008  * LOP : macro to build a list operator.  Its behaviour has been replaced
2009  * with a subroutine, S_lop() for which LOP is just another name.
2010  */
2011
2012 #define LOP(f,x) return lop(f,x,s)
2013
2014 /*
2015  * S_lop
2016  * Build a list operator (or something that might be one).  The rules:
2017  *  - if we have a next token, then it's a list operator [why?]
2018  *  - if the next thing is an opening paren, then it's a function
2019  *  - else it's a list operator
2020  */
2021
2022 STATIC I32
2023 S_lop(pTHX_ I32 f, int x, char *s)
2024 {
2025     dVAR;
2026
2027     PERL_ARGS_ASSERT_LOP;
2028
2029     pl_yylval.ival = f;
2030     CLINE;
2031     PL_expect = x;
2032     PL_bufptr = s;
2033     PL_last_lop = PL_oldbufptr;
2034     PL_last_lop_op = (OPCODE)f;
2035 #ifdef PERL_MAD
2036     if (PL_lasttoke)
2037         goto lstop;
2038 #else
2039     if (PL_nexttoke)
2040         goto lstop;
2041 #endif
2042     if (*s == '(')
2043         return REPORT(FUNC);
2044     s = PEEKSPACE(s);
2045     if (*s == '(')
2046         return REPORT(FUNC);
2047     else {
2048         lstop:
2049         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2050             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2051         return REPORT(LSTOP);
2052     }
2053 }
2054
2055 #ifdef PERL_MAD
2056  /*
2057  * S_start_force
2058  * Sets up for an eventual force_next().  start_force(0) basically does
2059  * an unshift, while start_force(-1) does a push.  yylex removes items
2060  * on the "pop" end.
2061  */
2062
2063 STATIC void
2064 S_start_force(pTHX_ int where)
2065 {
2066     int i;
2067
2068     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
2069         where = PL_lasttoke;
2070     assert(PL_curforce < 0 || PL_curforce == where);
2071     if (PL_curforce != where) {
2072         for (i = PL_lasttoke; i > where; --i) {
2073             PL_nexttoke[i] = PL_nexttoke[i-1];
2074         }
2075         PL_lasttoke++;
2076     }
2077     if (PL_curforce < 0)        /* in case of duplicate start_force() */
2078         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2079     PL_curforce = where;
2080     if (PL_nextwhite) {
2081         if (PL_madskills)
2082             curmad('^', newSVpvs(""));
2083         CURMAD('_', PL_nextwhite);
2084     }
2085 }
2086
2087 STATIC void
2088 S_curmad(pTHX_ char slot, SV *sv)
2089 {
2090     MADPROP **where;
2091
2092     if (!sv)
2093         return;
2094     if (PL_curforce < 0)
2095         where = &PL_thismad;
2096     else
2097         where = &PL_nexttoke[PL_curforce].next_mad;
2098
2099     if (PL_faketokens)
2100         sv_setpvs(sv, "");
2101     else {
2102         if (!IN_BYTES) {
2103             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2104                 SvUTF8_on(sv);
2105             else if (PL_encoding) {
2106                 sv_recode_to_utf8(sv, PL_encoding);
2107             }
2108         }
2109     }
2110
2111     /* keep a slot open for the head of the list? */
2112     if (slot != '_' && *where && (*where)->mad_key == '^') {
2113         (*where)->mad_key = slot;
2114         sv_free(MUTABLE_SV(((*where)->mad_val)));
2115         (*where)->mad_val = (void*)sv;
2116     }
2117     else
2118         addmad(newMADsv(slot, sv), where, 0);
2119 }
2120 #else
2121 #  define start_force(where)    NOOP
2122 #  define curmad(slot, sv)      NOOP
2123 #endif
2124
2125 /*
2126  * S_force_next
2127  * When the lexer realizes it knows the next token (for instance,
2128  * it is reordering tokens for the parser) then it can call S_force_next
2129  * to know what token to return the next time the lexer is called.  Caller
2130  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2131  * and possibly PL_expect to ensure the lexer handles the token correctly.
2132  */
2133
2134 STATIC void
2135 S_force_next(pTHX_ I32 type)
2136 {
2137     dVAR;
2138 #ifdef DEBUGGING
2139     if (DEBUG_T_TEST) {
2140         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2141         tokereport(type, &NEXTVAL_NEXTTOKE);
2142     }
2143 #endif
2144 #ifdef PERL_MAD
2145     if (PL_curforce < 0)
2146         start_force(PL_lasttoke);
2147     PL_nexttoke[PL_curforce].next_type = type;
2148     if (PL_lex_state != LEX_KNOWNEXT)
2149         PL_lex_defer = PL_lex_state;
2150     PL_lex_state = LEX_KNOWNEXT;
2151     PL_lex_expect = PL_expect;
2152     PL_curforce = -1;
2153 #else
2154     PL_nexttype[PL_nexttoke] = type;
2155     PL_nexttoke++;
2156     if (PL_lex_state != LEX_KNOWNEXT) {
2157         PL_lex_defer = PL_lex_state;
2158         PL_lex_expect = PL_expect;
2159         PL_lex_state = LEX_KNOWNEXT;
2160     }
2161 #endif
2162 }
2163
2164 void
2165 Perl_yyunlex(pTHX)
2166 {
2167     int yyc = PL_parser->yychar;
2168     if (yyc != YYEMPTY) {
2169         if (yyc) {
2170             start_force(-1);
2171             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2172             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2173                 PL_lex_allbrackets--;
2174                 PL_lex_brackets--;
2175                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2176             } else if (yyc == '('/*)*/) {
2177                 PL_lex_allbrackets--;
2178                 yyc |= (2<<24);
2179             }
2180             force_next(yyc);
2181         }
2182         PL_parser->yychar = YYEMPTY;
2183     }
2184 }
2185
2186 STATIC SV *
2187 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2188 {
2189     dVAR;
2190     SV * const sv = newSVpvn_utf8(start, len,
2191                                   !IN_BYTES
2192                                   && UTF
2193                                   && !is_ascii_string((const U8*)start, len)
2194                                   && is_utf8_string((const U8*)start, len));
2195     return sv;
2196 }
2197
2198 /*
2199  * S_force_word
2200  * When the lexer knows the next thing is a word (for instance, it has
2201  * just seen -> and it knows that the next char is a word char, then
2202  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2203  * lookahead.
2204  *
2205  * Arguments:
2206  *   char *start : buffer position (must be within PL_linestr)
2207  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2208  *   int check_keyword : if true, Perl checks to make sure the word isn't
2209  *       a keyword (do this if the word is a label, e.g. goto FOO)
2210  *   int allow_pack : if true, : characters will also be allowed (require,
2211  *       use, etc. do this)
2212  *   int allow_initial_tick : used by the "sub" lexer only.
2213  */
2214
2215 STATIC char *
2216 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2217 {
2218     dVAR;
2219     char *s;
2220     STRLEN len;
2221
2222     PERL_ARGS_ASSERT_FORCE_WORD;
2223
2224     start = SKIPSPACE1(start);
2225     s = start;
2226     if (isIDFIRST_lazy_if(s,UTF) ||
2227         (allow_pack && *s == ':') )
2228     {
2229         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2230         if (check_keyword) {
2231           char *s2 = PL_tokenbuf;
2232           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2233             s2 += 6, len -= 6;
2234           if (keyword(s2, len, 0))
2235             return start;
2236         }
2237         start_force(PL_curforce);
2238         if (PL_madskills)
2239             curmad('X', newSVpvn(start,s-start));
2240         if (token == METHOD) {
2241             s = SKIPSPACE1(s);
2242             if (*s == '(')
2243                 PL_expect = XTERM;
2244             else {
2245                 PL_expect = XOPERATOR;
2246             }
2247         }
2248         if (PL_madskills)
2249             curmad('g', newSVpvs( "forced" ));
2250         NEXTVAL_NEXTTOKE.opval
2251             = (OP*)newSVOP(OP_CONST,0,
2252                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2253         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2254         force_next(token);
2255     }
2256     return s;
2257 }
2258
2259 /*
2260  * S_force_ident
2261  * Called when the lexer wants $foo *foo &foo etc, but the program
2262  * text only contains the "foo" portion.  The first argument is a pointer
2263  * to the "foo", and the second argument is the type symbol to prefix.
2264  * Forces the next token to be a "WORD".
2265  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2266  */
2267
2268 STATIC void
2269 S_force_ident(pTHX_ const char *s, int kind)
2270 {
2271     dVAR;
2272
2273     PERL_ARGS_ASSERT_FORCE_IDENT;
2274
2275     if (s[0]) {
2276         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2277         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2278                                                                 UTF ? SVf_UTF8 : 0));
2279         start_force(PL_curforce);
2280         NEXTVAL_NEXTTOKE.opval = o;
2281         force_next(WORD);
2282         if (kind) {
2283             o->op_private = OPpCONST_ENTERED;
2284             /* XXX see note in pp_entereval() for why we forgo typo
2285                warnings if the symbol must be introduced in an eval.
2286                GSAR 96-10-12 */
2287             gv_fetchpvn_flags(s, len,
2288                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2289                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2290                               kind == '$' ? SVt_PV :
2291                               kind == '@' ? SVt_PVAV :
2292                               kind == '%' ? SVt_PVHV :
2293                               SVt_PVGV
2294                               );
2295         }
2296     }
2297 }
2298
2299 static void
2300 S_force_ident_maybe_lex(pTHX_ char pit)
2301 {
2302     start_force(PL_curforce);
2303     NEXTVAL_NEXTTOKE.ival = pit;
2304     force_next('p');
2305 }
2306
2307 NV
2308 Perl_str_to_version(pTHX_ SV *sv)
2309 {
2310     NV retval = 0.0;
2311     NV nshift = 1.0;
2312     STRLEN len;
2313     const char *start = SvPV_const(sv,len);
2314     const char * const end = start + len;
2315     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2316
2317     PERL_ARGS_ASSERT_STR_TO_VERSION;
2318
2319     while (start < end) {
2320         STRLEN skip;
2321         UV n;
2322         if (utf)
2323             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2324         else {
2325             n = *(U8*)start;
2326             skip = 1;
2327         }
2328         retval += ((NV)n)/nshift;
2329         start += skip;
2330         nshift *= 1000;
2331     }
2332     return retval;
2333 }
2334
2335 /*
2336  * S_force_version
2337  * Forces the next token to be a version number.
2338  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2339  * and if "guessing" is TRUE, then no new token is created (and the caller
2340  * must use an alternative parsing method).
2341  */
2342
2343 STATIC char *
2344 S_force_version(pTHX_ char *s, int guessing)
2345 {
2346     dVAR;
2347     OP *version = NULL;
2348     char *d;
2349 #ifdef PERL_MAD
2350     I32 startoff = s - SvPVX(PL_linestr);
2351 #endif
2352
2353     PERL_ARGS_ASSERT_FORCE_VERSION;
2354
2355     s = SKIPSPACE1(s);
2356
2357     d = s;
2358     if (*d == 'v')
2359         d++;
2360     if (isDIGIT(*d)) {
2361         while (isDIGIT(*d) || *d == '_' || *d == '.')
2362             d++;
2363 #ifdef PERL_MAD
2364         if (PL_madskills) {
2365             start_force(PL_curforce);
2366             curmad('X', newSVpvn(s,d-s));
2367         }
2368 #endif
2369         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2370             SV *ver;
2371 #ifdef USE_LOCALE_NUMERIC
2372             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2373             setlocale(LC_NUMERIC, "C");
2374 #endif
2375             s = scan_num(s, &pl_yylval);
2376 #ifdef USE_LOCALE_NUMERIC
2377             setlocale(LC_NUMERIC, loc);
2378             Safefree(loc);
2379 #endif
2380             version = pl_yylval.opval;
2381             ver = cSVOPx(version)->op_sv;
2382             if (SvPOK(ver) && !SvNIOK(ver)) {
2383                 SvUPGRADE(ver, SVt_PVNV);
2384                 SvNV_set(ver, str_to_version(ver));
2385                 SvNOK_on(ver);          /* hint that it is a version */
2386             }
2387         }
2388         else if (guessing) {
2389 #ifdef PERL_MAD
2390             if (PL_madskills) {
2391                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2392                 PL_nextwhite = 0;
2393                 s = SvPVX(PL_linestr) + startoff;
2394             }
2395 #endif
2396             return s;
2397         }
2398     }
2399
2400 #ifdef PERL_MAD
2401     if (PL_madskills && !version) {
2402         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2403         PL_nextwhite = 0;
2404         s = SvPVX(PL_linestr) + startoff;
2405     }
2406 #endif
2407     /* NOTE: The parser sees the package name and the VERSION swapped */
2408     start_force(PL_curforce);
2409     NEXTVAL_NEXTTOKE.opval = version;
2410     force_next(WORD);
2411
2412     return s;
2413 }
2414
2415 /*
2416  * S_force_strict_version
2417  * Forces the next token to be a version number using strict syntax rules.
2418  */
2419
2420 STATIC char *
2421 S_force_strict_version(pTHX_ char *s)
2422 {
2423     dVAR;
2424     OP *version = NULL;
2425 #ifdef PERL_MAD
2426     I32 startoff = s - SvPVX(PL_linestr);
2427 #endif
2428     const char *errstr = NULL;
2429
2430     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2431
2432     while (isSPACE(*s)) /* leading whitespace */
2433         s++;
2434
2435     if (is_STRICT_VERSION(s,&errstr)) {
2436         SV *ver = newSV(0);
2437         s = (char *)scan_version(s, ver, 0);
2438         version = newSVOP(OP_CONST, 0, ver);
2439     }
2440     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2441             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2442     {
2443         PL_bufptr = s;
2444         if (errstr)
2445             yyerror(errstr); /* version required */
2446         return s;
2447     }
2448
2449 #ifdef PERL_MAD
2450     if (PL_madskills && !version) {
2451         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2452         PL_nextwhite = 0;
2453         s = SvPVX(PL_linestr) + startoff;
2454     }
2455 #endif
2456     /* NOTE: The parser sees the package name and the VERSION swapped */
2457     start_force(PL_curforce);
2458     NEXTVAL_NEXTTOKE.opval = version;
2459     force_next(WORD);
2460
2461     return s;
2462 }
2463
2464 /*
2465  * S_tokeq
2466  * Tokenize a quoted string passed in as an SV.  It finds the next
2467  * chunk, up to end of string or a backslash.  It may make a new
2468  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2469  * turns \\ into \.
2470  */
2471
2472 STATIC SV *
2473 S_tokeq(pTHX_ SV *sv)
2474 {
2475     dVAR;
2476     char *s;
2477     char *send;
2478     char *d;
2479     STRLEN len = 0;
2480     SV *pv = sv;
2481
2482     PERL_ARGS_ASSERT_TOKEQ;
2483
2484     if (!SvLEN(sv))
2485         goto finish;
2486
2487     s = SvPV_force(sv, len);
2488     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2489         goto finish;
2490     send = s + len;
2491     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2492     while (s < send && !(*s == '\\' && s[1] == '\\'))
2493         s++;
2494     if (s == send)
2495         goto finish;
2496     d = s;
2497     if ( PL_hints & HINT_NEW_STRING ) {
2498         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2499     }
2500     while (s < send) {
2501         if (*s == '\\') {
2502             if (s + 1 < send && (s[1] == '\\'))
2503                 s++;            /* all that, just for this */
2504         }
2505         *d++ = *s++;
2506     }
2507     *d = '\0';
2508     SvCUR_set(sv, d - SvPVX_const(sv));
2509   finish:
2510     if ( PL_hints & HINT_NEW_STRING )
2511        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2512     return sv;
2513 }
2514
2515 /*
2516  * Now come three functions related to double-quote context,
2517  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2518  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2519  * interact with PL_lex_state, and create fake ( ... ) argument lists
2520  * to handle functions and concatenation.
2521  * For example,
2522  *   "foo\lbar"
2523  * is tokenised as
2524  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2525  */
2526
2527 /*
2528  * S_sublex_start
2529  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2530  *
2531  * Pattern matching will set PL_lex_op to the pattern-matching op to
2532  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2533  *
2534  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2535  *
2536  * Everything else becomes a FUNC.
2537  *
2538  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2539  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2540  * call to S_sublex_push().
2541  */
2542
2543 STATIC I32
2544 S_sublex_start(pTHX)
2545 {
2546     dVAR;
2547     const I32 op_type = pl_yylval.ival;
2548
2549     if (op_type == OP_NULL) {
2550         pl_yylval.opval = PL_lex_op;
2551         PL_lex_op = NULL;
2552         return THING;
2553     }
2554     if (op_type == OP_CONST || op_type == OP_READLINE) {
2555         SV *sv = tokeq(PL_lex_stuff);
2556
2557         if (SvTYPE(sv) == SVt_PVIV) {
2558             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2559             STRLEN len;
2560             const char * const p = SvPV_const(sv, len);
2561             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2562             SvREFCNT_dec(sv);
2563             sv = nsv;
2564         }
2565         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2566         PL_lex_stuff = NULL;
2567         /* Allow <FH> // "foo" */
2568         if (op_type == OP_READLINE)
2569             PL_expect = XTERMORDORDOR;
2570         return THING;
2571     }
2572     else if (op_type == OP_BACKTICK && PL_lex_op) {
2573         /* readpipe() vas overriden */
2574         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2575         pl_yylval.opval = PL_lex_op;
2576         PL_lex_op = NULL;
2577         PL_lex_stuff = NULL;
2578         return THING;
2579     }
2580
2581     PL_sublex_info.super_state = PL_lex_state;
2582     PL_sublex_info.sub_inwhat = (U16)op_type;
2583     PL_sublex_info.sub_op = PL_lex_op;
2584     PL_lex_state = LEX_INTERPPUSH;
2585
2586     PL_expect = XTERM;
2587     if (PL_lex_op) {
2588         pl_yylval.opval = PL_lex_op;
2589         PL_lex_op = NULL;
2590         return PMFUNC;
2591     }
2592     else
2593         return FUNC;
2594 }
2595
2596 /*
2597  * S_sublex_push
2598  * Create a new scope to save the lexing state.  The scope will be
2599  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2600  * to the uc, lc, etc. found before.
2601  * Sets PL_lex_state to LEX_INTERPCONCAT.
2602  */
2603
2604 STATIC I32
2605 S_sublex_push(pTHX)
2606 {
2607     dVAR;
2608     LEXSHARED *shared;
2609     ENTER;
2610
2611     PL_lex_state = PL_sublex_info.super_state;
2612     SAVEBOOL(PL_lex_dojoin);
2613     SAVEI32(PL_lex_brackets);
2614     SAVEI32(PL_lex_allbrackets);
2615     SAVEI32(PL_lex_formbrack);
2616     SAVEI8(PL_lex_fakeeof);
2617     SAVEI32(PL_lex_casemods);
2618     SAVEI32(PL_lex_starts);
2619     SAVEI8(PL_lex_state);
2620     SAVESPTR(PL_lex_repl);
2621     SAVEVPTR(PL_lex_inpat);
2622     SAVEI16(PL_lex_inwhat);
2623     SAVECOPLINE(PL_curcop);
2624     SAVEPPTR(PL_bufptr);
2625     SAVEPPTR(PL_bufend);
2626     SAVEPPTR(PL_oldbufptr);
2627     SAVEPPTR(PL_oldoldbufptr);
2628     SAVEPPTR(PL_last_lop);
2629     SAVEPPTR(PL_last_uni);
2630     SAVEPPTR(PL_linestart);
2631     SAVESPTR(PL_linestr);
2632     SAVEGENERICPV(PL_lex_brackstack);
2633     SAVEGENERICPV(PL_lex_casestack);
2634     SAVEGENERICPV(PL_parser->lex_shared);
2635     SAVEBOOL(PL_parser->lex_re_reparsing);
2636
2637     /* The here-doc parser needs to be able to peek into outer lexing
2638        scopes to find the body of the here-doc.  So we put PL_linestr and
2639        PL_bufptr into lex_shared, to ‘share’ those values.
2640      */
2641     PL_parser->lex_shared->ls_linestr = PL_linestr;
2642     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2643
2644     PL_linestr = PL_lex_stuff;
2645     PL_lex_repl = PL_sublex_info.repl;
2646     PL_lex_stuff = NULL;
2647     PL_sublex_info.repl = NULL;
2648
2649     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2650         = SvPVX(PL_linestr);
2651     PL_bufend += SvCUR(PL_linestr);
2652     PL_last_lop = PL_last_uni = NULL;
2653     SAVEFREESV(PL_linestr);
2654     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2655
2656     PL_lex_dojoin = FALSE;
2657     PL_lex_brackets = PL_lex_formbrack = 0;
2658     PL_lex_allbrackets = 0;
2659     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2660     Newx(PL_lex_brackstack, 120, char);
2661     Newx(PL_lex_casestack, 12, char);
2662     PL_lex_casemods = 0;
2663     *PL_lex_casestack = '\0';
2664     PL_lex_starts = 0;
2665     PL_lex_state = LEX_INTERPCONCAT;
2666     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2667
2668     Newxz(shared, 1, LEXSHARED);
2669     shared->ls_prev = PL_parser->lex_shared;
2670     PL_parser->lex_shared = shared;
2671
2672     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2673     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2674     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2675         PL_lex_inpat = PL_sublex_info.sub_op;
2676     else
2677         PL_lex_inpat = NULL;
2678
2679     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2680     PL_in_eval &= ~EVAL_RE_REPARSING;
2681
2682     return '(';
2683 }
2684
2685 /*
2686  * S_sublex_done
2687  * Restores lexer state after a S_sublex_push.
2688  */
2689
2690 STATIC I32
2691 S_sublex_done(pTHX)
2692 {
2693     dVAR;
2694     if (!PL_lex_starts++) {
2695         SV * const sv = newSVpvs("");
2696         if (SvUTF8(PL_linestr))
2697             SvUTF8_on(sv);
2698         PL_expect = XOPERATOR;
2699         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2700         return THING;
2701     }
2702
2703     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2704         PL_lex_state = LEX_INTERPCASEMOD;
2705         return yylex();
2706     }
2707
2708     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2709     assert(PL_lex_inwhat != OP_TRANSR);
2710     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2711         PL_linestr = PL_lex_repl;
2712         PL_lex_inpat = 0;
2713         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2714         PL_bufend += SvCUR(PL_linestr);
2715         PL_last_lop = PL_last_uni = NULL;
2716         PL_lex_dojoin = FALSE;
2717         PL_lex_brackets = 0;
2718         PL_lex_allbrackets = 0;
2719         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2720         PL_lex_casemods = 0;
2721         *PL_lex_casestack = '\0';
2722         PL_lex_starts = 0;
2723         if (SvEVALED(PL_lex_repl)) {
2724             PL_lex_state = LEX_INTERPNORMAL;
2725             PL_lex_starts++;
2726             /*  we don't clear PL_lex_repl here, so that we can check later
2727                 whether this is an evalled subst; that means we rely on the
2728                 logic to ensure sublex_done() is called again only via the
2729                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2730         }
2731         else {
2732             PL_lex_state = LEX_INTERPCONCAT;
2733             PL_lex_repl = NULL;
2734         }
2735         return ',';
2736     }
2737     else {
2738 #ifdef PERL_MAD
2739         if (PL_madskills) {
2740             if (PL_thiswhite) {
2741                 if (!PL_endwhite)
2742                     PL_endwhite = newSVpvs("");
2743                 sv_catsv(PL_endwhite, PL_thiswhite);
2744                 PL_thiswhite = 0;
2745             }
2746             if (PL_thistoken)
2747                 sv_setpvs(PL_thistoken,"");
2748             else
2749                 PL_realtokenstart = -1;
2750         }
2751 #endif
2752         LEAVE;
2753         PL_bufend = SvPVX(PL_linestr);
2754         PL_bufend += SvCUR(PL_linestr);
2755         PL_expect = XOPERATOR;
2756         PL_sublex_info.sub_inwhat = 0;
2757         return ')';
2758     }
2759 }
2760
2761 PERL_STATIC_INLINE SV*
2762 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2763 {
2764     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2765      * interior, hence to the "}".  Finds what the name resolves to, returning
2766      * an SV* containing it; NULL if no valid one found */
2767
2768     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2769
2770     HV * table;
2771     SV **cvp;
2772     SV *cv;
2773     SV *rv;
2774     HV *stash;
2775     const U8* first_bad_char_loc;
2776     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2777
2778     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2779
2780     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2781                                      e - backslash_ptr,
2782                                      &first_bad_char_loc))
2783     {
2784         /* If warnings are on, this will print a more detailed analysis of what
2785          * is wrong than the error message below */
2786         utf8n_to_uvuni(first_bad_char_loc,
2787                        e - ((char *) first_bad_char_loc),
2788                        NULL, 0);
2789
2790         /* We deliberately don't try to print the malformed character, which
2791          * might not print very well; it also may be just the first of many
2792          * malformations, so don't print what comes after it */
2793         yyerror(Perl_form(aTHX_
2794             "Malformed UTF-8 character immediately after '%.*s'",
2795             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2796         return NULL;
2797     }
2798
2799     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2800                         /* include the <}> */
2801                         e - backslash_ptr + 1);
2802     if (! SvPOK(res)) {
2803         SvREFCNT_dec_NN(res);
2804         return NULL;
2805     }
2806
2807     /* See if the charnames handler is the Perl core's, and if so, we can skip
2808      * the validation needed for a user-supplied one, as Perl's does its own
2809      * validation. */
2810     table = GvHV(PL_hintgv);             /* ^H */
2811     cvp = hv_fetchs(table, "charnames", FALSE);
2812     if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2813         && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2814     {
2815         const char * const name = HvNAME(stash);
2816         if strEQ(name, "_charnames") {
2817            return res;
2818        }
2819     }
2820
2821     /* Here, it isn't Perl's charname handler.  We can't rely on a
2822      * user-supplied handler to validate the input name.  For non-ut8 input,
2823      * look to see that the first character is legal.  Then loop through the
2824      * rest checking that each is a continuation */
2825
2826     /* This code needs to be sync'ed with a regex in _charnames.pm which does
2827      * the same thing */
2828
2829     if (! UTF) {
2830         if (! isALPHAU(*s)) {
2831             goto bad_charname;
2832         }
2833         s++;
2834         while (s < e) {
2835             if (! isCHARNAME_CONT(*s)) {
2836                 goto bad_charname;
2837             }
2838             if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2839                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2840                            "A sequence of multiple spaces in a charnames "
2841                            "alias definition is deprecated");
2842             }
2843             s++;
2844         }
2845         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2846             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2847                         "Trailing white-space in a charnames alias "
2848                         "definition is deprecated");
2849         }
2850     }
2851     else {
2852         /* Similarly for utf8.  For invariants can check directly; for other
2853          * Latin1, can calculate their code point and check; otherwise  use a
2854          * swash */
2855         if (UTF8_IS_INVARIANT(*s)) {
2856             if (! isALPHAU(*s)) {
2857                 goto bad_charname;
2858             }
2859             s++;
2860         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2861             if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
2862                 goto bad_charname;
2863             }
2864             s += 2;
2865         }
2866         else {
2867             if (! PL_utf8_charname_begin) {
2868                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2869                 PL_utf8_charname_begin = _core_swash_init("utf8",
2870                                                         "_Perl_Charname_Begin",
2871                                                         &PL_sv_undef,
2872                                                         1, 0, NULL, &flags);
2873             }
2874             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2875                 goto bad_charname;
2876             }
2877             s += UTF8SKIP(s);
2878         }
2879
2880         while (s < e) {
2881             if (UTF8_IS_INVARIANT(*s)) {
2882                 if (! isCHARNAME_CONT(*s)) {
2883                     goto bad_charname;
2884                 }
2885                 if (*s == ' ' && *(s-1) == ' '
2886                  && ckWARN_d(WARN_DEPRECATED)) {
2887                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2888                                "A sequence of multiple spaces in a charnam"
2889                                "es alias definition is deprecated");
2890                 }
2891                 s++;
2892             }
2893             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2894                 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2895                                                                     *(s+1)))))
2896                 {
2897                     goto bad_charname;
2898                 }
2899                 s += 2;
2900             }
2901             else {
2902                 if (! PL_utf8_charname_continue) {
2903                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2904                     PL_utf8_charname_continue = _core_swash_init("utf8",
2905                                                 "_Perl_Charname_Continue",
2906                                                 &PL_sv_undef,
2907                                                 1, 0, NULL, &flags);
2908                 }
2909                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2910                     goto bad_charname;
2911                 }
2912                 s += UTF8SKIP(s);
2913             }
2914         }
2915         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2916             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2917                        "Trailing white-space in a charnames alias "
2918                        "definition is deprecated");
2919         }
2920     }
2921
2922     if (SvUTF8(res)) { /* Don't accept malformed input */
2923         const U8* first_bad_char_loc;
2924         STRLEN len;
2925         const char* const str = SvPV_const(res, len);
2926         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2927             /* If warnings are on, this will print a more detailed analysis of
2928              * what is wrong than the error message below */
2929             utf8n_to_uvuni(first_bad_char_loc,
2930                            (char *) first_bad_char_loc - str,
2931                            NULL, 0);
2932
2933             /* We deliberately don't try to print the malformed character,
2934              * which might not print very well; it also may be just the first
2935              * of many malformations, so don't print what comes after it */
2936             yyerror_pv(
2937               Perl_form(aTHX_
2938                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2939                  (int) (e - backslash_ptr + 1), backslash_ptr,
2940                  (int) ((char *) first_bad_char_loc - str), str
2941               ),
2942               SVf_UTF8);
2943             return NULL;
2944         }
2945     }
2946
2947     return res;
2948
2949   bad_charname: {
2950         int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2951
2952         /* The final %.*s makes sure that should the trailing NUL be missing
2953          * that this print won't run off the end of the string */
2954         yyerror_pv(
2955           Perl_form(aTHX_
2956             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2957             (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2958             (int)(e - s + bad_char_size), s + bad_char_size
2959           ),
2960           UTF ? SVf_UTF8 : 0);
2961         return NULL;
2962     }
2963 }
2964
2965 /*
2966   scan_const
2967
2968   Extracts the next constant part of a pattern, double-quoted string,
2969   or transliteration.  This is terrifying code.
2970
2971   For example, in parsing the double-quoted string "ab\x63$d", it would
2972   stop at the '$' and return an OP_CONST containing 'abc'.
2973
2974   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2975   processing a pattern (PL_lex_inpat is true), a transliteration
2976   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2977
2978   Returns a pointer to the character scanned up to. If this is
2979   advanced from the start pointer supplied (i.e. if anything was
2980   successfully parsed), will leave an OP_CONST for the substring scanned
2981   in pl_yylval. Caller must intuit reason for not parsing further
2982   by looking at the next characters herself.
2983
2984   In patterns:
2985     expand:
2986       \N{FOO}  => \N{U+hex_for_character_FOO}
2987       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2988
2989     pass through:
2990         all other \-char, including \N and \N{ apart from \N{ABC}
2991
2992     stops on:
2993         @ and $ where it appears to be a var, but not for $ as tail anchor
2994         \l \L \u \U \Q \E
2995         (?{  or  (??{
2996
2997
2998   In transliterations:
2999     characters are VERY literal, except for - not at the start or end
3000     of the string, which indicates a range. If the range is in bytes,
3001     scan_const expands the range to the full set of intermediate
3002     characters. If the range is in utf8, the hyphen is replaced with
3003     a certain range mark which will be handled by pmtrans() in op.c.
3004
3005   In double-quoted strings:
3006     backslashes:
3007       double-quoted style: \r and \n
3008       constants: \x31, etc.
3009       deprecated backrefs: \1 (in substitution replacements)
3010       case and quoting: \U \Q \E
3011     stops on @ and $
3012
3013   scan_const does *not* construct ops to handle interpolated strings.
3014   It stops processing as soon as it finds an embedded $ or @ variable
3015   and leaves it to the caller to work out what's going on.
3016
3017   embedded arrays (whether in pattern or not) could be:
3018       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3019
3020   $ in double-quoted strings must be the symbol of an embedded scalar.
3021
3022   $ in pattern could be $foo or could be tail anchor.  Assumption:
3023   it's a tail anchor if $ is the last thing in the string, or if it's
3024   followed by one of "()| \r\n\t"
3025
3026   \1 (backreferences) are turned into $1 in substitutions
3027
3028   The structure of the code is
3029       while (there's a character to process) {
3030           handle transliteration ranges
3031           skip regexp comments /(?#comment)/ and codes /(?{code})/
3032           skip #-initiated comments in //x patterns
3033           check for embedded arrays
3034           check for embedded scalars
3035           if (backslash) {
3036               deprecate \1 in substitution replacements
3037               handle string-changing backslashes \l \U \Q \E, etc.
3038               switch (what was escaped) {
3039                   handle \- in a transliteration (becomes a literal -)
3040                   if a pattern and not \N{, go treat as regular character
3041                   handle \132 (octal characters)
3042                   handle \x15 and \x{1234} (hex characters)
3043                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3044                   handle \cV (control characters)
3045                   handle printf-style backslashes (\f, \r, \n, etc)
3046               } (end switch)
3047               continue
3048           } (end if backslash)
3049           handle regular character
3050     } (end while character to read)
3051
3052 */
3053
3054 STATIC char *
3055 S_scan_const(pTHX_ char *start)
3056 {
3057     dVAR;
3058     char *send = PL_bufend;             /* end of the constant */
3059     SV *sv = newSV(send - start);               /* sv for the constant.  See
3060                                                    note below on sizing. */
3061     char *s = start;                    /* start of the constant */
3062     char *d = SvPVX(sv);                /* destination for copies */
3063     bool dorange = FALSE;                       /* are we in a translit range? */
3064     bool didrange = FALSE;                      /* did we just finish a range? */
3065     bool in_charclass = FALSE;                  /* within /[...]/ */
3066     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
3067     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
3068                                                    to be UTF8?  But, this can
3069                                                    show as true when the source
3070                                                    isn't utf8, as for example
3071                                                    when it is entirely composed
3072                                                    of hex constants */
3073     SV *res;                            /* result from charnames */
3074
3075     /* Note on sizing:  The scanned constant is placed into sv, which is
3076      * initialized by newSV() assuming one byte of output for every byte of
3077      * input.  This routine expects newSV() to allocate an extra byte for a
3078      * trailing NUL, which this routine will append if it gets to the end of
3079      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3080      * CAPITAL LETTER A}), or more output than input if the constant ends up
3081      * recoded to utf8, but each time a construct is found that might increase
3082      * the needed size, SvGROW() is called.  Its size parameter each time is
3083      * based on the best guess estimate at the time, namely the length used so
3084      * far, plus the length the current construct will occupy, plus room for
3085      * the trailing NUL, plus one byte for every input byte still unscanned */
3086
3087     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3088                        before set */
3089 #ifdef EBCDIC
3090     UV literal_endpoint = 0;
3091     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3092 #endif
3093
3094     PERL_ARGS_ASSERT_SCAN_CONST;
3095
3096     assert(PL_lex_inwhat != OP_TRANSR);
3097     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3098         /* If we are doing a trans and we know we want UTF8 set expectation */
3099         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3100         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3101     }
3102
3103     /* Protect sv from errors and fatal warnings. */
3104     ENTER_with_name("scan_const");
3105     SAVEFREESV(sv);
3106
3107     while (s < send || dorange) {
3108
3109         /* get transliterations out of the way (they're most literal) */
3110         if (PL_lex_inwhat == OP_TRANS) {
3111             /* expand a range A-Z to the full set of characters.  AIE! */
3112             if (dorange) {
3113                 I32 i;                          /* current expanded character */
3114                 I32 min;                        /* first character in range */
3115                 I32 max;                        /* last character in range */
3116
3117 #ifdef EBCDIC
3118                 UV uvmax = 0;
3119 #endif
3120
3121                 if (has_utf8
3122 #ifdef EBCDIC
3123                     && !native_range
3124 #endif
3125                 ) {
3126                     char * const c = (char*)utf8_hop((U8*)d, -1);
3127                     char *e = d++;
3128                     while (e-- > c)
3129                         *(e + 1) = *e;
3130                     *c = (char)UTF_TO_NATIVE(0xff);
3131                     /* mark the range as done, and continue */
3132                     dorange = FALSE;
3133                     didrange = TRUE;
3134                     continue;
3135                 }
3136
3137                 i = d - SvPVX_const(sv);                /* remember current offset */
3138 #ifdef EBCDIC
3139                 SvGROW(sv,
3140                        SvLEN(sv) + (has_utf8 ?
3141                                     (512 - UTF_CONTINUATION_MARK +
3142                                      UNISKIP(0x100))
3143                                     : 256));
3144                 /* How many two-byte within 0..255: 128 in UTF-8,
3145                  * 96 in UTF-8-mod. */
3146 #else
3147                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
3148 #endif
3149                 d = SvPVX(sv) + i;              /* refresh d after realloc */
3150 #ifdef EBCDIC
3151                 if (has_utf8) {
3152                     int j;
3153                     for (j = 0; j <= 1; j++) {
3154                         char * const c = (char*)utf8_hop((U8*)d, -1);
3155                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3156                         if (j)
3157                             min = (U8)uv;
3158                         else if (uv < 256)
3159                             max = (U8)uv;
3160                         else {
3161                             max = (U8)0xff; /* only to \xff */
3162                             uvmax = uv; /* \x{100} to uvmax */
3163                         }
3164                         d = c; /* eat endpoint chars */
3165                      }
3166                 }
3167                else {
3168 #endif
3169                    d -= 2;              /* eat the first char and the - */
3170                    min = (U8)*d;        /* first char in range */
3171                    max = (U8)d[1];      /* last char in range  */
3172 #ifdef EBCDIC
3173                }
3174 #endif
3175
3176                 if (min > max) {
3177                     Perl_croak(aTHX_
3178                                "Invalid range \"%c-%c\" in transliteration operator",
3179                                (char)min, (char)max);
3180                 }
3181
3182 #ifdef EBCDIC
3183                 if (literal_endpoint == 2 &&
3184                     ((isLOWER(min) && isLOWER(max)) ||
3185                      (isUPPER(min) && isUPPER(max)))) {
3186                     if (isLOWER(min)) {
3187                         for (i = min; i <= max; i++)
3188                             if (isLOWER(i))
3189                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
3190                     } else {
3191                         for (i = min; i <= max; i++)
3192                             if (isUPPER(i))
3193                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
3194                     }
3195                 }
3196                 else
3197 #endif
3198                     for (i = min; i <= max; i++)
3199 #ifdef EBCDIC
3200                         if (has_utf8) {
3201                             const U8 ch = (U8)NATIVE_TO_UTF(i);
3202                             if (UNI_IS_INVARIANT(ch))
3203                                 *d++ = (U8)i;
3204                             else {
3205                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3206                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3207                             }
3208                         }
3209                         else
3210 #endif
3211                             *d++ = (char)i;
3212
3213 #ifdef EBCDIC
3214                 if (uvmax) {
3215                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3216                     if (uvmax > 0x101)
3217                         *d++ = (char)UTF_TO_NATIVE(0xff);
3218                     if (uvmax > 0x100)
3219                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3220                 }
3221 #endif
3222
3223                 /* mark the range as done, and continue */
3224                 dorange = FALSE;
3225                 didrange = TRUE;
3226 #ifdef EBCDIC
3227                 literal_endpoint = 0;
3228 #endif
3229                 continue;
3230             }
3231
3232             /* range begins (ignore - as first or last char) */
3233             else if (*s == '-' && s+1 < send  && s != start) {
3234                 if (didrange) {
3235                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3236                 }
3237                 if (has_utf8
3238 #ifdef EBCDIC
3239                     && !native_range
3240 #endif
3241                     ) {
3242                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
3243                     s++;
3244                     continue;
3245                 }
3246                 dorange = TRUE;
3247                 s++;
3248             }
3249             else {
3250                 didrange = FALSE;
3251 #ifdef EBCDIC
3252                 literal_endpoint = 0;
3253                 native_range = TRUE;
3254 #endif
3255             }
3256         }
3257
3258         /* if we get here, we're not doing a transliteration */
3259
3260         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3261             char *s1 = s-1;
3262             int esc = 0;
3263             while (s1 >= start && *s1-- == '\\')
3264                 esc = !esc;
3265             if (!esc)
3266                 in_charclass = TRUE;
3267         }
3268
3269         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3270             char *s1 = s-1;
3271             int esc = 0;
3272             while (s1 >= start && *s1-- == '\\')
3273                 esc = !esc;
3274             if (!esc)
3275                 in_charclass = FALSE;
3276         }
3277
3278         /* skip for regexp comments /(?#comment)/, except for the last
3279          * char, which will be done separately.
3280          * Stop on (?{..}) and friends */
3281
3282         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
3283             if (s[2] == '#') {
3284                 while (s+1 < send && *s != ')')
3285                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3286             }
3287             else if (!PL_lex_casemods && !in_charclass &&
3288                      (    s[2] == '{' /* This should match regcomp.c */
3289                       || (s[2] == '?' && s[3] == '{')))
3290             {
3291                 break;
3292             }
3293         }
3294
3295         /* likewise skip #-initiated comments in //x patterns */
3296         else if (*s == '#' && PL_lex_inpat &&
3297           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3298             while (s+1 < send && *s != '\n')
3299                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3300         }
3301
3302         /* no further processing of single-quoted regex */
3303         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3304             goto default_action;
3305
3306         /* check for embedded arrays
3307            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3308            */
3309         else if (*s == '@' && s[1]) {
3310             if (isWORDCHAR_lazy_if(s+1,UTF))
3311                 break;
3312             if (strchr(":'{$", s[1]))
3313                 break;
3314             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3315                 break; /* in regexp, neither @+ nor @- are interpolated */
3316         }
3317
3318         /* check for embedded scalars.  only stop if we're sure it's a
3319            variable.
3320         */
3321         else if (*s == '$') {
3322             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3323                 break;
3324             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3325                 if (s[1] == '\\') {
3326                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3327                                    "Possible unintended interpolation of $\\ in regex");
3328                 }
3329                 break;          /* in regexp, $ might be tail anchor */
3330             }
3331         }
3332
3333         /* End of else if chain - OP_TRANS rejoin rest */
3334
3335         /* backslashes */
3336         if (*s == '\\' && s+1 < send) {
3337             char* e;    /* Can be used for ending '}', etc. */
3338
3339             s++;
3340
3341             /* warn on \1 - \9 in substitution replacements, but note that \11
3342              * is an octal; and \19 is \1 followed by '9' */
3343             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3344                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3345             {
3346                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3347                 *--s = '$';
3348                 break;
3349             }
3350
3351             /* string-change backslash escapes */
3352             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3353                 --s;
3354                 break;
3355             }
3356             /* In a pattern, process \N, but skip any other backslash escapes.
3357              * This is because we don't want to translate an escape sequence
3358              * into a meta symbol and have the regex compiler use the meta
3359              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3360              * in spite of this, we do have to process \N here while the proper
3361              * charnames handler is in scope.  See bugs #56444 and #62056.
3362              * There is a complication because \N in a pattern may also stand
3363              * for 'match a non-nl', and not mean a charname, in which case its
3364              * processing should be deferred to the regex compiler.  To be a
3365              * charname it must be followed immediately by a '{', and not look
3366              * like \N followed by a curly quantifier, i.e., not something like
3367              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3368              * quantifier */
3369             else if (PL_lex_inpat
3370                     && (*s != 'N'
3371                         || s[1] != '{'
3372                         || regcurly(s + 1, FALSE)))
3373             {
3374                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3375                 goto default_action;
3376             }
3377
3378             switch (*s) {
3379
3380             /* quoted - in transliterations */
3381             case '-':
3382                 if (PL_lex_inwhat == OP_TRANS) {
3383                     *d++ = *s++;
3384                     continue;
3385                 }
3386                 /* FALL THROUGH */
3387             default:
3388                 {
3389                     if ((isALPHANUMERIC(*s)))
3390                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3391                                        "Unrecognized escape \\%c passed through",
3392                                        *s);
3393                     /* default action is to copy the quoted character */
3394                     goto default_action;
3395                 }
3396
3397             /* eg. \132 indicates the octal constant 0132 */
3398             case '0': case '1': case '2': case '3':
3399             case '4': case '5': case '6': case '7':
3400                 {
3401                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3402                     STRLEN len = 3;
3403                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3404                     s += len;
3405                     if (len < 3 && s < send && isDIGIT(*s)
3406                         && ckWARN(WARN_MISC))
3407                     {
3408                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3409                                     "%s", form_short_octal_warning(s, len));
3410                     }
3411                 }
3412                 goto NUM_ESCAPE_INSERT;
3413
3414             /* eg. \o{24} indicates the octal constant \024 */
3415             case 'o':
3416                 {
3417                     const char* error;
3418
3419                     bool valid = grok_bslash_o(&s, &uv, &error,
3420                                                TRUE, /* Output warning */
3421                                                FALSE, /* Not strict */
3422                                                TRUE, /* Output warnings for
3423                                                          non-portables */
3424                                                UTF);
3425                     if (! valid) {
3426                         yyerror(error);
3427                         continue;
3428                     }
3429                     goto NUM_ESCAPE_INSERT;
3430                 }
3431
3432             /* eg. \x24 indicates the hex constant 0x24 */
3433             case 'x':
3434                 {
3435                     const char* error;
3436
3437                     bool valid = grok_bslash_x(&s, &uv, &error,
3438                                                TRUE, /* Output warning */
3439                                                FALSE, /* Not strict */
3440                                                TRUE,  /* Output warnings for
3441                                                          non-portables */
3442                                                UTF);
3443                     if (! valid) {
3444                         yyerror(error);
3445                         continue;
3446                     }
3447                 }
3448
3449               NUM_ESCAPE_INSERT:
3450                 /* Insert oct or hex escaped character.  There will always be
3451                  * enough room in sv since such escapes will be longer than any
3452                  * UTF-8 sequence they can end up as, except if they force us
3453                  * to recode the rest of the string into utf8 */
3454
3455                 /* Here uv is the ordinal of the next character being added in
3456                  * unicode (converted from native). */
3457                 if (!UNI_IS_INVARIANT(uv)) {
3458                     if (!has_utf8 && uv > 255) {
3459                         /* Might need to recode whatever we have accumulated so
3460                          * far if it contains any chars variant in utf8 or
3461                          * utf-ebcdic. */
3462
3463                         SvCUR_set(sv, d - SvPVX_const(sv));
3464                         SvPOK_on(sv);
3465                         *d = '\0';
3466                         /* See Note on sizing above.  */
3467                         sv_utf8_upgrade_flags_grow(sv,
3468                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3469                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3470                         d = SvPVX(sv) + SvCUR(sv);
3471                         has_utf8 = TRUE;
3472                     }
3473
3474                     if (has_utf8) {
3475                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3476                         if (PL_lex_inwhat == OP_TRANS &&
3477                             PL_sublex_info.sub_op) {
3478                             PL_sublex_info.sub_op->op_private |=
3479                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3480                                              : OPpTRANS_TO_UTF);
3481                         }
3482 #ifdef EBCDIC
3483                         if (uv > 255 && !dorange)
3484                             native_range = FALSE;
3485 #endif
3486                     }
3487                     else {
3488                         *d++ = (char)uv;
3489                     }
3490                 }
3491                 else {
3492                     *d++ = (char) uv;
3493                 }
3494                 continue;
3495
3496             case 'N':
3497                 /* In a non-pattern \N must be a named character, like \N{LATIN
3498                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3499                  * mean to match a non-newline.  For non-patterns, named
3500                  * characters are converted to their string equivalents. In
3501                  * patterns, named characters are not converted to their
3502                  * ultimate forms for the same reasons that other escapes
3503                  * aren't.  Instead, they are converted to the \N{U+...} form
3504                  * to get the value from the charnames that is in effect right
3505                  * now, while preserving the fact that it was a named character
3506                  * so that the regex compiler knows this */
3507
3508                 /* This section of code doesn't generally use the
3509                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3510                  * a close examination of this macro and determined it is a
3511                  * no-op except on utfebcdic variant characters.  Every
3512                  * character generated by this that would normally need to be
3513                  * enclosed by this macro is invariant, so the macro is not
3514                  * needed, and would complicate use of copy().  XXX There are
3515                  * other parts of this file where the macro is used
3516                  * inconsistently, but are saved by it being a no-op */
3517
3518                 /* The structure of this section of code (besides checking for
3519                  * errors and upgrading to utf8) is:
3520                  *  Further disambiguate between the two meanings of \N, and if
3521                  *      not a charname, go process it elsewhere
3522                  *  If of form \N{U+...}, pass it through if a pattern;
3523                  *      otherwise convert to utf8
3524                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3525                  *  pattern; otherwise convert to utf8 */
3526
3527                 /* Here, s points to the 'N'; the test below is guaranteed to
3528                  * succeed if we are being called on a pattern as we already
3529                  * know from a test above that the next character is a '{'.
3530                  * On a non-pattern \N must mean 'named sequence, which
3531                  * requires braces */
3532                 s++;
3533                 if (*s != '{') {
3534                     yyerror("Missing braces on \\N{}");
3535                     continue;
3536                 }
3537                 s++;
3538
3539                 /* If there is no matching '}', it is an error. */
3540                 if (! (e = strchr(s, '}'))) {
3541                     if (! PL_lex_inpat) {
3542                         yyerror("Missing right brace on \\N{}");
3543                     } else {
3544                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3545                     }
3546                     continue;
3547                 }
3548
3549                 /* Here it looks like a named character */
3550
3551                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3552                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3553                                 | PERL_SCAN_DISALLOW_PREFIX;
3554                     STRLEN len;
3555
3556                     /* For \N{U+...}, the '...' is a unicode value even on
3557                      * EBCDIC machines */
3558                     s += 2;         /* Skip to next char after the 'U+' */
3559                     len = e - s;
3560                     uv = grok_hex(s, &len, &flags, NULL);
3561                     if (len == 0 || len != (STRLEN)(e - s)) {
3562                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3563                         s = e + 1;
3564                         continue;
3565                     }
3566
3567                     if (PL_lex_inpat) {
3568
3569                         /* On non-EBCDIC platforms, pass through to the regex
3570                          * compiler unchanged.  The reason we evaluated the
3571                          * number above is to make sure there wasn't a syntax
3572                          * error.  But on EBCDIC we convert to native so
3573                          * downstream code can continue to assume it's native
3574                          */
3575                         s -= 5;     /* Include the '\N{U+' */
3576 #ifdef EBCDIC
3577                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3578                                                                and the \0 */
3579                                     "\\N{U+%X}",
3580                                     (unsigned int) UNI_TO_NATIVE(uv));
3581 #else
3582                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3583                         d += e - s + 1;
3584 #endif
3585                     }
3586                     else {  /* Not a pattern: convert the hex to string */
3587
3588                          /* If destination is not in utf8, unconditionally
3589                           * recode it to be so.  This is because \N{} implies
3590                           * Unicode semantics, and scalars have to be in utf8
3591                           * to guarantee those semantics */
3592                         if (! has_utf8) {
3593                             SvCUR_set(sv, d - SvPVX_const(sv));
3594                             SvPOK_on(sv);
3595                             *d = '\0';
3596                             /* See Note on sizing above.  */
3597                             sv_utf8_upgrade_flags_grow(
3598                                         sv,
3599                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3600                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3601                             d = SvPVX(sv) + SvCUR(sv);
3602                             has_utf8 = TRUE;
3603                         }
3604
3605                         /* Add the string to the output */
3606                         if (UNI_IS_INVARIANT(uv)) {
3607                             *d++ = (char) uv;
3608                         }
3609                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3610                     }
3611                 }
3612                 else /* Here is \N{NAME} but not \N{U+...}. */
3613                      if ((res = get_and_check_backslash_N_name(s, e)))
3614                 {
3615                     STRLEN len;
3616                     const char *str = SvPV_const(res, len);
3617                     if (PL_lex_inpat) {
3618
3619                         if (! len) { /* The name resolved to an empty string */
3620                             Copy("\\N{}", d, 4, char);
3621                             d += 4;
3622                         }
3623                         else {
3624                             /* In order to not lose information for the regex
3625                             * compiler, pass the result in the specially made
3626                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3627                             * the code points in hex of each character
3628                             * returned by charnames */
3629
3630                             const char *str_end = str + len;
3631                             const STRLEN off = d - SvPVX_const(sv);
3632
3633                             if (! SvUTF8(res)) {
3634                                 /* For the non-UTF-8 case, we can determine the
3635                                  * exact length needed without having to parse
3636                                  * through the string.  Each character takes up
3637                                  * 2 hex digits plus either a trailing dot or
3638                                  * the "}" */
3639                                 d = off + SvGROW(sv, off
3640                                                     + 3 * len
3641                                                     + 6 /* For the "\N{U+", and
3642                                                            trailing NUL */
3643                                                     + (STRLEN)(send - e));
3644                                 Copy("\\N{U+", d, 5, char);
3645                                 d += 5;
3646                                 while (str < str_end) {
3647                                     char hex_string[4];
3648                                     my_snprintf(hex_string, sizeof(hex_string),
3649                                                 "%02X.", (U8) *str);
3650                                     Copy(hex_string, d, 3, char);
3651                                     d += 3;
3652                                     str++;
3653                                 }
3654                                 d--;    /* We will overwrite below the final
3655                                            dot with a right brace */
3656                             }
3657                             else {
3658                                 STRLEN char_length; /* cur char's byte length */
3659
3660                                 /* and the number of bytes after this is
3661                                  * translated into hex digits */
3662                                 STRLEN output_length;
3663
3664                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3665                                  * for max('U+', '.'); and 1 for NUL */
3666                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3667
3668                                 /* Get the first character of the result. */
3669                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3670                                                         len,
3671                                                         &char_length,
3672                                                         UTF8_ALLOW_ANYUV);
3673                                 /* Convert first code point to hex, including
3674                                  * the boiler plate before it.  For all these,
3675                                  * we convert to native format so that
3676                                  * downstream code can continue to assume the
3677                                  * input is native */
3678                                 output_length =
3679                                     my_snprintf(hex_string, sizeof(hex_string),
3680                                             "\\N{U+%X",
3681                                             (unsigned int) UNI_TO_NATIVE(uv));
3682
3683                                 /* Make sure there is enough space to hold it */
3684                                 d = off + SvGROW(sv, off
3685                                                     + output_length
3686                                                     + (STRLEN)(send - e)
3687                                                     + 2);       /* '}' + NUL */
3688                                 /* And output it */
3689                                 Copy(hex_string, d, output_length, char);
3690                                 d += output_length;
3691
3692                                 /* For each subsequent character, append dot and
3693                                 * its ordinal in hex */
3694                                 while ((str += char_length) < str_end) {
3695                                     const STRLEN off = d - SvPVX_const(sv);
3696                                     U32 uv = utf8n_to_uvuni((U8 *) str,
3697                                                             str_end - str,
3698                                                             &char_length,
3699                                                             UTF8_ALLOW_ANYUV);
3700                                     output_length =
3701                                         my_snprintf(hex_string,
3702                                             sizeof(hex_string),
3703                                             ".%X",
3704                                             (unsigned int) UNI_TO_NATIVE(uv));
3705
3706                                     d = off + SvGROW(sv, off
3707                                                         + output_length
3708                                                         + (STRLEN)(send - e)
3709                                                         + 2);   /* '}' +  NUL */
3710                                     Copy(hex_string, d, output_length, char);
3711                                     d += output_length;
3712                                 }
3713                             }
3714
3715                             *d++ = '}'; /* Done.  Add the trailing brace */
3716                         }
3717                     }
3718                     else { /* Here, not in a pattern.  Convert the name to a
3719                             * string. */
3720
3721                          /* If destination is not in utf8, unconditionally
3722                           * recode it to be so.  This is because \N{} implies
3723                           * Unicode semantics, and scalars have to be in utf8
3724                           * to guarantee those semantics */
3725                         if (! has_utf8) {
3726                             SvCUR_set(sv, d - SvPVX_const(sv));
3727                             SvPOK_on(sv);
3728                             *d = '\0';
3729                             /* See Note on sizing above.  */
3730                             sv_utf8_upgrade_flags_grow(sv,
3731                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3732                                                 len + (STRLEN)(send - s) + 1);
3733                             d = SvPVX(sv) + SvCUR(sv);
3734                             has_utf8 = TRUE;
3735                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3736
3737                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3738                              * set correctly here). */
3739                             const STRLEN off = d - SvPVX_const(sv);
3740                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3741                         }
3742                         Copy(str, d, len, char);
3743                         d += len;
3744                     }
3745
3746                     SvREFCNT_dec(res);
3747
3748                 } /* End \N{NAME} */
3749 #ifdef EBCDIC
3750                 if (!dorange)
3751                     native_range = FALSE; /* \N{} is defined to be Unicode */
3752 #endif
3753                 s = e + 1;  /* Point to just after the '}' */
3754                 continue;
3755
3756             /* \c is a control character */
3757             case 'c':
3758                 s++;
3759                 if (s < send) {
3760                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3761                 }
3762                 else {
3763                     yyerror("Missing control char name in \\c");
3764                 }
3765                 continue;
3766
3767             /* printf-style backslashes, formfeeds, newlines, etc */
3768             case 'b':
3769                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3770                 break;
3771             case 'n':
3772                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3773                 break;
3774             case 'r':
3775                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3776                 break;
3777             case 'f':
3778                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3779                 break;
3780             case 't':
3781                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3782                 break;
3783             case 'e':
3784                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3785                 break;
3786             case 'a':
3787                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3788                 break;
3789             } /* end switch */
3790
3791             s++;
3792             continue;
3793         } /* end if (backslash) */
3794 #ifdef EBCDIC
3795         else
3796             literal_endpoint++;
3797 #endif
3798
3799     default_action:
3800         /* If we started with encoded form, or already know we want it,
3801            then encode the next character */
3802         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3803             STRLEN len  = 1;
3804
3805
3806             /* One might think that it is wasted effort in the case of the
3807              * source being utf8 (this_utf8 == TRUE) to take the next character
3808              * in the source, convert it to an unsigned value, and then convert
3809              * it back again.  But the source has not been validated here.  The
3810              * routine that does the conversion checks for errors like
3811              * malformed utf8 */
3812
3813             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3814             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3815             if (!has_utf8) {
3816                 SvCUR_set(sv, d - SvPVX_const(sv));
3817                 SvPOK_on(sv);
3818                 *d = '\0';
3819                 /* See Note on sizing above.  */
3820                 sv_utf8_upgrade_flags_grow(sv,
3821                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3822                                         need + (STRLEN)(send - s) + 1);
3823                 d = SvPVX(sv) + SvCUR(sv);
3824                 has_utf8 = TRUE;
3825             } else if (need > len) {
3826                 /* encoded value larger than old, may need extra space (NOTE:
3827                  * SvCUR() is not set correctly here).   See Note on sizing
3828                  * above.  */
3829                 const STRLEN off = d - SvPVX_const(sv);
3830                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3831             }
3832             s += len;
3833
3834             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3835 #ifdef EBCDIC
3836             if (uv > 255 && !dorange)
3837                 native_range = FALSE;
3838 #endif
3839         }
3840         else {
3841             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3842         }
3843     } /* while loop to process each character */
3844
3845     /* terminate the string and set up the sv */
3846     *d = '\0';
3847     SvCUR_set(sv, d - SvPVX_const(sv));
3848     if (SvCUR(sv) >= SvLEN(sv))
3849         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3850                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3851
3852     SvPOK_on(sv);
3853     if (PL_encoding && !has_utf8) {
3854         sv_recode_to_utf8(sv, PL_encoding);
3855         if (SvUTF8(sv))
3856             has_utf8 = TRUE;
3857     }
3858     if (has_utf8) {
3859         SvUTF8_on(sv);
3860         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3861             PL_sublex_info.sub_op->op_private |=
3862                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3863         }
3864     }
3865
3866     /* shrink the sv if we allocated more than we used */
3867     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3868         SvPV_shrink_to_cur(sv);
3869     }
3870
3871     /* return the substring (via pl_yylval) only if we parsed anything */
3872     if (s > PL_bufptr) {
3873         SvREFCNT_inc_simple_void_NN(sv);
3874         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3875             && ! PL_parser->lex_re_reparsing)
3876         {
3877             const char *const key = PL_lex_inpat ? "qr" : "q";
3878             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3879             const char *type;
3880             STRLEN typelen;
3881
3882             if (PL_lex_inwhat == OP_TRANS) {
3883                 type = "tr";
3884                 typelen = 2;
3885             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3886                 type = "s";
3887                 typelen = 1;
3888             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3889                 type = "q";
3890                 typelen = 1;
3891             } else  {
3892                 type = "qq";
3893                 typelen = 2;
3894             }
3895
3896             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3897                                 type, typelen);
3898         }
3899         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3900     }
3901     LEAVE_with_name("scan_const");
3902     return s;
3903 }
3904
3905 /* S_intuit_more
3906  * Returns TRUE if there's more to the expression (e.g., a subscript),
3907  * FALSE otherwise.
3908  *
3909  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3910  *
3911  * ->[ and ->{ return TRUE
3912  * { and [ outside a pattern are always subscripts, so return TRUE
3913  * if we're outside a pattern and it's not { or [, then return FALSE
3914  * if we're in a pattern and the first char is a {
3915  *   {4,5} (any digits around the comma) returns FALSE
3916  * if we're in a pattern and the first char is a [
3917  *   [] returns FALSE
3918  *   [SOMETHING] has a funky algorithm to decide whether it's a
3919  *      character class or not.  It has to deal with things like
3920  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3921  * anything else returns TRUE
3922  */
3923
3924 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3925
3926 STATIC int
3927 S_intuit_more(pTHX_ char *s)
3928 {
3929     dVAR;
3930
3931     PERL_ARGS_ASSERT_INTUIT_MORE;
3932
3933     if (PL_lex_brackets)
3934         return TRUE;
3935     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3936         return TRUE;
3937     if (*s != '{' && *s != '[')
3938         return FALSE;
3939     if (!PL_lex_inpat)
3940         return TRUE;
3941
3942     /* In a pattern, so maybe we have {n,m}. */
3943     if (*s == '{') {
3944         if (regcurly(s, FALSE)) {
3945             return FALSE;
3946         }
3947         return TRUE;
3948     }
3949
3950     /* On the other hand, maybe we have a character class */
3951
3952     s++;
3953     if (*s == ']' || *s == '^')
3954         return FALSE;
3955     else {
3956         /* this is terrifying, and it works */
3957         int weight;
3958         char seen[256];
3959         const char * const send = strchr(s,']');
3960         unsigned char un_char, last_un_char;
3961         char tmpbuf[sizeof PL_tokenbuf * 4];
3962
3963         if (!send)              /* has to be an expression */
3964             return TRUE;
3965         weight = 2;             /* let's weigh the evidence */
3966
3967         if (*s == '$')
3968             weight -= 3;
3969         else if (isDIGIT(*s)) {
3970             if (s[1] != ']') {
3971                 if (isDIGIT(s[1]) && s[2] == ']')
3972                     weight -= 10;
3973             }
3974             else
3975                 weight -= 100;
3976         }
3977         Zero(seen,256,char);
3978         un_char = 255;
3979         for (; s < send; s++) {
3980             last_un_char = un_char;
3981             un_char = (unsigned char)*s;
3982             switch (*s) {
3983             case '@':
3984             case '&':
3985             case '$':
3986                 weight -= seen[un_char] * 10;
3987                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3988                     int len;
3989                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3990                     len = (int)strlen(tmpbuf);
3991                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3992                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3993                         weight -= 100;
3994                     else
3995                         weight -= 10;
3996                 }
3997                 else if (*s == '$' && s[1] &&
3998                   strchr("[#!%*<>()-=",s[1])) {
3999                     if (/*{*/ strchr("])} =",s[2]))
4000                         weight -= 10;
4001                     else
4002                         weight -= 1;
4003                 }
4004                 break;
4005             case '\\':
4006                 un_char = 254;
4007                 if (s[1]) {
4008                     if (strchr("wds]",s[1]))
4009                         weight += 100;
4010                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4011                         weight += 1;
4012                     else if (strchr("rnftbxcav",s[1]))
4013                         weight += 40;
4014                     else if (isDIGIT(s[1])) {
4015                         weight += 40;
4016                         while (s[1] && isDIGIT(s[1]))
4017                             s++;
4018                     }
4019                 }
4020                 else
4021                     weight += 100;
4022                 break;
4023             case '-':
4024                 if (s[1] == '\\')
4025                     weight += 50;
4026                 if (strchr("aA01! ",last_un_char))
4027                     weight += 30;
4028                 if (strchr("zZ79~",s[1]))
4029                     weight += 30;
4030                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4031                     weight -= 5;        /* cope with negative subscript */
4032                 break;
4033             default:
4034                 if (!isWORDCHAR(last_un_char)
4035                     && !(last_un_char == '$' || last_un_char == '@'
4036                          || last_un_char == '&')
4037                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4038                     char *d = tmpbuf;
4039                     while (isALPHA(*s))
4040                         *d++ = *s++;
4041                     *d = '\0';
4042                     if (keyword(tmpbuf, d - tmpbuf, 0))
4043                         weight -= 150;
4044                 }
4045                 if (un_char == last_un_char + 1)
4046                     weight += 5;
4047                 weight -= seen[un_char];
4048                 break;
4049             }
4050             seen[un_char]++;
4051         }
4052         if (weight >= 0)        /* probably a character class */
4053             return FALSE;
4054     }
4055
4056     return TRUE;
4057 }
4058
4059 /*
4060  * S_intuit_method
4061  *
4062  * Does all the checking to disambiguate
4063  *   foo bar
4064  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4065  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4066  *
4067  * First argument is the stuff after the first token, e.g. "bar".
4068  *
4069  * Not a method if foo is a filehandle.
4070  * Not a method if foo is a subroutine prototyped to take a filehandle.
4071  * Not a method if it's really "Foo $bar"
4072  * Method if it's "foo $bar"
4073  * Not a method if it's really "print foo $bar"
4074  * Method if it's really "foo package::" (interpreted as package->foo)
4075  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4076  * Not a method if bar is a filehandle or package, but is quoted with
4077  *   =>
4078  */
4079
4080 STATIC int
4081 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4082 {
4083     dVAR;
4084     char *s = start + (*start == '$');
4085     char tmpbuf[sizeof PL_tokenbuf];
4086     STRLEN len;
4087     GV* indirgv;
4088 #ifdef PERL_MAD
4089     int soff;
4090 #endif
4091
4092     PERL_ARGS_ASSERT_INTUIT_METHOD;
4093
4094     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4095             return 0;
4096     if (cv && SvPOK(cv)) {
4097         const char *proto = CvPROTO(cv);
4098         if (proto) {
4099             while (*proto && (isSPACE(*proto) || *proto == ';'))
4100                 proto++;
4101             if (*proto == '*')
4102                 return 0;
4103         }
4104     }
4105
4106     if (*start == '$') {
4107         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4108                 isUPPER(*PL_tokenbuf))
4109             return 0;
4110 #ifdef PERL_MAD
4111         len = start - SvPVX(PL_linestr);
4112 #endif
4113         s = PEEKSPACE(s);
4114 #ifdef PERL_MAD
4115         start = SvPVX(PL_linestr) + len;
4116 #endif
4117         PL_bufptr = start;
4118         PL_expect = XREF;
4119         return *s == '(' ? FUNCMETH : METHOD;
4120     }
4121
4122     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4123     /* start is the beginning of the possible filehandle/object,
4124      * and s is the end of it
4125      * tmpbuf is a copy of it (but with single quotes as double colons)
4126      */
4127
4128     if (!keyword(tmpbuf, len, 0)) {
4129         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4130             len -= 2;
4131             tmpbuf[len] = '\0';
4132 #ifdef PERL_MAD
4133             soff = s - SvPVX(PL_linestr);
4134 #endif
4135             goto bare_package;
4136         }
4137         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4138         if (indirgv && GvCVu(indirgv))
4139             return 0;
4140         /* filehandle or package name makes it a method */
4141         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4142 #ifdef PERL_MAD
4143             soff = s - SvPVX(PL_linestr);
4144 #endif
4145             s = PEEKSPACE(s);
4146             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4147                 return 0;       /* no assumptions -- "=>" quotes bareword */
4148       bare_package:
4149             start_force(PL_curforce);
4150             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4151                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4152             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4153             if (PL_madskills)
4154                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4155                                                             ( UTF ? SVf_UTF8 : 0 )));
4156             PL_expect = XTERM;
4157             force_next(WORD);
4158             PL_bufptr = s;
4159 #ifdef PERL_MAD
4160             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4161 #endif
4162             return *s == '(' ? FUNCMETH : METHOD;
4163         }
4164     }
4165     return 0;
4166 }
4167
4168 /* Encoded script support. filter_add() effectively inserts a
4169  * 'pre-processing' function into the current source input stream.
4170  * Note that the filter function only applies to the current source file
4171  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4172  *
4173  * The datasv parameter (which may be NULL) can be used to pass
4174  * private data to this instance of the filter. The filter function
4175  * can recover the SV using the FILTER_DATA macro and use it to
4176  * store private buffers and state information.
4177  *
4178  * The supplied datasv parameter is upgraded to a PVIO type
4179  * and the IoDIRP/IoANY field is used to store the function pointer,
4180  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4181  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4182  * private use must be set using malloc'd pointers.
4183  */
4184
4185 SV *
4186 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4187 {
4188     dVAR;
4189     if (!funcp)
4190         return NULL;
4191
4192     if (!PL_parser)
4193         return NULL;
4194
4195     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4196         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4197
4198     if (!PL_rsfp_filters)
4199         PL_rsfp_filters = newAV();
4200     if (!datasv)
4201         datasv = newSV(0);
4202     SvUPGRADE(datasv, SVt_PVIO);
4203     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4204     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4205     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4206                           FPTR2DPTR(void *, IoANY(datasv)),
4207                           SvPV_nolen(datasv)));
4208     av_unshift(PL_rsfp_filters, 1);
4209     av_store(PL_rsfp_filters, 0, datasv) ;
4210     if (
4211         !PL_parser->filtered
4212      && PL_parser->lex_flags & LEX_EVALBYTES
4213      && PL_bufptr < PL_bufend
4214     ) {
4215         const char *s = PL_bufptr;
4216         while (s < PL_bufend) {
4217             if (*s == '\n') {
4218                 SV *linestr = PL_parser->linestr;
4219                 char *buf = SvPVX(linestr);
4220                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4221                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4222                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4223                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4224                 STRLEN const last_uni_pos =
4225                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4226                 STRLEN const last_lop_pos =
4227                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4228                 av_push(PL_rsfp_filters, linestr);
4229                 PL_parser->linestr =
4230                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4231                 buf = SvPVX(PL_parser->linestr);
4232                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4233                 PL_parser->bufptr = buf + bufptr_pos;
4234                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4235                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4236                 PL_parser->linestart = buf + linestart_pos;
4237                 if (PL_parser->last_uni)
4238                     PL_parser->last_uni = buf + last_uni_pos;
4239                 if (PL_parser->last_lop)
4240                     PL_parser->last_lop = buf + last_lop_pos;
4241                 SvLEN(linestr) = SvCUR(linestr);
4242                 SvCUR(linestr) = s-SvPVX(linestr);
4243                 PL_parser->filtered = 1;
4244                 break;
4245             }
4246             s++;
4247         }
4248     }
4249     return(datasv);
4250 }
4251
4252
4253 /* Delete most recently added instance of this filter function. */
4254 void
4255 Perl_filter_del(pTHX_ filter_t funcp)
4256 {
4257     dVAR;
4258     SV *datasv;
4259
4260     PERL_ARGS_ASSERT_FILTER_DEL;
4261
4262 #ifdef DEBUGGING
4263     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4264                           FPTR2DPTR(void*, funcp)));
4265 #endif
4266     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4267         return;
4268     /* if filter is on top of stack (usual case) just pop it off */
4269     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4270     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4271         sv_free(av_pop(PL_rsfp_filters));
4272
4273         return;
4274     }
4275     /* we need to search for the correct entry and clear it     */
4276     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4277 }
4278
4279
4280 /* Invoke the idxth filter function for the current rsfp.        */
4281 /* maxlen 0 = read one text line */
4282 I32
4283 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4284 {
4285     dVAR;
4286     filter_t funcp;
4287     SV *datasv = NULL;
4288     /* This API is bad. It should have been using unsigned int for maxlen.
4289        Not sure if we want to change the API, but if not we should sanity
4290        check the value here.  */
4291     unsigned int correct_length
4292         = maxlen < 0 ?
4293 #ifdef PERL_MICRO
4294         0x7FFFFFFF
4295 #else
4296         INT_MAX
4297 #endif
4298         : maxlen;
4299
4300     PERL_ARGS_ASSERT_FILTER_READ;
4301
4302     if (!PL_parser || !PL_rsfp_filters)
4303         return -1;
4304     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4305         /* Provide a default input filter to make life easy.    */
4306         /* Note that we append to the line. This is handy.      */
4307         DEBUG_P(PerlIO_printf(Perl_debug_log,
4308                               "filter_read %d: from rsfp\n", idx));
4309         if (correct_length) {
4310             /* Want a block */
4311             int len ;
4312             const int old_len = SvCUR(buf_sv);
4313
4314             /* ensure buf_sv is large enough */
4315             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4316             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4317                                    correct_length)) <= 0) {
4318                 if (PerlIO_error(PL_rsfp))
4319                     return -1;          /* error */
4320                 else
4321                     return 0 ;          /* end of file */
4322             }
4323             SvCUR_set(buf_sv, old_len + len) ;
4324             SvPVX(buf_sv)[old_len + len] = '\0';
4325         } else {
4326             /* Want a line */
4327             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4328                 if (PerlIO_error(PL_rsfp))
4329                     return -1;          /* error */
4330                 else
4331                     return 0 ;          /* end of file */
4332             }
4333         }
4334         return SvCUR(buf_sv);
4335     }
4336     /* Skip this filter slot if filter has been deleted */
4337     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4338         DEBUG_P(PerlIO_printf(Perl_debug_log,
4339                               "filter_read %d: skipped (filter deleted)\n",
4340                               idx));
4341         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4342     }
4343     if (SvTYPE(datasv) != SVt_PVIO) {
4344         if (correct_length) {
4345             /* Want a block */
4346             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4347             if (!remainder) return 0; /* eof */
4348             if (correct_length > remainder) correct_length = remainder;
4349             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4350             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4351         } else {
4352             /* Want a line */
4353             const char *s = SvEND(datasv);
4354             const char *send = SvPVX(datasv) + SvLEN(datasv);
4355             while (s < send) {
4356                 if (*s == '\n') {
4357                     s++;
4358                     break;
4359                 }
4360                 s++;
4361             }
4362             if (s == send) return 0; /* eof */
4363             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4364             SvCUR_set(datasv, s-SvPVX(datasv));
4365         }
4366         return SvCUR(buf_sv);
4367     }
4368     /* Get function pointer hidden within datasv        */
4369     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4370     DEBUG_P(PerlIO_printf(Perl_debug_log,
4371                           "filter_read %d: via function %p (%s)\n",
4372                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4373     /* Call function. The function is expected to       */
4374     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4375     /* Return: <0:error, =0:eof, >0:not eof             */
4376     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4377 }
4378
4379 STATIC char *
4380 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4381 {
4382     dVAR;
4383
4384     PERL_ARGS_ASSERT_FILTER_GETS;
4385
4386 #ifdef PERL_CR_FILTER
4387     if (!PL_rsfp_filters) {
4388         filter_add(S_cr_textfilter,NULL);
4389     }
4390 #endif
4391     if (PL_rsfp_filters) {
4392         if (!append)
4393             SvCUR_set(sv, 0);   /* start with empty line        */
4394         if (FILTER_READ(0, sv, 0) > 0)
4395             return ( SvPVX(sv) ) ;
4396         else
4397             return NULL ;
4398     }
4399     else
4400         return (sv_gets(sv, PL_rsfp, append));
4401 }
4402
4403 STATIC HV *
4404 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4405 {
4406     dVAR;
4407     GV *gv;
4408
4409     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4410
4411     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4412         return PL_curstash;
4413
4414     if (len > 2 &&
4415         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4416         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4417     {
4418         return GvHV(gv);                        /* Foo:: */
4419     }
4420
4421     /* use constant CLASS => 'MyClass' */
4422     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4423     if (gv && GvCV(gv)) {
4424         SV * const sv = cv_const_sv(GvCV(gv));
4425         if (sv)
4426             pkgname = SvPV_const(sv, len);
4427     }
4428
4429     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4430 }
4431
4432 /*
4433  * S_readpipe_override
4434  * Check whether readpipe() is overridden, and generates the appropriate
4435  * optree, provided sublex_start() is called afterwards.
4436  */
4437 STATIC void
4438 S_readpipe_override(pTHX)
4439 {
4440     GV **gvp;
4441     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4442     pl_yylval.ival = OP_BACKTICK;
4443     if ((gv_readpipe
4444                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4445             ||
4446             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4447              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4448              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4449     {
4450         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4451             op_append_elem(OP_LIST,
4452                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4453                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4454     }
4455 }
4456
4457 #ifdef PERL_MAD
4458  /*
4459  * Perl_madlex
4460  * The intent of this yylex wrapper is to minimize the changes to the
4461  * tokener when we aren't interested in collecting madprops.  It remains
4462  * to be seen how successful this strategy will be...
4463  */
4464
4465 int
4466 Perl_madlex(pTHX)
4467 {
4468     int optype;
4469     char *s = PL_bufptr;
4470
4471     /* make sure PL_thiswhite is initialized */
4472     PL_thiswhite = 0;
4473     PL_thismad = 0;
4474
4475     /* previous token ate up our whitespace? */
4476     if (!PL_lasttoke && PL_nextwhite) {
4477         PL_thiswhite = PL_nextwhite;
4478         PL_nextwhite = 0;
4479     }
4480
4481     /* isolate the token, and figure out where it is without whitespace */
4482     PL_realtokenstart = -1;
4483     PL_thistoken = 0;
4484     optype = yylex();
4485     s = PL_bufptr;
4486     assert(PL_curforce < 0);
4487
4488     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4489         if (!PL_thistoken) {
4490             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4491                 PL_thistoken = newSVpvs("");
4492             else {
4493                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4494                 PL_thistoken = newSVpvn(tstart, s - tstart);
4495             }
4496         }
4497         if (PL_thismad) /* install head */
4498             CURMAD('X', PL_thistoken);
4499     }
4500
4501     /* last whitespace of a sublex? */
4502     if (optype == ')' && PL_endwhite) {
4503         CURMAD('X', PL_endwhite);
4504     }
4505
4506     if (!PL_thismad) {
4507
4508         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4509         if (!PL_thiswhite && !PL_endwhite && !optype) {
4510             sv_free(PL_thistoken);
4511             PL_thistoken = 0;
4512             return 0;
4513         }
4514
4515         /* put off final whitespace till peg */
4516         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4517             PL_nextwhite = PL_thiswhite;
4518             PL_thiswhite = 0;
4519         }
4520         else if (PL_thisopen) {
4521             CURMAD('q', PL_thisopen);
4522             if (PL_thistoken)
4523                 sv_free(PL_thistoken);
4524             PL_thistoken = 0;
4525         }
4526         else {
4527             /* Store actual token text as madprop X */
4528             CURMAD('X', PL_thistoken);
4529         }
4530
4531         if (PL_thiswhite) {
4532             /* add preceding whitespace as madprop _ */
4533             CURMAD('_', PL_thiswhite);
4534         }
4535
4536         if (PL_thisstuff) {
4537             /* add quoted material as madprop = */
4538             CURMAD('=', PL_thisstuff);
4539         }
4540
4541         if (PL_thisclose) {
4542             /* add terminating quote as madprop Q */
4543             CURMAD('Q', PL_thisclose);
4544         }
4545     }
4546
4547     /* special processing based on optype */
4548
4549     switch (optype) {
4550
4551     /* opval doesn't need a TOKEN since it can already store mp */
4552     case WORD:
4553     case METHOD:
4554     case FUNCMETH:
4555     case THING:
4556     case PMFUNC:
4557     case PRIVATEREF:
4558     case FUNC0SUB:
4559     case UNIOPSUB:
4560     case LSTOPSUB:
4561         if (pl_yylval.opval)
4562             append_madprops(PL_thismad, pl_yylval.opval, 0);
4563         PL_thismad = 0;
4564         return optype;
4565
4566     /* fake EOF */
4567     case 0:
4568         optype = PEG;
4569         if (PL_endwhite) {
4570             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4571             PL_endwhite = 0;
4572         }
4573         break;
4574
4575     /* pval */
4576     case LABEL:
4577         break;
4578
4579     case ']':
4580     case '}':
4581         if (PL_faketokens)
4582             break;
4583         /* remember any fake bracket that lexer is about to discard */
4584         if (PL_lex_brackets == 1 &&
4585             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4586         {
4587             s = PL_bufptr;
4588             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4589                 s++;
4590             if (*s == '}') {
4591                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4592                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4593                 PL_thiswhite = 0;
4594                 PL_bufptr = s - 1;
4595                 break;  /* don't bother looking for trailing comment */
4596             }
4597             else
4598                 s = PL_bufptr;
4599         }
4600         if (optype == ']')
4601             break;
4602         /* FALLTHROUGH */
4603
4604     /* attach a trailing comment to its statement instead of next token */
4605     case ';':
4606         if (PL_faketokens)
4607             break;
4608         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4609             s = PL_bufptr;
4610             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4611                 s++;
4612             if (*s == '\n' || *s == '#') {
4613                 while (s < PL_bufend && *s != '\n')
4614                     s++;
4615                 if (s < PL_bufend)
4616                     s++;
4617                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4618                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4619                 PL_thiswhite = 0;
4620                 PL_bufptr = s;
4621             }
4622         }
4623         break;
4624
4625     /* ival */
4626     default:
4627         break;
4628
4629     }
4630
4631     /* Create new token struct.  Note: opvals return early above. */
4632     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4633     PL_thismad = 0;
4634     return optype;
4635 }
4636 #endif
4637
4638 STATIC char *
4639 S_tokenize_use(pTHX_ int is_use, char *s) {
4640     dVAR;
4641
4642     PERL_ARGS_ASSERT_TOKENIZE_USE;
4643
4644     if (PL_expect != XSTATE)
4645         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4646                     is_use ? "use" : "no"));
4647     PL_expect = XTERM;
4648     s = SKIPSPACE1(s);
4649     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4650         s = force_version(s, TRUE);
4651         if (*s == ';' || *s == '}'
4652                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4653             start_force(PL_curforce);
4654             NEXTVAL_NEXTTOKE.opval = NULL;
4655             force_next(WORD);
4656         }
4657         else if (*s == 'v') {
4658             s = force_word(s,WORD,FALSE,TRUE);
4659             s = force_version(s, FALSE);
4660         }
4661     }
4662     else {
4663         s = force_word(s,WORD,FALSE,TRUE);
4664         s = force_version(s, FALSE);
4665     }
4666     pl_yylval.ival = is_use;
4667     return s;
4668 }
4669 #ifdef DEBUGGING
4670     static const char* const exp_name[] =
4671         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4672           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4673         };
4674 #endif
4675
4676 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4677 STATIC bool
4678 S_word_takes_any_delimeter(char *p, STRLEN len)
4679 {
4680     return (len == 1 && strchr("msyq", p[0])) ||
4681            (len == 2 && (
4682             (p[0] == 't' && p[1] == 'r') ||
4683             (p[0] == 'q' && strchr("qwxr", p[1]))));
4684 }
4685
4686 /*
4687   yylex
4688
4689   Works out what to call the token just pulled out of the input
4690   stream.  The yacc parser takes care of taking the ops we return and
4691   stitching them into a tree.
4692
4693   Returns:
4694     The type of the next token
4695
4696   Structure:
4697       Switch based on the current state:
4698           - if we already built the token before, use it
4699           - if we have a case modifier in a string, deal with that
4700           - handle other cases of interpolation inside a string
4701           - scan the next line if we are inside a format
4702       In the normal state switch on the next character:
4703           - default:
4704             if alphabetic, go to key lookup
4705             unrecoginized character - croak
4706           - 0/4/26: handle end-of-line or EOF
4707           - cases for whitespace
4708           - \n and #: handle comments and line numbers
4709           - various operators, brackets and sigils
4710           - numbers
4711           - quotes
4712           - 'v': vstrings (or go to key lookup)
4713           - 'x' repetition operator (or go to key lookup)
4714           - other ASCII alphanumerics (key lookup begins here):
4715               word before => ?
4716               keyword plugin
4717               scan built-in keyword (but do nothing with it yet)
4718               check for statement label
4719               check for lexical subs
4720                   goto just_a_word if there is one
4721               see whether built-in keyword is overridden
4722               switch on keyword number:
4723                   - default: just_a_word:
4724                       not a built-in keyword; handle bareword lookup
4725                       disambiguate between method and sub call
4726                       fall back to bareword
4727                   - cases for built-in keywords
4728 */
4729
4730
4731 #ifdef __SC__
4732 #pragma segment Perl_yylex
4733 #endif
4734 int
4735 Perl_yylex(pTHX)
4736 {
4737     dVAR;
4738     char *s = PL_bufptr;
4739     char *d;
4740     STRLEN len;
4741     bool bof = FALSE;
4742     const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
4743     U8 formbrack = 0;
4744     U32 fake_eof = 0;
4745
4746     /* orig_keyword, gvp, and gv are initialized here because
4747      * jump to the label just_a_word_zero can bypass their
4748      * initialization later. */
4749     I32 orig_keyword = 0;
4750     GV *gv = NULL;
4751     GV **gvp = NULL;
4752
4753     DEBUG_T( {
4754         SV* tmp = newSVpvs("");
4755         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4756             (IV)CopLINE(PL_curcop),
4757             lex_state_names[PL_lex_state],
4758             exp_name[PL_expect],
4759             pv_display(tmp, s, strlen(s), 0, 60));
4760         SvREFCNT_dec(tmp);
4761     } );
4762
4763     switch (PL_lex_state) {
4764 #ifdef COMMENTARY
4765     case LEX_NORMAL:            /* Some compilers will produce faster */
4766     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4767         break;
4768 #endif
4769
4770     /* when we've already built the next token, just pull it out of the queue */
4771     case LEX_KNOWNEXT:
4772 #ifdef PERL_MAD
4773         PL_lasttoke--;
4774         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4775         if (PL_madskills) {
4776             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4777             PL_nexttoke[PL_lasttoke].next_mad = 0;
4778             if (PL_thismad && PL_thismad->mad_key == '_') {
4779                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4780                 PL_thismad->mad_val = 0;
4781                 mad_free(PL_thismad);
4782                 PL_thismad = 0;
4783             }
4784         }
4785         if (!PL_lasttoke) {
4786             PL_lex_state = PL_lex_defer;
4787             PL_expect = PL_lex_expect;
4788             PL_lex_defer = LEX_NORMAL;
4789             if (!PL_nexttoke[PL_lasttoke].next_type)
4790                 return yylex();
4791         }
4792 #else
4793         PL_nexttoke--;
4794         pl_yylval = PL_nextval[PL_nexttoke];
4795         if (!PL_nexttoke) {
4796             PL_lex_state = PL_lex_defer;
4797             PL_expect = PL_lex_expect;
4798             PL_lex_defer = LEX_NORMAL;
4799         }
4800 #endif
4801         {
4802             I32 next_type;
4803 #ifdef PERL_MAD
4804             next_type = PL_nexttoke[PL_lasttoke].next_type;
4805 #else
4806             next_type = PL_nexttype[PL_nexttoke];
4807 #endif
4808             if (next_type & (7<<24)) {
4809                 if (next_type & (1<<24)) {
4810                     if (PL_lex_brackets > 100)
4811                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4812                     PL_lex_brackstack[PL_lex_brackets++] =
4813                         (char) ((next_type >> 16) & 0xff);
4814                 }
4815                 if (next_type & (2<<24))
4816                     PL_lex_allbrackets++;
4817                 if (next_type & (4<<24))
4818                     PL_lex_allbrackets--;
4819                 next_type &= 0xffff;
4820             }
4821             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4822         }
4823
4824     /* interpolated case modifiers like \L \U, including \Q and \E.
4825        when we get here, PL_bufptr is at the \
4826     */
4827     case LEX_INTERPCASEMOD:
4828 #ifdef DEBUGGING
4829         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4830             Perl_croak(aTHX_
4831                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4832                        PL_bufptr, PL_bufend, *PL_bufptr);
4833 #endif
4834         /* handle \E or end of string */
4835         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4836             /* if at a \E */
4837             if (PL_lex_casemods) {
4838                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4839                 PL_lex_casestack[PL_lex_casemods] = '\0';
4840
4841                 if (PL_bufptr != PL_bufend
4842                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4843                         || oldmod == 'F')) {
4844                     PL_bufptr += 2;
4845                     PL_lex_state = LEX_INTERPCONCAT;
4846 #ifdef PERL_MAD
4847                     if (PL_madskills)
4848                         PL_thistoken = newSVpvs("\\E");
4849 #endif
4850                 }
4851                 PL_lex_allbrackets--;
4852                 return REPORT(')');
4853             }
4854             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4855                /* Got an unpaired \E */
4856                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4857                         "Useless use of \\E");
4858             }
4859 #ifdef PERL_MAD
4860             while (PL_bufptr != PL_bufend &&
4861               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4862                 if (PL_madskills) {
4863                   if (!PL_thiswhite)
4864                     PL_thiswhite = newSVpvs("");
4865                   sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4866                 }
4867                 PL_bufptr += 2;
4868             }
4869 #else
4870             if (PL_bufptr != PL_bufend)
4871                 PL_bufptr += 2;
4872 #endif
4873             PL_lex_state = LEX_INTERPCONCAT;
4874             return yylex();
4875         }
4876         else {
4877             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4878               "### Saw case modifier\n"); });
4879             s = PL_bufptr + 1;
4880             if (s[1] == '\\' && s[2] == 'E') {
4881 #ifdef PERL_MAD
4882                 if (PL_madskills) {
4883                   if (!PL_thiswhite)
4884                     PL_thiswhite = newSVpvs("");
4885                   sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4886                 }
4887 #endif
4888                 PL_bufptr = s + 3;
4889                 PL_lex_state = LEX_INTERPCONCAT;
4890                 return yylex();
4891             }
4892             else {
4893                 I32 tmp;
4894                 if (!PL_madskills) /* when just compiling don't need correct */
4895                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4896                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4897                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4898                     (strchr(PL_lex_casestack, 'L')
4899                         || strchr(PL_lex_casestack, 'U')
4900                         || strchr(PL_lex_casestack, 'F'))) {
4901                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4902                     PL_lex_allbrackets--;
4903                     return REPORT(')');
4904                 }
4905                 if (PL_lex_casemods > 10)
4906                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4907                 PL_lex_casestack[PL_lex_casemods++] = *s;
4908                 PL_lex_casestack[PL_lex_casemods] = '\0';
4909                 PL_lex_state = LEX_INTERPCONCAT;
4910                 start_force(PL_curforce);
4911                 NEXTVAL_NEXTTOKE.ival = 0;
4912                 force_next((2<<24)|'(');
4913                 start_force(PL_curforce);
4914                 if (*s == 'l')
4915                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4916                 else if (*s == 'u')
4917                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4918                 else if (*s == 'L')
4919                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4920                 else if (*s == 'U')
4921                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4922                 else if (*s == 'Q')
4923                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4924                 else if (*s == 'F')
4925                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4926                 else
4927                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4928                 if (PL_madskills) {
4929                     SV* const tmpsv = newSVpvs("\\ ");
4930                     /* replace the space with the character we want to escape
4931                      */
4932                     SvPVX(tmpsv)[1] = *s;
4933                     curmad('_', tmpsv);
4934                 }
4935                 PL_bufptr = s + 1;
4936             }
4937             force_next(FUNC);
4938             if (PL_lex_starts) {
4939                 s = PL_bufptr;
4940                 PL_lex_starts = 0;
4941 #ifdef PERL_MAD
4942                 if (PL_madskills) {
4943                     if (PL_thistoken)
4944                         sv_free(PL_thistoken);
4945                     PL_thistoken = newSVpvs("");
4946                 }
4947 #endif
4948                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4949                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4950                     OPERATOR(',');
4951                 else
4952                     Aop(OP_CONCAT);
4953             }
4954             else
4955                 return yylex();
4956         }
4957
4958     case LEX_INTERPPUSH:
4959         return REPORT(sublex_push());
4960
4961     case LEX_INTERPSTART:
4962         if (PL_bufptr == PL_bufend)
4963             return REPORT(sublex_done());
4964         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4965               "### Interpolated variable\n"); });
4966         PL_expect = XTERM;
4967         /* for /@a/, we leave the joining for the regex engine to do
4968          * (unless we're within \Q etc) */
4969         PL_lex_dojoin = (*PL_bufptr == '@'
4970                             && (!PL_lex_inpat || PL_lex_casemods));
4971         PL_lex_state = LEX_INTERPNORMAL;
4972         if (PL_lex_dojoin) {
4973             start_force(PL_curforce);
4974             NEXTVAL_NEXTTOKE.ival = 0;
4975             force_next(',');
4976             start_force(PL_curforce);
4977             force_ident("\"", '$');
4978             start_force(PL_curforce);
4979             NEXTVAL_NEXTTOKE.ival = 0;
4980             force_next('$');
4981             start_force(PL_curforce);
4982             NEXTVAL_NEXTTOKE.ival = 0;
4983             force_next((2<<24)|'(');
4984             start_force(PL_curforce);
4985             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4986             force_next(FUNC);
4987         }
4988         /* Convert (?{...}) and friends to 'do {...}' */
4989         if (PL_lex_inpat && *PL_bufptr == '(') {
4990             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4991             PL_bufptr += 2;
4992             if (*PL_bufptr != '{')
4993                 PL_bufptr++;
4994             start_force(PL_curforce);
4995             /* XXX probably need a CURMAD(something) here */
4996             PL_expect = XTERMBLOCK;
4997             force_next(DO);
4998         }
4999
5000         if (PL_lex_starts++) {
5001             s = PL_bufptr;
5002 #ifdef PERL_MAD
5003             if (PL_madskills) {
5004                 if (PL_thistoken)
5005                     sv_free(PL_thistoken);
5006                 PL_thistoken = newSVpvs("");
5007             }
5008 #endif
5009             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5010             if (!PL_lex_casemods && PL_lex_inpat)
5011                 OPERATOR(',');
5012             else
5013                 Aop(OP_CONCAT);
5014         }
5015         return yylex();
5016
5017     case LEX_INTERPENDMAYBE:
5018         if (intuit_more(PL_bufptr)) {
5019             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
5020             break;
5021         }
5022         /* FALL THROUGH */
5023
5024     case LEX_INTERPEND:
5025         if (PL_lex_dojoin) {
5026             PL_lex_dojoin = FALSE;
5027             PL_lex_state = LEX_INTERPCONCAT;
5028 #ifdef PERL_MAD
5029             if (PL_madskills) {
5030                 if (PL_thistoken)
5031                     sv_free(PL_thistoken);
5032                 PL_thistoken = newSVpvs("");
5033             }
5034 #endif
5035             PL_lex_allbrackets--;
5036             return REPORT(')');
5037         }
5038         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5039             && SvEVALED(PL_lex_repl))
5040         {
5041             if (PL_bufptr != PL_bufend)
5042                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5043             PL_lex_repl = NULL;
5044         }
5045         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5046            re_eval_str.  If the here-doc body’s length equals the previous
5047            value of re_eval_start, re_eval_start will now be null.  So
5048            check re_eval_str as well. */
5049         if (PL_parser->lex_shared->re_eval_start
5050          || PL_parser->lex_shared->re_eval_str) {
5051             SV *sv;
5052             if (*PL_bufptr != ')')
5053                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5054             PL_bufptr++;
5055             /* having compiled a (?{..}) expression, return the original
5056              * text too, as a const */
5057             if (PL_parser->lex_shared->re_eval_str) {
5058                 sv = PL_parser->lex_shared->re_eval_str;
5059                 PL_parser->lex_shared->re_eval_str = NULL;
5060                 SvCUR_set(sv,
5061                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5062                 SvPV_shrink_to_cur(sv);
5063             }
5064             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5065                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5066             start_force(PL_curforce);
5067             /* XXX probably need a CURMAD(something) here */
5068             NEXTVAL_NEXTTOKE.opval =
5069                     (OP*)newSVOP(OP_CONST, 0,
5070                                  sv);
5071             force_next(THING);
5072             PL_parser->lex_shared->re_eval_start = NULL;
5073             PL_expect = XTERM;
5074             return REPORT(',');
5075         }
5076
5077         /* FALLTHROUGH */
5078     case LEX_INTERPCONCAT:
5079 #ifdef DEBUGGING
5080         if (PL_lex_brackets)
5081             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5082                        (long) PL_lex_brackets);
5083 #endif
5084         if (PL_bufptr == PL_bufend)
5085             return REPORT(sublex_done());
5086
5087         /* m'foo' still needs to be parsed for possible (?{...}) */
5088         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5089             SV *sv = newSVsv(PL_linestr);
5090             sv = tokeq(sv);
5091             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5092             s = PL_bufend;
5093         }
5094         else {
5095             s = scan_const(PL_bufptr);
5096             if (*s == '\\')
5097                 PL_lex_state = LEX_INTERPCASEMOD;
5098             else
5099                 PL_lex_state = LEX_INTERPSTART;
5100         }
5101
5102         if (s != PL_bufptr) {
5103             start_force(PL_curforce);
5104             if (PL_madskills) {
5105                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5106             }
5107             NEXTVAL_NEXTTOKE = pl_yylval;
5108             PL_expect = XTERM;
5109             force_next(THING);
5110             if (PL_lex_starts++) {
5111 #ifdef PERL_MAD
5112                 if (PL_madskills) {
5113                     if (PL_thistoken)
5114                         sv_free(PL_thistoken);
5115                     PL_thistoken = newSVpvs("");
5116                 }
5117 #endif
5118                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5119                 if (!PL_lex_casemods && PL_lex_inpat)
5120                     OPERATOR(',');
5121                 else
5122                     Aop(OP_CONCAT);
5123             }
5124             else {
5125                 PL_bufptr = s;
5126                 return yylex();
5127             }
5128         }
5129
5130         return yylex();
5131     case LEX_FORMLINE:
5132         s = scan_formline(PL_bufptr);
5133         if (!PL_lex_formbrack)
5134         {
5135             formbrack = 1;
5136             goto rightbracket;
5137         }
5138         PL_bufptr = s;
5139         return yylex();
5140     }
5141
5142     s = PL_bufptr;
5143     PL_oldoldbufptr = PL_oldbufptr;
5144     PL_oldbufptr = s;
5145     PL_parser->saw_infix_sigil = 0;
5146
5147   retry:
5148 #ifdef PERL_MAD
5149     if (PL_thistoken) {
5150         sv_free(PL_thistoken);
5151         PL_thistoken = 0;
5152     }
5153     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
5154 #endif
5155     switch (*s) {
5156     default:
5157         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5158             goto keylookup;
5159         {
5160         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5161         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
5162                                                     UTF8SKIP(s),
5163                                                     SVs_TEMP | SVf_UTF8),
5164                                             10, UNI_DISPLAY_ISPRINT))
5165                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5166         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5167         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5168             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5169         } else {
5170             d = PL_linestart;
5171         }
5172         *s = '\0';
5173         sv_setpv(dsv, d);
5174         if (UTF)
5175             SvUTF8_on(dsv);
5176         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
5177     }
5178     case 4:
5179     case 26:
5180         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5181     case 0:
5182 #ifdef PERL_MAD
5183         if (PL_madskills)
5184             PL_faketokens = 0;
5185 #endif
5186         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5187             PL_last_uni = 0;
5188             PL_last_lop = 0;
5189             if (PL_lex_brackets &&
5190                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5191                 yyerror((const char *)
5192                         (PL_lex_formbrack
5193                          ? "Format not terminated"
5194                          : "Missing right curly or square bracket"));
5195             }
5196             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5197                         "### Tokener got EOF\n");
5198             } );
5199             TOKEN(0);
5200         }
5201         if (s++ < PL_bufend)
5202             goto retry;                 /* ignore stray nulls */
5203         PL_last_uni = 0;
5204         PL_last_lop = 0;
5205         if (!PL_in_eval && !PL_preambled) {
5206             PL_preambled = TRUE;
5207 #ifdef PERL_MAD
5208             if (PL_madskills)
5209                 PL_faketokens = 1;
5210 #endif
5211             if (PL_perldb) {
5212                 /* Generate a string of Perl code to load the debugger.
5213                  * If PERL5DB is set, it will return the contents of that,
5214                  * otherwise a compile-time require of perl5db.pl.  */
5215
5216                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5217
5218                 if (pdb) {
5219                     sv_setpv(PL_linestr, pdb);
5220                     sv_catpvs(PL_linestr,";");
5221                 } else {
5222                     SETERRNO(0,SS_NORMAL);
5223                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5224                 }
5225             } else
5226                 sv_setpvs(PL_linestr,"");
5227             if (PL_preambleav) {
5228                 SV **svp = AvARRAY(PL_preambleav);
5229                 SV **const end = svp + AvFILLp(PL_preambleav);
5230                 while(svp <= end) {
5231                     sv_catsv(PL_linestr, *svp);
5232                     ++svp;
5233                     sv_catpvs(PL_linestr, ";");
5234                 }
5235                 sv_free(MUTABLE_SV(PL_preambleav));
5236                 PL_preambleav = NULL;
5237             }
5238             if (PL_minus_E)
5239                 sv_catpvs(PL_linestr,
5240                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5241             if (PL_minus_n || PL_minus_p) {
5242                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5243                 if (PL_minus_l)
5244                     sv_catpvs(PL_linestr,"chomp;");
5245                 if (PL_minus_a) {
5246                     if (PL_minus_F) {
5247                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5248                              || *PL_splitstr == '"')
5249                               && strchr(PL_splitstr + 1, *PL_splitstr))
5250                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5251                         else {
5252                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5253                                bytes can be used as quoting characters.  :-) */
5254                             const char *splits = PL_splitstr;
5255                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5256                             do {
5257                                 /* Need to \ \s  */
5258                                 if (*splits == '\\')
5259                                     sv_catpvn(PL_linestr, splits, 1);
5260                                 sv_catpvn(PL_linestr, splits, 1);
5261                             } while (*splits++);
5262                             /* This loop will embed the trailing NUL of
5263                                PL_linestr as the last thing it does before
5264                                terminating.  */
5265                             sv_catpvs(PL_linestr, ");");
5266                         }
5267                     }
5268                     else
5269                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5270                 }
5271             }
5272             sv_catpvs(PL_linestr, "\n");
5273             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5274             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5275             PL_last_lop = PL_last_uni = NULL;
5276             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5277                 update_debugger_info(PL_linestr, NULL, 0);
5278             goto retry;
5279         }
5280         do {
5281             fake_eof = 0;
5282             bof = PL_rsfp ? TRUE : FALSE;
5283             if (0) {
5284               fake_eof:
5285                 fake_eof = LEX_FAKE_EOF;
5286             }
5287             PL_bufptr = PL_bufend;
5288             COPLINE_INC_WITH_HERELINES;
5289             if (!lex_next_chunk(fake_eof)) {
5290                 CopLINE_dec(PL_curcop);
5291                 s = PL_bufptr;
5292                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5293             }
5294             CopLINE_dec(PL_curcop);
5295 #ifdef PERL_MAD
5296             if (!PL_rsfp)
5297                 PL_realtokenstart = -1;
5298 #endif
5299             s = PL_bufptr;
5300             /* If it looks like the start of a BOM or raw UTF-16,
5301              * check if it in fact is. */
5302             if (bof && PL_rsfp &&
5303                      (*s == 0 ||
5304                       *(U8*)s == 0xEF ||
5305                       *(U8*)s >= 0xFE ||
5306                       s[1] == 0)) {
5307                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5308                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5309 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5310                 /* offset may include swallowed CR */
5311                 if (!bof)
5312                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5313 #endif
5314                 if (bof) {
5315                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5316                     s = swallow_bom((U8*)s);
5317                 }
5318             }
5319             if (PL_parser->in_pod) {
5320                 /* Incest with pod. */
5321 #ifdef PERL_MAD
5322                 if (PL_madskills)
5323                     sv_catsv(PL_thiswhite, PL_linestr);
5324 #endif
5325                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5326                     sv_setpvs(PL_linestr, "");
5327                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5328                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5329                     PL_last_lop = PL_last_uni = NULL;
5330                     PL_parser->in_pod = 0;
5331                 }
5332             }
5333             if (PL_rsfp || PL_parser->filtered)
5334                 incline(s);
5335         } while (PL_parser->in_pod);
5336         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5337         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5338         PL_last_lop = PL_last_uni = NULL;
5339         if (CopLINE(PL_curcop) == 1) {
5340             while (s < PL_bufend && isSPACE(*s))
5341                 s++;
5342             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5343                 s++;
5344 #ifdef PERL_MAD
5345             if (PL_madskills)
5346                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5347 #endif
5348             d = NULL;
5349             if (!PL_in_eval) {
5350                 if (*s == '#' && *(s+1) == '!')
5351                     d = s + 2;
5352 #ifdef ALTERNATE_SHEBANG
5353                 else {
5354                     static char const as[] = ALTERNATE_SHEBANG;
5355                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5356                         d = s + (sizeof(as) - 1);
5357                 }
5358 #endif /* ALTERNATE_SHEBANG */
5359             }
5360             if (d) {
5361                 char *ipath;
5362                 char *ipathend;
5363
5364                 while (isSPACE(*d))
5365                     d++;
5366                 ipath = d;
5367                 while (*d && !isSPACE(*d))
5368                     d++;
5369                 ipathend = d;
5370
5371 #ifdef ARG_ZERO_IS_SCRIPT
5372                 if (ipathend > ipath) {
5373                     /*
5374                      * HP-UX (at least) sets argv[0] to the script name,
5375                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5376                      * at least, set argv[0] to the basename of the Perl
5377                      * interpreter. So, having found "#!", we'll set it right.
5378                      */
5379                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5380                                                     SVt_PV)); /* $^X */
5381                     assert(SvPOK(x) || SvGMAGICAL(x));
5382                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5383                         sv_setpvn(x, ipath, ipathend - ipath);
5384                         SvSETMAGIC(x);
5385                     }
5386                     else {
5387                         STRLEN blen;
5388                         STRLEN llen;
5389                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5390                         const char * const lstart = SvPV_const(x,llen);
5391                         if (llen < blen) {
5392                             bstart += blen - llen;
5393                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5394                                 sv_setpvn(x, ipath, ipathend - ipath);
5395                                 SvSETMAGIC(x);
5396                             }
5397                         }
5398                     }
5399                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5400                 }
5401 #endif /* ARG_ZERO_IS_SCRIPT */
5402
5403                 /*
5404                  * Look for options.
5405                  */
5406                 d = instr(s,"perl -");
5407                 if (!d) {
5408                     d = instr(s,"perl");
5409 #if defined(DOSISH)
5410                     /* avoid getting into infinite loops when shebang
5411                      * line contains "Perl" rather than "perl" */
5412                     if (!d) {
5413                         for (d = ipathend-4; d >= ipath; --d) {
5414                             if ((*d == 'p' || *d == 'P')
5415                                 && !ibcmp(d, "perl", 4))
5416                             {
5417                                 break;
5418                             }
5419                         }
5420                         if (d < ipath)
5421                             d = NULL;
5422                     }
5423 #endif
5424                 }
5425 #ifdef ALTERNATE_SHEBANG
5426                 /*
5427                  * If the ALTERNATE_SHEBANG on this system starts with a
5428                  * character that can be part of a Perl expression, then if
5429                  * we see it but not "perl", we're probably looking at the
5430                  * start of Perl code, not a request to hand off to some
5431                  * other interpreter.  Similarly, if "perl" is there, but
5432                  * not in the first 'word' of the line, we assume the line
5433                  * contains the start of the Perl program.
5434                  */
5435                 if (d && *s != '#') {
5436                     const char *c = ipath;
5437                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5438                         c++;
5439                     if (c < d)
5440                         d = NULL;       /* "perl" not in first word; ignore */
5441                     else
5442                         *s = '#';       /* Don't try to parse shebang line */
5443                 }
5444 #endif /* ALTERNATE_SHEBANG */
5445                 if (!d &&
5446                     *s == '#' &&
5447                     ipathend > ipath &&
5448                     !PL_minus_c &&
5449                     !instr(s,"indir") &&
5450                     instr(PL_origargv[0],"perl"))
5451                 {
5452                     dVAR;
5453                     char **newargv;
5454
5455                     *ipathend = '\0';
5456                     s = ipathend + 1;
5457                     while (s < PL_bufend && isSPACE(*s))
5458                         s++;
5459                     if (s < PL_bufend) {
5460                         Newx(newargv,PL_origargc+3,char*);
5461                         newargv[1] = s;
5462                         while (s < PL_bufend && !isSPACE(*s))
5463                             s++;
5464                         *s = '\0';
5465                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5466                     }
5467                     else
5468                         newargv = PL_origargv;
5469                     newargv[0] = ipath;
5470                     PERL_FPU_PRE_EXEC
5471                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5472                     PERL_FPU_POST_EXEC
5473                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5474                 }
5475                 if (d) {
5476                     while (*d && !isSPACE(*d))
5477                         d++;
5478                     while (SPACE_OR_TAB(*d))
5479                         d++;
5480
5481                     if (*d++ == '-') {
5482                         const bool switches_done = PL_doswitches;
5483                         const U32 oldpdb = PL_perldb;
5484                         const bool oldn = PL_minus_n;
5485                         const bool oldp = PL_minus_p;
5486                         const char *d1 = d;
5487
5488                         do {
5489                             bool baduni = FALSE;
5490                             if (*d1 == 'C') {
5491                                 const char *d2 = d1 + 1;
5492                                 if (parse_unicode_opts((const char **)&d2)
5493                                     != PL_unicode)
5494                                     baduni = TRUE;
5495                             }
5496                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5497                                 const char * const m = d1;
5498                                 while (*d1 && !isSPACE(*d1))
5499                                     d1++;
5500                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5501                                       (int)(d1 - m), m);
5502                             }
5503                             d1 = moreswitches(d1);
5504                         } while (d1);
5505                         if (PL_doswitches && !switches_done) {
5506                             int argc = PL_origargc;
5507                             char **argv = PL_origargv;
5508                             do {
5509                                 argc--,argv++;
5510                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5511                             init_argv_symbols(argc,argv);
5512                         }
5513                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5514                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5515                               /* if we have already added "LINE: while (<>) {",
5516                                  we must not do it again */
5517                         {
5518                             sv_setpvs(PL_linestr, "");
5519                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5520                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5521                             PL_last_lop = PL_last_uni = NULL;
5522                             PL_preambled = FALSE;
5523                             if (PERLDB_LINE || PERLDB_SAVESRC)
5524                                 (void)gv_fetchfile(PL_origfilename);
5525                             goto retry;
5526                         }
5527                     }
5528                 }
5529             }
5530         }
5531         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5532             PL_lex_state = LEX_FORMLINE;
5533             start_force(PL_curforce);
5534             NEXTVAL_NEXTTOKE.ival = 0;
5535             force_next(FORMRBRACK);
5536             TOKEN(';');
5537         }
5538         goto retry;
5539     case '\r':
5540 #ifdef PERL_STRICT_CR
5541         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5542         Perl_croak(aTHX_
5543       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5544 #endif
5545     case ' ': case '\t': case '\f': case 013:
5546 #ifdef PERL_MAD
5547         PL_realtokenstart = -1;
5548         if (PL_madskills) {
5549           if (!PL_thiswhite)
5550             PL_thiswhite = newSVpvs("");
5551           sv_catpvn(PL_thiswhite, s, 1);
5552         }
5553 #endif
5554         s++;
5555         goto retry;
5556     case '#':
5557     case '\n':
5558 #ifdef PERL_MAD
5559         PL_realtokenstart = -1;
5560         if (PL_madskills)
5561             PL_faketokens = 0;
5562 #endif
5563         if (PL_lex_state != LEX_NORMAL ||
5564              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5565             if (*s == '#' && s == PL_linestart && PL_in_eval
5566              && !PL_rsfp && !PL_parser->filtered) {
5567                 /* handle eval qq[#line 1 "foo"\n ...] */
5568                 CopLINE_dec(PL_curcop);
5569                 incline(s);
5570             }
5571             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5572                 s = SKIPSPACE0(s);
5573                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5574                     incline(s);
5575             }
5576             else {
5577                 const bool in_comment = *s == '#';
5578                 d = s;
5579                 while (d < PL_bufend && *d != '\n')
5580                     d++;
5581                 if (d < PL_bufend)
5582                     d++;
5583                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5584                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5585                                d, PL_bufend);
5586 #ifdef PERL_MAD
5587                 if (PL_madskills)
5588                     PL_thiswhite = newSVpvn(s, d - s);
5589 #endif
5590                 s = d;
5591                 if (in_comment && d == PL_bufend
5592                  && PL_lex_state == LEX_INTERPNORMAL
5593                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5594                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5595                 else incline(s);
5596             }
5597             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5598                 PL_lex_state = LEX_FORMLINE;
5599                 start_force(PL_curforce);
5600                 NEXTVAL_NEXTTOKE.ival = 0;
5601                 force_next(FORMRBRACK);
5602                 TOKEN(';');
5603             }
5604         }
5605         else {
5606 #ifdef PERL_MAD
5607             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5608                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5609                     PL_faketokens = 0;
5610                     s = SKIPSPACE0(s);
5611                     TOKEN(PEG); /* make sure any #! line is accessible */
5612                 }
5613                 s = SKIPSPACE0(s);
5614             }
5615             else {
5616 /*              if (PL_madskills && PL_lex_formbrack) { */
5617                     d = s;
5618                     while (d < PL_bufend && *d != '\n')
5619                         d++;
5620                     if (d < PL_bufend)
5621                         d++;
5622                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5623                       Perl_croak(aTHX_ "panic: input overflow");
5624                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5625                         if (!PL_thiswhite)
5626                             PL_thiswhite = newSVpvs("");
5627                         if (CopLINE(PL_curcop) == 1) {
5628                             sv_setpvs(PL_thiswhite, "");
5629                             PL_faketokens = 0;
5630                         }
5631                         sv_catpvn(PL_thiswhite, s, d - s);
5632                     }
5633                     s = d;
5634 /*              }
5635                 *s = '\0';
5636                 PL_bufend = s; */
5637             }
5638 #else
5639             *s = '\0';
5640             PL_bufend = s;
5641 #endif
5642         }
5643         goto retry;
5644     case '-':
5645         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5646             I32 ftst = 0;
5647             char tmp;
5648
5649             s++;
5650             PL_bufptr = s;
5651             tmp = *s++;
5652
5653             while (s < PL_bufend && SPACE_OR_TAB(*s))
5654                 s++;
5655
5656             if (strnEQ(s,"=>",2)) {
5657                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5658                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5659                 OPERATOR('-');          /* unary minus */
5660             }
5661             PL_last_uni = PL_oldbufptr;
5662             switch (tmp) {
5663             case 'r': ftst = OP_FTEREAD;        break;
5664             case 'w': ftst = OP_FTEWRITE;       break;
5665             case 'x': ftst = OP_FTEEXEC;        break;
5666             case 'o': ftst = OP_FTEOWNED;       break;
5667             case 'R': ftst = OP_FTRREAD;        break;
5668             case 'W': ftst = OP_FTRWRITE;       break;
5669             case 'X': ftst = OP_FTREXEC;        break;
5670             case 'O': ftst = OP_FTROWNED;       break;
5671             case 'e': ftst = OP_FTIS;           break;
5672             case 'z': ftst = OP_FTZERO;         break;
5673             case 's': ftst = OP_FTSIZE;         break;
5674             case 'f': ftst = OP_FTFILE;         break;
5675             case 'd': ftst = OP_FTDIR;          break;
5676             case 'l': ftst = OP_FTLINK;         break;
5677             case 'p': ftst = OP_FTPIPE;         break;
5678             case 'S': ftst = OP_FTSOCK;         break;
5679             case 'u': ftst = OP_FTSUID;         break;
5680             case 'g': ftst = OP_FTSGID;         break;
5681             case 'k': ftst = OP_FTSVTX;         break;
5682             case 'b': ftst = OP_FTBLK;          break;
5683             case 'c': ftst = OP_FTCHR;          break;
5684             case 't': ftst = OP_FTTTY;          break;
5685             case 'T': ftst = OP_FTTEXT;         break;
5686             case 'B': ftst = OP_FTBINARY;       break;
5687             case 'M': case 'A': case 'C':
5688                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5689                 switch (tmp) {
5690                 case 'M': ftst = OP_FTMTIME;    break;
5691                 case 'A': ftst = OP_FTATIME;    break;
5692                 case 'C': ftst = OP_FTCTIME;    break;
5693                 default:                        break;
5694                 }
5695                 break;
5696             default:
5697                 break;
5698             }
5699             if (ftst) {
5700                 PL_last_lop_op = (OPCODE)ftst;
5701                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5702                         "### Saw file test %c\n", (int)tmp);
5703                 } );
5704                 FTST(ftst);
5705             }
5706             else {
5707                 /* Assume it was a minus followed by a one-letter named
5708                  * subroutine call (or a -bareword), then. */
5709                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5710                         "### '-%c' looked like a file test but was not\n",
5711                         (int) tmp);
5712                 } );
5713                 s = --PL_bufptr;
5714             }
5715         }
5716         {
5717             const char tmp = *s++;
5718             if (*s == tmp) {
5719                 s++;
5720                 if (PL_expect == XOPERATOR)
5721                     TERM(POSTDEC);
5722                 else
5723                     OPERATOR(PREDEC);
5724             }
5725             else if (*s == '>') {
5726                 s++;
5727                 s = SKIPSPACE1(s);
5728                 if (isIDFIRST_lazy_if(s,UTF)) {
5729                     s = force_word(s,METHOD,FALSE,TRUE);
5730                     TOKEN(ARROW);
5731                 }
5732                 else if (*s == '$')
5733                     OPERATOR(ARROW);
5734                 else
5735                     TERM(ARROW);
5736             }
5737             if (PL_expect == XOPERATOR) {
5738                 if (*s == '=' && !PL_lex_allbrackets &&
5739                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5740                     s--;
5741                     TOKEN(0);
5742                 }
5743                 Aop(OP_SUBTRACT);
5744             }
5745             else {
5746                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5747                     check_uni();
5748                 OPERATOR('-');          /* unary minus */
5749             }
5750         }
5751
5752     case '+':
5753         {
5754             const char tmp = *s++;
5755             if (*s == tmp) {
5756                 s++;
5757                 if (PL_expect == XOPERATOR)
5758                     TERM(POSTINC);
5759                 else
5760                     OPERATOR(PREINC);
5761             }
5762             if (PL_expect == XOPERATOR) {
5763                 if (*s == '=' && !PL_lex_allbrackets &&
5764                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5765                     s--;
5766                     TOKEN(0);
5767                 }
5768                 Aop(OP_ADD);
5769             }
5770             else {
5771                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5772                     check_uni();
5773                 OPERATOR('+');
5774             }
5775         }
5776
5777     case '*':
5778         if (PL_expect != XOPERATOR) {
5779             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5780             PL_expect = XOPERATOR;
5781             force_ident(PL_tokenbuf, '*');
5782             if (!*PL_tokenbuf)
5783                 PREREF('*');
5784             TERM('*');
5785         }
5786         s++;
5787         if (*s == '*') {
5788             s++;
5789             if (*s == '=' && !PL_lex_allbrackets &&
5790                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5791                 s -= 2;
5792                 TOKEN(0);
5793             }
5794             PWop(OP_POW);
5795         }
5796         if (*s == '=' && !PL_lex_allbrackets &&
5797                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5798             s--;
5799             TOKEN(0);
5800         }
5801         PL_parser->saw_infix_sigil = 1;
5802         Mop(OP_MULTIPLY);
5803
5804     case '%':
5805         if (PL_expect == XOPERATOR) {
5806             if (s[1] == '=' && !PL_lex_allbrackets &&
5807                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5808                 TOKEN(0);
5809             ++s;
5810             PL_parser->saw_infix_sigil = 1;
5811             Mop(OP_MODULO);
5812         }
5813         PL_tokenbuf[0] = '%';
5814         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5815                 sizeof PL_tokenbuf - 1, FALSE);
5816         if (!PL_tokenbuf[1]) {
5817             PREREF('%');
5818         }
5819         PL_expect = XOPERATOR;
5820         force_ident_maybe_lex('%');
5821         TERM('%');
5822
5823     case '^':
5824         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5825                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5826             TOKEN(0);
5827         s++;
5828         BOop(OP_BIT_XOR);
5829     case '[':
5830         if (PL_lex_brackets > 100)
5831             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5832         PL_lex_brackstack[PL_lex_brackets++] = 0;
5833         PL_lex_allbrackets++;
5834         {
5835             const char tmp = *s++;
5836             OPERATOR(tmp);
5837         }
5838     case '~':
5839         if (s[1] == '~'
5840             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5841         {
5842             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5843                 TOKEN(0);
5844             s += 2;
5845             Perl_ck_warner_d(aTHX_
5846                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5847                 "Smartmatch is experimental");
5848             Eop(OP_SMARTMATCH);
5849         }
5850         s++;
5851         OPERATOR('~');
5852     case ',':
5853         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5854             TOKEN(0);
5855         s++;
5856         OPERATOR(',');
5857     case ':':
5858         if (s[1] == ':') {
5859             len = 0;
5860             goto just_a_word_zero_gv;
5861         }
5862         s++;
5863         switch (PL_expect) {
5864             OP *attrs;
5865 #ifdef PERL_MAD
5866             I32 stuffstart;
5867 #endif
5868         case XOPERATOR:
5869             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5870                 break;
5871             PL_bufptr = s;      /* update in case we back off */
5872             if (*s == '=') {
5873                 Perl_croak(aTHX_
5874                            "Use of := for an empty attribute list is not allowed");
5875             }
5876             goto grabattrs;
5877         case XATTRBLOCK:
5878             PL_expect = XBLOCK;
5879             goto grabattrs;
5880         case XATTRTERM:
5881             PL_expect = XTERMBLOCK;
5882          grabattrs:
5883 #ifdef PERL_MAD
5884             stuffstart = s - SvPVX(PL_linestr) - 1;
5885 #endif
5886             s = PEEKSPACE(s);
5887             attrs = NULL;
5888             while (isIDFIRST_lazy_if(s,UTF)) {
5889                 I32 tmp;
5890                 SV *sv;
5891                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5892                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5893                     if (tmp < 0) tmp = -tmp;
5894                     switch (tmp) {
5895                     case KEY_or:
5896                     case KEY_and:
5897                     case KEY_for:
5898                     case KEY_foreach:
5899                     case KEY_unless:
5900                     case KEY_if:
5901                     case KEY_while:
5902                     case KEY_until:
5903                         goto got_attrs;
5904                     default:
5905                         break;
5906                     }
5907                 }
5908                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5909                 if (*d == '(') {
5910                     d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5911                     if (!d) {
5912                         /* MUST advance bufptr here to avoid bogus
5913                            "at end of line" context messages from yyerror().
5914                          */
5915                         PL_bufptr = s + len;
5916                         yyerror("Unterminated attribute parameter in attribute list");
5917                         if (attrs)
5918                             op_free(attrs);
5919                         sv_free(sv);
5920                         return REPORT(0);       /* EOF indicator */
5921                     }
5922                 }
5923                 if (PL_lex_stuff) {
5924                     sv_catsv(sv, PL_lex_stuff);
5925                     attrs = op_append_elem(OP_LIST, attrs,
5926                                         newSVOP(OP_CONST, 0, sv));
5927                     SvREFCNT_dec(PL_lex_stuff);
5928                     PL_lex_stuff = NULL;
5929                 }
5930                 else {
5931                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5932                         sv_free(sv);
5933                         if (PL_in_my == KEY_our) {
5934                             deprecate(":unique");
5935                         }
5936                         else
5937                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5938                     }
5939
5940                     /* NOTE: any CV attrs applied here need to be part of
5941                        the CVf_BUILTIN_ATTRS define in cv.h! */
5942                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5943                         sv_free(sv);
5944                         CvLVALUE_on(PL_compcv);
5945                     }
5946                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5947                         sv_free(sv);
5948                         deprecate(":locked");
5949                     }
5950                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5951                         sv_free(sv);
5952                         CvMETHOD_on(PL_compcv);
5953                     }
5954                     /* After we've set the flags, it could be argued that
5955                        we don't need to do the attributes.pm-based setting
5956                        process, and shouldn't bother appending recognized
5957                        flags.  To experiment with that, uncomment the
5958                        following "else".  (Note that's already been
5959                        uncommented.  That keeps the above-applied built-in
5960                        attributes from being intercepted (and possibly
5961                        rejected) by a package's attribute routines, but is
5962                        justified by the performance win for the common case
5963                        of applying only built-in attributes.) */
5964                     else
5965                         attrs = op_append_elem(OP_LIST, attrs,
5966                                             newSVOP(OP_CONST, 0,
5967                                                     sv));
5968                 }
5969                 s = PEEKSPACE(d);
5970                 if (*s == ':' && s[1] != ':')
5971                     s = PEEKSPACE(s+1);
5972                 else if (s == d)
5973                     break;      /* require real whitespace or :'s */
5974                 /* XXX losing whitespace on sequential attributes here */
5975             }
5976             {
5977                 const char tmp
5978                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5979                 if (*s != ';' && *s != '}' && *s != tmp
5980                     && (tmp != '=' || *s != ')')) {
5981                     const char q = ((*s == '\'') ? '"' : '\'');
5982                     /* If here for an expression, and parsed no attrs, back
5983                        off. */
5984                     if (tmp == '=' && !attrs) {
5985                         s = PL_bufptr;
5986                         break;
5987                     }
5988                     /* MUST advance bufptr here to avoid bogus "at end of line"
5989                        context messages from yyerror().
5990                     */
5991                     PL_bufptr = s;
5992                     yyerror( (const char *)
5993                              (*s
5994                               ? Perl_form(aTHX_ "Invalid separator character "
5995                                           "%c%c%c in attribute list", q, *s, q)
5996                               : "Unterminated attribute list" ) );
5997                     if (attrs)
5998                         op_free(attrs);
5999                     OPERATOR(':');
6000                 }
6001             }
6002         got_attrs:
6003             if (attrs) {
6004                 start_force(PL_curforce);
6005                 NEXTVAL_NEXTTOKE.opval = attrs;
6006                 CURMAD('_', PL_nextwhite);
6007                 force_next(THING);
6008             }
6009 #ifdef PERL_MAD
6010             if (PL_madskills) {
6011                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6012                                      (s - SvPVX(PL_linestr)) - stuffstart);
6013             }
6014 #endif
6015             TOKEN(COLONATTR);
6016         }
6017         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6018             s--;
6019             TOKEN(0);
6020         }
6021         PL_lex_allbrackets--;
6022         OPERATOR(':');
6023     case '(':
6024         s++;
6025         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6026             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6027         else
6028             PL_expect = XTERM;
6029         s = SKIPSPACE1(s);
6030         PL_lex_allbrackets++;
6031         TOKEN('(');
6032     case ';':
6033         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6034             TOKEN(0);
6035         CLINE;
6036         s++;
6037         OPERATOR(';');
6038     case ')':
6039         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6040             TOKEN(0);
6041         s++;
6042         PL_lex_allbrackets--;
6043         s = SKIPSPACE1(s);
6044         if (*s == '{')
6045             PREBLOCK(')');
6046         TERM(')');
6047     case ']':
6048         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6049             TOKEN(0);
6050         s++;
6051         if (PL_lex_brackets <= 0)
6052             yyerror("Unmatched right square bracket");
6053         else
6054             --PL_lex_brackets;
6055         PL_lex_allbrackets--;
6056         if (PL_lex_state == LEX_INTERPNORMAL) {
6057             if (PL_lex_brackets == 0) {
6058                 if (*s == '-' && s[1] == '>')
6059                     PL_lex_state = LEX_INTERPENDMAYBE;
6060                 else if (*s != '[' && *s != '{')
6061                     PL_lex_state = LEX_INTERPEND;
6062             }
6063         }
6064         TERM(']');
6065     case '{':
6066         s++;
6067       leftbracket:
6068         if (PL_lex_brackets > 100) {
6069             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6070         }
6071         switch (PL_expect) {
6072         case XTERM:
6073             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6074             PL_lex_allbrackets++;
6075             OPERATOR(HASHBRACK);
6076         case XOPERATOR:
6077             while (s < PL_bufend && SPACE_OR_TAB(*s))
6078                 s++;
6079             d = s;
6080             PL_tokenbuf[0] = '\0';
6081             if (d < PL_bufend && *d == '-') {
6082                 PL_tokenbuf[0] = '-';
6083                 d++;
6084                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6085                     d++;
6086             }
6087             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6088                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6089                               FALSE, &len);
6090                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6091                     d++;
6092                 if (*d == '}') {
6093                     const char minus = (PL_tokenbuf[0] == '-');
6094                     s = force_word(s + minus, WORD, FALSE, TRUE);
6095                     if (minus)
6096                         force_next('-');
6097                 }
6098             }
6099             /* FALL THROUGH */
6100         case XATTRBLOCK:
6101         case XBLOCK:
6102             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6103             PL_lex_allbrackets++;
6104             PL_expect = XSTATE;
6105             break;
6106         case XATTRTERM:
6107         case XTERMBLOCK:
6108             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6109             PL_lex_allbrackets++;
6110             PL_expect = XSTATE;
6111             break;
6112         default: {
6113                 const char *t;
6114                 if (PL_oldoldbufptr == PL_last_lop)
6115                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6116                 else
6117                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6118                 PL_lex_allbrackets++;
6119                 s = SKIPSPACE1(s);
6120                 if (*s == '}') {
6121                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6122                         PL_expect = XTERM;
6123                         /* This hack is to get the ${} in the message. */
6124                         PL_bufptr = s+1;
6125                         yyerror("syntax error");
6126                         break;
6127                     }
6128                     OPERATOR(HASHBRACK);
6129                 }
6130                 /* This hack serves to disambiguate a pair of curlies
6131                  * as being a block or an anon hash.  Normally, expectation
6132                  * determines that, but in cases where we're not in a
6133                  * position to expect anything in particular (like inside
6134                  * eval"") we have to resolve the ambiguity.  This code
6135                  * covers the case where the first term in the curlies is a
6136                  * quoted string.  Most other cases need to be explicitly
6137                  * disambiguated by prepending a "+" before the opening
6138                  * curly in order to force resolution as an anon hash.
6139                  *
6140                  * XXX should probably propagate the outer expectation
6141                  * into eval"" to rely less on this hack, but that could
6142                  * potentially break current behavior of eval"".
6143                  * GSAR 97-07-21
6144                  */
6145                 t = s;
6146                 if (*s == '\'' || *s == '"' || *s == '`') {
6147                     /* common case: get past first string, handling escapes */
6148                     for (t++; t < PL_bufend && *t != *s;)
6149                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
6150                             t++;
6151                     t++;
6152                 }
6153                 else if (*s == 'q') {
6154                     if (++t < PL_bufend
6155                         && (!isWORDCHAR(*t)
6156                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6157                                 && !isWORDCHAR(*t))))
6158                     {
6159                         /* skip q//-like construct */
6160                         const char *tmps;
6161                         char open, close, term;
6162                         I32 brackets = 1;
6163
6164                         while (t < PL_bufend && isSPACE(*t))
6165                             t++;
6166                         /* check for q => */
6167                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6168                             OPERATOR(HASHBRACK);
6169                         }
6170                         term = *t;
6171                         open = term;
6172                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6173                             term = tmps[5];
6174                         close = term;
6175                         if (open == close)
6176                             for (t++; t < PL_bufend; t++) {
6177                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6178                                     t++;
6179                                 else if (*t == open)
6180                                     break;
6181                             }
6182                         else {
6183                             for (t++; t < PL_bufend; t++) {
6184                                 if (*t == '\\' && t+1 < PL_bufend)
6185                                     t++;
6186                                 else if (*t == close && --brackets <= 0)
6187                                     break;
6188                                 else if (*t == open)
6189                                     brackets++;
6190                             }
6191                         }
6192                         t++;
6193                     }
6194                     else
6195                         /* skip plain q word */
6196                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6197                              t += UTF8SKIP(t);
6198                 }
6199                 else if (isWORDCHAR_lazy_if(t,UTF)) {
6200                     t += UTF8SKIP(t);
6201                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6202                          t += UTF8SKIP(t);
6203                 }
6204                 while (t < PL_bufend && isSPACE(*t))
6205                     t++;
6206                 /* if comma follows first term, call it an anon hash */
6207                 /* XXX it could be a comma expression with loop modifiers */
6208                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6209                                    || (*t == '=' && t[1] == '>')))
6210                     OPERATOR(HASHBRACK);
6211                 if (PL_expect == XREF)
6212                     PL_expect = XTERM;
6213                 else {
6214                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6215                     PL_expect = XSTATE;
6216                 }
6217             }
6218             break;
6219         }
6220         pl_yylval.ival = CopLINE(PL_curcop);
6221         if (isSPACE(*s) || *s == '#')
6222             PL_copline = NOLINE;   /* invalidate current command line number */
6223         TOKEN(formbrack ? '=' : '{');
6224     case '}':
6225         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6226             TOKEN(0);
6227       rightbracket:
6228         s++;
6229         if (PL_lex_brackets <= 0)
6230             yyerror("Unmatched right curly bracket");
6231         else
6232             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6233         PL_lex_allbrackets--;
6234         if (PL_lex_state == LEX_INTERPNORMAL) {
6235             if (PL_lex_brackets == 0) {
6236                 if (PL_expect & XFAKEBRACK) {
6237                     PL_expect &= XENUMMASK;
6238                     PL_lex_state = LEX_INTERPEND;
6239                     PL_bufptr = s;
6240 #if 0
6241                     if (PL_madskills) {
6242                         if (!PL_thiswhite)
6243                             PL_thiswhite = newSVpvs("");
6244                         sv_catpvs(PL_thiswhite,"}");
6245                     }
6246 #endif
6247                     return yylex();     /* ignore fake brackets */
6248                 }
6249                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6250                  && SvEVALED(PL_lex_repl))
6251                     PL_lex_state = LEX_INTERPEND;
6252                 else if (*s == '-' && s[1] == '>')
6253                     PL_lex_state = LEX_INTERPENDMAYBE;
6254                 else if (*s != '[' && *s != '{')
6255                     PL_lex_state = LEX_INTERPEND;
6256             }
6257         }
6258         if (PL_expect & XFAKEBRACK) {
6259             PL_expect &= XENUMMASK;
6260             PL_bufptr = s;
6261             return yylex();             /* ignore fake brackets */
6262         }
6263         start_force(PL_curforce);
6264         if (PL_madskills) {
6265             curmad('X', newSVpvn(s-1,1));
6266             CURMAD('_', PL_thiswhite);
6267         }
6268         force_next(formbrack ? '.' : '}');
6269         if (formbrack) LEAVE;
6270 #ifdef PERL_MAD
6271         if (PL_madskills && !PL_thistoken)
6272             PL_thistoken = newSVpvs("");
6273 #endif
6274         if (formbrack == 2) { /* means . where arguments were expected */
6275             start_force(PL_curforce);
6276             force_next(';');
6277             TOKEN(FORMRBRACK);
6278         }
6279         TOKEN(';');
6280     case '&':
6281         s++;
6282         if (*s++ == '&') {
6283             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6284                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6285                 s -= 2;
6286                 TOKEN(0);
6287             }
6288             AOPERATOR(ANDAND);
6289         }
6290         s--;
6291         if (PL_expect == XOPERATOR) {
6292             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6293                 && isIDFIRST_lazy_if(s,UTF))
6294             {
6295                 CopLINE_dec(PL_curcop);
6296                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6297                 CopLINE_inc(PL_curcop);
6298             }
6299             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6300                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6301                 s--;
6302                 TOKEN(0);
6303             }
6304             PL_parser->saw_infix_sigil = 1;
6305             BAop(OP_BIT_AND);
6306         }
6307
6308         PL_tokenbuf[0] = '&';
6309         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6310                        sizeof PL_tokenbuf - 1, TRUE);
6311         if (PL_tokenbuf[1]) {
6312             PL_expect = XOPERATOR;
6313             force_ident_maybe_lex('&');
6314         }
6315         else
6316             PREREF('&');
6317         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6318         TERM('&');
6319
6320     case '|':
6321         s++;
6322         if (*s++ == '|') {
6323             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6324                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6325                 s -= 2;
6326                 TOKEN(0);
6327             }
6328             AOPERATOR(OROR);
6329         }
6330         s--;
6331         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6332                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6333             s--;
6334             TOKEN(0);
6335         }
6336         BOop(OP_BIT_OR);
6337     case '=':
6338         s++;
6339         {
6340             const char tmp = *s++;
6341             if (tmp == '=') {
6342                 if (!PL_lex_allbrackets &&
6343                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6344                     s -= 2;
6345                     TOKEN(0);
6346                 }
6347                 Eop(OP_EQ);
6348             }
6349             if (tmp == '>') {
6350                 if (!PL_lex_allbrackets &&
6351                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6352                     s -= 2;
6353                     TOKEN(0);
6354                 }
6355                 OPERATOR(',');
6356             }
6357             if (tmp == '~')
6358                 PMop(OP_MATCH);
6359             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6360                 && strchr("+-*/%.^&|<",tmp))
6361                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6362                             "Reversed %c= operator",(int)tmp);
6363             s--;
6364             if (PL_expect == XSTATE && isALPHA(tmp) &&
6365                 (s == PL_linestart+1 || s[-2] == '\n') )
6366                 {
6367                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6368                         || PL_lex_state != LEX_NORMAL) {
6369                         d = PL_bufend;
6370                         while (s < d) {
6371                             if (*s++ == '\n') {
6372                                 incline(s);
6373                                 if (strnEQ(s,"=cut",4)) {
6374                                     s = strchr(s,'\n');
6375                                     if (s)
6376                                         s++;
6377                                     else
6378                                         s = d;
6379                                     incline(s);
6380                                     goto retry;
6381                                 }
6382                             }
6383                         }
6384                         goto retry;
6385                     }
6386 #ifdef PERL_MAD
6387                     if (PL_madskills) {
6388                         if (!PL_thiswhite)
6389                             PL_thiswhite = newSVpvs("");
6390                         sv_catpvn(PL_thiswhite, PL_linestart,
6391                                   PL_bufend - PL_linestart);
6392                     }
6393 #endif
6394                     s = PL_bufend;
6395                     PL_parser->in_pod = 1;
6396                     goto retry;
6397                 }
6398         }
6399         if (PL_expect == XBLOCK) {
6400             const char *t = s;
6401 #ifdef PERL_STRICT_CR
6402             while (SPACE_OR_TAB(*t))
6403 #else
6404             while (SPACE_OR_TAB(*t) || *t == '\r')
6405 #endif
6406                 t++;
6407             if (*t == '\n' || *t == '#') {
6408                 formbrack = 1;
6409                 ENTER;
6410                 SAVEI8(PL_parser->form_lex_state);
6411                 SAVEI32(PL_lex_formbrack);
6412                 PL_parser->form_lex_state = PL_lex_state;
6413                 PL_lex_formbrack = PL_lex_brackets + 1;
6414                 goto leftbracket;
6415             }
6416         }
6417         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6418             s--;
6419             TOKEN(0);
6420         }
6421         pl_yylval.ival = 0;
6422         OPERATOR(ASSIGNOP);
6423     case '!':
6424         s++;
6425         {
6426             const char tmp = *s++;
6427             if (tmp == '=') {
6428                 /* was this !=~ where !~ was meant?
6429                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6430
6431                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6432                     const char *t = s+1;
6433
6434                     while (t < PL_bufend && isSPACE(*t))
6435                         ++t;
6436
6437                     if (*t == '/' || *t == '?' ||
6438                         ((*t == 'm' || *t == 's' || *t == 'y')
6439                          && !isWORDCHAR(t[1])) ||
6440                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6441                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6442                                     "!=~ should be !~");
6443                 }
6444                 if (!PL_lex_allbrackets &&
6445                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6446                     s -= 2;
6447                     TOKEN(0);
6448                 }
6449                 Eop(OP_NE);
6450             }
6451             if (tmp == '~')
6452                 PMop(OP_NOT);
6453         }
6454         s--;
6455         OPERATOR('!');
6456     case '<':
6457         if (PL_expect != XOPERATOR) {
6458             if (s[1] != '<' && !strchr(s,'>'))
6459                 check_uni();
6460             if (s[1] == '<')
6461                 s = scan_heredoc(s);
6462             else
6463                 s = scan_inputsymbol(s);
6464             PL_expect = XOPERATOR;
6465             TOKEN(sublex_start());
6466         }
6467         s++;
6468         {
6469             char tmp = *s++;
6470             if (tmp == '<') {
6471                 if (*s == '=' && !PL_lex_allbrackets &&
6472                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6473                     s -= 2;
6474                     TOKEN(0);
6475                 }
6476                 SHop(OP_LEFT_SHIFT);
6477             }
6478             if (tmp == '=') {
6479                 tmp = *s++;
6480                 if (tmp == '>') {
6481                     if (!PL_lex_allbrackets &&
6482                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6483                         s -= 3;
6484                         TOKEN(0);
6485                     }
6486                     Eop(OP_NCMP);
6487                 }
6488                 s--;
6489                 if (!PL_lex_allbrackets &&
6490                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6491                     s -= 2;
6492                     TOKEN(0);
6493                 }
6494                 Rop(OP_LE);
6495             }
6496         }
6497         s--;
6498         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6499             s--;
6500             TOKEN(0);
6501         }
6502         Rop(OP_LT);
6503     case '>':
6504         s++;
6505         {
6506             const char tmp = *s++;
6507             if (tmp == '>') {
6508                 if (*s == '=' && !PL_lex_allbrackets &&
6509                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6510                     s -= 2;
6511                     TOKEN(0);
6512                 }
6513                 SHop(OP_RIGHT_SHIFT);
6514             }
6515             else if (tmp == '=') {
6516                 if (!PL_lex_allbrackets &&
6517                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6518                     s -= 2;
6519                     TOKEN(0);
6520                 }
6521                 Rop(OP_GE);
6522             }
6523         }
6524         s--;
6525         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6526             s--;
6527             TOKEN(0);
6528         }
6529         Rop(OP_GT);
6530
6531     case '$':
6532         CLINE;
6533
6534         if (PL_expect == XOPERATOR) {
6535             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6536                 return deprecate_commaless_var_list();
6537             }
6538         }
6539
6540         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6541             PL_tokenbuf[0] = '@';
6542             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6543                            sizeof PL_tokenbuf - 1, FALSE);
6544             if (PL_expect == XOPERATOR)
6545                 no_op("Array length", s);
6546             if (!PL_tokenbuf[1])
6547                 PREREF(DOLSHARP);
6548             PL_expect = XOPERATOR;
6549             force_ident_maybe_lex('#');
6550             TOKEN(DOLSHARP);
6551         }
6552
6553         PL_tokenbuf[0] = '$';
6554         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6555                        sizeof PL_tokenbuf - 1, FALSE);
6556         if (PL_expect == XOPERATOR)
6557             no_op("Scalar", s);
6558         if (!PL_tokenbuf[1]) {
6559             if (s == PL_bufend)
6560                 yyerror("Final $ should be \\$ or $name");
6561             PREREF('$');
6562         }
6563
6564         d = s;
6565         {
6566             const char tmp = *s;
6567             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6568                 s = SKIPSPACE1(s);
6569
6570             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6571                 && intuit_more(s)) {
6572                 if (*s == '[') {
6573                     PL_tokenbuf[0] = '@';
6574                     if (ckWARN(WARN_SYNTAX)) {
6575                         char *t = s+1;
6576
6577                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6578                             t++;
6579                         if (*t++ == ',') {
6580                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6581                             while (t < PL_bufend && *t != ']')
6582                                 t++;
6583                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6584                                         "Multidimensional syntax %.*s not supported",
6585                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6586                         }
6587                     }
6588                 }
6589                 else if (*s == '{') {
6590                     char *t;
6591                     PL_tokenbuf[0] = '%';
6592                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6593                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6594                         {
6595                             char tmpbuf[sizeof PL_tokenbuf];
6596                             do {
6597                                 t++;
6598                             } while (isSPACE(*t));
6599                             if (isIDFIRST_lazy_if(t,UTF)) {
6600                                 STRLEN len;
6601                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6602                                               &len);
6603                                 while (isSPACE(*t))
6604                                     t++;
6605                                 if (*t == ';'
6606                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6607                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6608                                         "You need to quote \"%"UTF8f"\"",
6609                                          UTF8fARG(UTF, len, tmpbuf));
6610                             }
6611                         }
6612                 }
6613             }
6614
6615             PL_expect = XOPERATOR;
6616             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6617                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6618                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6619                     PL_expect = XOPERATOR;
6620                 else if (strchr("$@\"'`q", *s))
6621                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6622                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6623                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6624                 else if (isIDFIRST_lazy_if(s,UTF)) {
6625                     char tmpbuf[sizeof PL_tokenbuf];
6626                     int t2;
6627                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6628                     if ((t2 = keyword(tmpbuf, len, 0))) {
6629                         /* binary operators exclude handle interpretations */
6630                         switch (t2) {
6631                         case -KEY_x:
6632                         case -KEY_eq:
6633                         case -KEY_ne:
6634                         case -KEY_gt:
6635                         case -KEY_lt:
6636                         case -KEY_ge:
6637                         case -KEY_le:
6638                         case -KEY_cmp:
6639                             break;
6640                         default:
6641                             PL_expect = XTERM;  /* e.g. print $fh length() */
6642                             break;
6643                         }
6644                     }
6645                     else {
6646                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6647                     }
6648                 }
6649                 else if (isDIGIT(*s))
6650                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6651                 else if (*s == '.' && isDIGIT(s[1]))
6652                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6653                 else if ((*s == '?' || *s == '-' || *s == '+')
6654                          && !isSPACE(s[1]) && s[1] != '=')
6655                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6656                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6657                          && s[1] != '/')
6658                     PL_expect = XTERM;          /* e.g. print $fh /.../
6659                                                    XXX except DORDOR operator
6660                                                 */
6661                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6662                          && s[2] != '=')
6663                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6664             }
6665         }
6666         force_ident_maybe_lex('$');
6667         TOKEN('$');
6668
6669     case '@':
6670         if (PL_expect == XOPERATOR)
6671             no_op("Array", s);
6672         PL_tokenbuf[0] = '@';
6673         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6674         if (!PL_tokenbuf[1]) {
6675             PREREF('@');
6676         }
6677         if (PL_lex_state == LEX_NORMAL)
6678             s = SKIPSPACE1(s);
6679         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6680             if (*s == '{')
6681                 PL_tokenbuf[0] = '%';
6682
6683             /* Warn about @ where they meant $. */
6684             if (*s == '[' || *s == '{') {
6685                 if (ckWARN(WARN_SYNTAX)) {
6686                     const char *t = s + 1;
6687                     while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6688                         t += UTF ? UTF8SKIP(t) : 1;
6689                     if (*t == '}' || *t == ']') {
6690                         t++;
6691                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6692        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6693                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6694                          "Scalar value %"UTF8f" better written as $%"UTF8f,
6695                           UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
6696                           UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
6697                     }
6698                 }
6699             }
6700         }
6701         PL_expect = XOPERATOR;
6702         force_ident_maybe_lex('@');
6703         TERM('@');
6704
6705      case '/':                  /* may be division, defined-or, or pattern */
6706         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6707             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6708                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6709                 TOKEN(0);
6710             s += 2;
6711             AOPERATOR(DORDOR);
6712         }
6713      case '?':                  /* may either be conditional or pattern */
6714         if (PL_expect == XOPERATOR) {
6715              char tmp = *s++;
6716              if(tmp == '?') {
6717                 if (!PL_lex_allbrackets &&
6718                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6719                     s--;
6720                     TOKEN(0);
6721                 }
6722                 PL_lex_allbrackets++;
6723                 OPERATOR('?');
6724              }
6725              else {
6726                  tmp = *s++;
6727                  if(tmp == '/') {
6728                      /* A // operator. */
6729                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6730                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6731                                             LEX_FAKEEOF_LOGIC)) {
6732                         s -= 2;
6733                         TOKEN(0);
6734                     }
6735                     AOPERATOR(DORDOR);
6736                  }
6737                  else {
6738                      s--;
6739                      if (*s == '=' && !PL_lex_allbrackets &&
6740                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6741                          s--;
6742                          TOKEN(0);
6743                      }
6744                      Mop(OP_DIVIDE);
6745                  }
6746              }
6747          }
6748          else {
6749              /* Disable warning on "study /blah/" */
6750              if (PL_oldoldbufptr == PL_last_uni
6751               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6752                   || memNE(PL_last_uni, "study", 5)
6753                   || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6754               ))
6755                  check_uni();
6756              if (*s == '?')
6757                  deprecate("?PATTERN? without explicit operator");
6758              s = scan_pat(s,OP_MATCH);
6759              TERM(sublex_start());
6760          }
6761
6762     case '.':
6763         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6764 #ifdef PERL_STRICT_CR
6765             && s[1] == '\n'
6766 #else
6767             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6768 #endif
6769             && (s == PL_linestart || s[-1] == '\n') )
6770         {
6771             PL_expect = XSTATE;
6772             formbrack = 2; /* dot seen where arguments expected */
6773             goto rightbracket;
6774         }
6775         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6776             s += 3;
6777             OPERATOR(YADAYADA);
6778         }
6779         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6780             char tmp = *s++;
6781             if (*s == tmp) {
6782                 if (!PL_lex_allbrackets &&
6783                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6784                     s--;
6785                     TOKEN(0);
6786                 }
6787                 s++;
6788                 if (*s == tmp) {
6789                     s++;
6790                     pl_yylval.ival = OPf_SPECIAL;
6791                 }
6792                 else
6793                     pl_yylval.ival = 0;
6794                 OPERATOR(DOTDOT);
6795             }
6796             if (*s == '=' && !PL_lex_allbrackets &&
6797                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6798                 s--;
6799                 TOKEN(0);
6800             }
6801             Aop(OP_CONCAT);
6802         }
6803         /* FALL THROUGH */
6804     case '0': case '1': case '2': case '3': case '4':
6805     case '5': case '6': case '7': case '8': case '9':
6806         s = scan_num(s, &pl_yylval);
6807         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6808         if (PL_expect == XOPERATOR)
6809             no_op("Number",s);
6810         TERM(THING);
6811
6812     case '\'':
6813         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6814         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6815         if (PL_expect == XOPERATOR) {
6816             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6817                 return deprecate_commaless_var_list();
6818             }
6819             else
6820                 no_op("String",s);
6821         }
6822         if (!s)
6823             missingterm(NULL);
6824         pl_yylval.ival = OP_CONST;
6825         TERM(sublex_start());
6826
6827     case '"':
6828         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6829         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6830         if (PL_expect == XOPERATOR) {
6831             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6832                 return deprecate_commaless_var_list();
6833             }
6834             else
6835                 no_op("String",s);
6836         }
6837         if (!s)
6838             missingterm(NULL);
6839         pl_yylval.ival = OP_CONST;
6840         /* FIXME. I think that this can be const if char *d is replaced by
6841            more localised variables.  */
6842         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6843             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6844                 pl_yylval.ival = OP_STRINGIFY;
6845                 break;
6846             }
6847         }
6848         TERM(sublex_start());
6849
6850     case '`':
6851         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6852         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6853         if (PL_expect == XOPERATOR)
6854             no_op("Backticks",s);
6855         if (!s)
6856             missingterm(NULL);
6857         readpipe_override();
6858         TERM(sublex_start());
6859
6860     case '\\':
6861         s++;
6862         if (PL_lex_inwhat && isDIGIT(*s))
6863             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6864                            *s, *s);
6865         if (PL_expect == XOPERATOR)
6866             no_op("Backslash",s);
6867         OPERATOR(REFGEN);
6868
6869     case 'v':
6870         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6871             char *start = s + 2;
6872             while (isDIGIT(*start) || *start == '_')
6873                 start++;
6874             if (*start == '.' && isDIGIT(start[1])) {
6875                 s = scan_num(s, &pl_yylval);
6876                 TERM(THING);
6877             }
6878             else if ((*start == ':' && start[1] == ':')
6879                   || (PL_expect == XSTATE && *start == ':'))
6880                 goto keylookup;
6881             else if (PL_expect == XSTATE) {
6882                 d = start;
6883                 while (d < PL_bufend && isSPACE(*d)) d++;
6884                 if (*d == ':') goto keylookup;
6885             }
6886             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6887             if (!isALPHA(*start) && (PL_expect == XTERM
6888                         || PL_expect == XREF || PL_expect == XSTATE
6889                         || PL_expect == XTERMORDORDOR)) {
6890                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6891                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6892                 if (!gv) {
6893                     s = scan_num(s, &pl_yylval);
6894                     TERM(THING);
6895                 }
6896             }
6897         }
6898         goto keylookup;
6899     case 'x':
6900         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6901             s++;
6902             Mop(OP_REPEAT);
6903         }
6904         goto keylookup;
6905
6906     case '_':
6907     case 'a': case 'A':
6908     case 'b': case 'B':
6909     case 'c': case 'C':
6910     case 'd': case 'D':
6911     case 'e': case 'E':
6912     case 'f': case 'F':
6913     case 'g': case 'G':
6914     case 'h': case 'H':
6915     case 'i': case 'I':
6916     case 'j': case 'J':
6917     case 'k': case 'K':
6918     case 'l': case 'L':
6919     case 'm': case 'M':
6920     case 'n': case 'N':
6921     case 'o': case 'O':
6922     case 'p': case 'P':
6923     case 'q': case 'Q':
6924     case 'r': case 'R':
6925     case 's': case 'S':
6926     case 't': case 'T':
6927     case 'u': case 'U':
6928               case 'V':
6929     case 'w': case 'W':
6930               case 'X':
6931     case 'y': case 'Y':
6932     case 'z': case 'Z':
6933
6934       keylookup: {
6935         bool anydelim;
6936         bool lex;
6937         I32 tmp;
6938         SV *sv;
6939         CV *cv;
6940         PADOFFSET off;
6941         OP *rv2cv_op;
6942
6943         lex = FALSE;
6944         orig_keyword = 0;
6945         off = 0;
6946         sv = NULL;
6947         cv = NULL;
6948         gv = NULL;
6949         gvp = NULL;
6950         rv2cv_op = NULL;
6951
6952         PL_bufptr = s;
6953         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6954
6955         /* Some keywords can be followed by any delimiter, including ':' */
6956         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6957
6958         /* x::* is just a word, unless x is "CORE" */
6959         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6960             goto just_a_word;
6961
6962         d = s;
6963         while (d < PL_bufend && isSPACE(*d))
6964                 d++;    /* no comments skipped here, or s### is misparsed */
6965
6966         /* Is this a word before a => operator? */
6967         if (*d == '=' && d[1] == '>') {
6968             CLINE;
6969             pl_yylval.opval
6970                 = (OP*)newSVOP(OP_CONST, 0,
6971                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6972             pl_yylval.opval->op_private = OPpCONST_BARE;
6973             TERM(WORD);
6974         }
6975
6976         /* Check for plugged-in keyword */
6977         {
6978             OP *o;
6979             int result;
6980             char *saved_bufptr = PL_bufptr;
6981             PL_bufptr = s;
6982             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6983             s = PL_bufptr;
6984             if (result == KEYWORD_PLUGIN_DECLINE) {
6985                 /* not a plugged-in keyword */
6986                 PL_bufptr = saved_bufptr;
6987             } else if (result == KEYWORD_PLUGIN_STMT) {
6988                 pl_yylval.opval = o;
6989                 CLINE;
6990                 PL_expect = XSTATE;
6991                 return REPORT(PLUGSTMT);
6992             } else if (result == KEYWORD_PLUGIN_EXPR) {
6993                 pl_yylval.opval = o;
6994                 CLINE;
6995                 PL_expect = XOPERATOR;
6996                 return REPORT(PLUGEXPR);
6997             } else {
6998                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6999                                         PL_tokenbuf);
7000             }
7001         }
7002
7003         /* Check for built-in keyword */
7004         tmp = keyword(PL_tokenbuf, len, 0);
7005
7006         /* Is this a label? */
7007         if (!anydelim && PL_expect == XSTATE
7008               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7009             s = d + 1;
7010             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7011             pl_yylval.pval[len] = '\0';
7012             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7013             CLINE;
7014             TOKEN(LABEL);
7015         }
7016
7017         /* Check for lexical sub */
7018         if (PL_expect != XOPERATOR) {
7019             char tmpbuf[sizeof PL_tokenbuf + 1];
7020             *tmpbuf = '&';
7021             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7022             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7023             if (off != NOT_IN_PAD) {
7024                 assert(off); /* we assume this is boolean-true below */
7025                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7026                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7027                     HEK * const stashname = HvNAME_HEK(stash);
7028                     sv = newSVhek(stashname);
7029                     sv_catpvs(sv, "::");
7030                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7031                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7032                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7033                                     SVt_PVCV);
7034                     off = 0;
7035                     if (!gv) {
7036                         sv_free(sv);
7037                         sv = NULL;
7038                         goto just_a_word;
7039                     }
7040                 }
7041                 else {
7042                     rv2cv_op = newOP(OP_PADANY, 0);
7043                     rv2cv_op->op_targ = off;
7044                     cv = find_lexical_cv(off);
7045                 }
7046                 lex = TRUE;
7047                 goto just_a_word;
7048             }
7049             off = 0;
7050         }
7051
7052         if (tmp < 0) {                  /* second-class keyword? */
7053             GV *ogv = NULL;     /* override (winner) */
7054             GV *hgv = NULL;     /* hidden (loser) */
7055             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7056                 CV *cv;
7057                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7058                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
7059                     (cv = GvCVu(gv)))
7060                 {
7061                     if (GvIMPORTED_CV(gv))
7062                         ogv = gv;
7063                     else if (! CvMETHOD(cv))
7064                         hgv = gv;
7065                 }
7066                 if (!ogv &&
7067                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7068                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
7069                     (gv = *gvp) && isGV_with_GP(gv) &&
7070                     GvCVu(gv) && GvIMPORTED_CV(gv))
7071                 {
7072                     ogv = gv;
7073                 }
7074             }
7075             if (ogv) {
7076                 orig_keyword = tmp;
7077                 tmp = 0;                /* overridden by import or by GLOBAL */
7078             }
7079             else if (gv && !gvp
7080                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7081                      && GvCVu(gv))
7082             {
7083                 tmp = 0;                /* any sub overrides "weak" keyword */
7084             }
7085             else {                      /* no override */
7086                 tmp = -tmp;
7087                 if (tmp == KEY_dump) {
7088                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7089                                    "dump() better written as CORE::dump()");
7090                 }
7091                 gv = NULL;
7092                 gvp = 0;
7093                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
7094                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7095                                    "Ambiguous call resolved as CORE::%s(), "
7096                                    "qualify as such or use &",
7097                                    GvENAME(hgv));
7098             }
7099         }
7100
7101       reserved_word:
7102         switch (tmp) {
7103
7104         default:                        /* not a keyword */
7105             /* Trade off - by using this evil construction we can pull the
7106                variable gv into the block labelled keylookup. If not, then
7107                we have to give it function scope so that the goto from the
7108                earlier ':' case doesn't bypass the initialisation.  */
7109             if (0) {
7110             just_a_word_zero_gv:
7111                 sv = NULL;
7112                 cv = NULL;
7113                 gv = NULL;
7114                 gvp = NULL;
7115                 rv2cv_op = NULL;
7116                 orig_keyword = 0;
7117                 lex = 0;
7118                 off = 0;
7119             }
7120           just_a_word: {
7121                 int pkgname = 0;
7122                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7123                 const char penultchar =
7124                     lastchar && PL_bufptr - 2 >= PL_linestart
7125                          ? PL_bufptr[-2]
7126                          : 0;
7127 #ifdef PERL_MAD
7128                 SV *nextPL_nextwhite = 0;
7129 #endif
7130
7131
7132                 /* Get the rest if it looks like a package qualifier */
7133
7134                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7135                     STRLEN morelen;
7136                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7137                                   TRUE, &morelen);
7138                     if (!morelen)
7139                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7140                                 UTF8fARG(UTF, len, PL_tokenbuf),
7141                                 *s == '\'' ? "'" : "::");
7142                     len += morelen;
7143                     pkgname = 1;
7144                 }
7145
7146                 if (PL_expect == XOPERATOR) {
7147                     if (PL_bufptr == PL_linestart) {
7148                         CopLINE_dec(PL_curcop);
7149                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7150                         CopLINE_inc(PL_curcop);
7151                     }
7152                     else
7153                         no_op("Bareword",s);
7154                 }
7155
7156                 /* Look for a subroutine with this name in current package,
7157                    unless this is a lexical sub, or name is "Foo::",
7158                    in which case Foo is a bareword
7159                    (and a package name). */
7160
7161                 if (len > 2 && !PL_madskills &&
7162                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7163                 {
7164                     if (ckWARN(WARN_BAREWORD)
7165                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7166                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7167                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
7168                            UTF8fARG(UTF, len, PL_tokenbuf));
7169                     len -= 2;
7170                     PL_tokenbuf[len] = '\0';
7171                     gv = NULL;
7172                     gvp = 0;
7173                 }
7174                 else {
7175                     if (!lex && !gv) {
7176                         /* Mustn't actually add anything to a symbol table.
7177                            But also don't want to "initialise" any placeholder
7178                            constants that might already be there into full
7179                            blown PVGVs with attached PVCV.  */
7180                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7181                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7182                                                SVt_PVCV);
7183                     }
7184                     len = 0;
7185                 }
7186
7187                 /* if we saw a global override before, get the right name */
7188
7189                 if (!sv)
7190                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7191                     len ? len : strlen(PL_tokenbuf));
7192                 if (gvp) {
7193                     SV * const tmp_sv = sv;
7194                     sv = newSVpvs("CORE::GLOBAL::");
7195                     sv_catsv(sv, tmp_sv);
7196                     SvREFCNT_dec(tmp_sv);
7197                 }
7198
7199 #ifdef PERL_MAD
7200                 if (PL_madskills && !PL_thistoken) {
7201                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7202                     PL_thistoken = newSVpvn(start,s - start);
7203                     PL_realtokenstart = s - SvPVX(PL_linestr);
7204                 }
7205 #endif
7206
7207                 /* Presume this is going to be a bareword of some sort. */
7208                 CLINE;
7209                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7210                 pl_yylval.opval->op_private = OPpCONST_BARE;
7211
7212                 /* And if "Foo::", then that's what it certainly is. */
7213                 if (len)
7214                     goto safe_bareword;
7215
7216                 if (!off)
7217                 {
7218                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7219                     const_op->op_private = OPpCONST_BARE;
7220                     rv2cv_op = newCVREF(0, const_op);
7221                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7222                 }
7223
7224                 /* See if it's the indirect object for a list operator. */
7225
7226                 if (PL_oldoldbufptr &&
7227                     PL_oldoldbufptr < PL_bufptr &&
7228                     (PL_oldoldbufptr == PL_last_lop
7229                      || PL_oldoldbufptr == PL_last_uni) &&
7230                     /* NO SKIPSPACE BEFORE HERE! */
7231                     (PL_expect == XREF ||
7232                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7233                 {
7234                     bool immediate_paren = *s == '(';
7235
7236                     /* (Now we can afford to cross potential line boundary.) */
7237                     s = SKIPSPACE2(s,nextPL_nextwhite);
7238 #ifdef PERL_MAD
7239                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
7240 #endif
7241
7242                     /* Two barewords in a row may indicate method call. */
7243
7244                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7245                         (tmp = intuit_method(s, gv, cv))) {
7246                         op_free(rv2cv_op);
7247                         if (tmp == METHOD && !PL_lex_allbrackets &&
7248                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7249                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7250                         return REPORT(tmp);
7251                     }
7252
7253                     /* If not a declared subroutine, it's an indirect object. */
7254                     /* (But it's an indir obj regardless for sort.) */
7255                     /* Also, if "_" follows a filetest operator, it's a bareword */
7256
7257                     if (
7258                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7259                          (!cv &&
7260                         (PL_last_lop_op != OP_MAPSTART &&
7261                          PL_last_lop_op != OP_GREPSTART))))
7262                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7263                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7264                        )
7265                     {
7266                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7267                         goto bareword;
7268                     }
7269                 }
7270
7271                 PL_expect = XOPERATOR;
7272 #ifdef PERL_MAD
7273                 if (isSPACE(*s))
7274                     s = SKIPSPACE2(s,nextPL_nextwhite);
7275                 PL_nextwhite = nextPL_nextwhite;
7276 #else
7277                 s = skipspace(s);
7278 #endif
7279
7280                 /* Is this a word before a => operator? */
7281                 if (*s == '=' && s[1] == '>' && !pkgname) {
7282                     op_free(rv2cv_op);
7283                     CLINE;
7284                     /* This is our own scalar, created a few lines above,
7285                        so this is safe. */
7286                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7287                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7288                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7289                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7290                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7291                     TERM(WORD);
7292                 }
7293
7294                 /* If followed by a paren, it's certainly a subroutine. */
7295                 if (*s == '(') {
7296                     CLINE;
7297                     if (cv) {
7298                         d = s + 1;
7299                         while (SPACE_OR_TAB(*d))
7300                             d++;
7301                         if (*d == ')' && (sv = cv_const_sv(cv))) {
7302                             s = d + 1;
7303                             goto its_constant;
7304                         }
7305                     }
7306 #ifdef PERL_MAD
7307                     if (PL_madskills) {
7308                         PL_nextwhite = PL_thiswhite;
7309                         PL_thiswhite = 0;
7310                     }
7311                     start_force(PL_curforce);
7312 #endif
7313                     NEXTVAL_NEXTTOKE.opval =
7314                         off ? rv2cv_op : pl_yylval.opval;
7315                     PL_expect = XOPERATOR;
7316 #ifdef PERL_MAD
7317                     if (PL_madskills) {
7318                         PL_nextwhite = nextPL_nextwhite;
7319                         curmad('X', PL_thistoken);
7320                         PL_thistoken = newSVpvs("");
7321                     }
7322 #endif
7323                     if (off)
7324                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7325                     else op_free(rv2cv_op),        force_next(WORD);
7326                     pl_yylval.ival = 0;
7327                     TOKEN('&');
7328                 }
7329
7330                 /* If followed by var or block, call it a method (unless sub) */
7331
7332                 if ((*s == '$' || *s == '{') && !cv) {
7333                     op_free(rv2cv_op);
7334                     PL_last_lop = PL_oldbufptr;
7335                     PL_last_lop_op = OP_METHOD;
7336                     if (!PL_lex_allbrackets &&
7337                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7338                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7339                     PREBLOCK(METHOD);
7340                 }
7341
7342                 /* If followed by a bareword, see if it looks like indir obj. */
7343
7344                 if (!orig_keyword
7345                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7346                         && (tmp = intuit_method(s, gv, cv))) {
7347                     op_free(rv2cv_op);
7348                     if (tmp == METHOD && !PL_lex_allbrackets &&
7349                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7350                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7351                     return REPORT(tmp);
7352                 }
7353
7354                 /* Not a method, so call it a subroutine (if defined) */
7355
7356                 if (cv) {
7357                     if (lastchar == '-' && penultchar != '-') {
7358                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
7359                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7360                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7361                              UTF8fARG(UTF, l, PL_tokenbuf),
7362                              UTF8fARG(UTF, l, PL_tokenbuf));
7363                     }
7364                     /* Check for a constant sub */
7365                     if ((sv = cv_const_sv(cv))) {
7366                   its_constant:
7367                         op_free(rv2cv_op);
7368                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7369                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7370                         pl_yylval.opval->op_private = OPpCONST_FOLDED;
7371                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
7372                         TOKEN(WORD);
7373                     }
7374
7375                     op_free(pl_yylval.opval);
7376                     pl_yylval.opval =
7377                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7378                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7379                     PL_last_lop = PL_oldbufptr;
7380                     PL_last_lop_op = OP_ENTERSUB;
7381                     /* Is there a prototype? */
7382                     if (
7383 #ifdef PERL_MAD
7384                         cv &&
7385 #endif
7386                         SvPOK(cv))
7387                     {
7388                         STRLEN protolen = CvPROTOLEN(cv);
7389                         const char *proto = CvPROTO(cv);
7390                         bool optional;
7391                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7392                         if (!protolen)
7393                             TERM(FUNC0SUB);
7394                         if ((optional = *proto == ';'))
7395                           do
7396                             proto++;
7397                           while (*proto == ';');
7398                         if (
7399                             (
7400                                 (
7401                                     *proto == '$' || *proto == '_'
7402                                  || *proto == '*' || *proto == '+'
7403                                 )
7404                              && proto[1] == '\0'
7405                             )
7406                          || (
7407                              *proto == '\\' && proto[1] && proto[2] == '\0'
7408                             )
7409                         )
7410                             UNIPROTO(UNIOPSUB,optional);
7411                         if (*proto == '\\' && proto[1] == '[') {
7412                             const char *p = proto + 2;
7413                             while(*p && *p != ']')
7414                                 ++p;
7415                             if(*p == ']' && !p[1])
7416                                 UNIPROTO(UNIOPSUB,optional);
7417                         }
7418                         if (*proto == '&' && *s == '{') {
7419                             if (PL_curstash)
7420                                 sv_setpvs(PL_subname, "__ANON__");
7421                             else
7422                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7423                             if (!PL_lex_allbrackets &&
7424                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7425                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7426                             PREBLOCK(LSTOPSUB);
7427                         }
7428                     }
7429 #ifdef PERL_MAD
7430                     {
7431                         if (PL_madskills) {
7432                             PL_nextwhite = PL_thiswhite;
7433                             PL_thiswhite = 0;
7434                         }
7435                         start_force(PL_curforce);
7436                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7437                         PL_expect = XTERM;
7438                         if (PL_madskills) {
7439                             PL_nextwhite = nextPL_nextwhite;
7440                             curmad('X', PL_thistoken);
7441                             PL_thistoken = newSVpvs("");
7442                         }
7443                         force_next(off ? PRIVATEREF : WORD);
7444                         if (!PL_lex_allbrackets &&
7445                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7446                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7447                         TOKEN(NOAMP);
7448                     }
7449                 }
7450
7451                 /* Guess harder when madskills require "best effort". */
7452                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7453                     int probable_sub = 0;
7454                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7455                         probable_sub = 1;
7456                     else if (isALPHA(*s)) {
7457                         char tmpbuf[1024];
7458                         STRLEN tmplen;
7459                         d = s;
7460                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7461                         if (!keyword(tmpbuf, tmplen, 0))
7462                             probable_sub = 1;
7463                         else {
7464                             while (d < PL_bufend && isSPACE(*d))
7465                                 d++;
7466                             if (*d == '=' && d[1] == '>')
7467                                 probable_sub = 1;
7468                         }
7469                     }
7470                     if (probable_sub) {
7471                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7472                                         SVt_PVCV);
7473                         op_free(pl_yylval.opval);
7474                         pl_yylval.opval =
7475                             off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7476                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7477                         PL_last_lop = PL_oldbufptr;
7478                         PL_last_lop_op = OP_ENTERSUB;
7479                         PL_nextwhite = PL_thiswhite;
7480                         PL_thiswhite = 0;
7481                         start_force(PL_curforce);
7482                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7483                         PL_expect = XTERM;
7484                         PL_nextwhite = nextPL_nextwhite;
7485                         curmad('X', PL_thistoken);
7486                         PL_thistoken = newSVpvs("");
7487                         force_next(off ? PRIVATEREF : WORD);
7488                         if (!PL_lex_allbrackets &&
7489                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7490                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7491                         TOKEN(NOAMP);
7492                     }
7493 #else
7494                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7495                     PL_expect = XTERM;
7496                     force_next(off ? PRIVATEREF : WORD);
7497                     if (!PL_lex_allbrackets &&
7498                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7499                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7500                     TOKEN(NOAMP);
7501 #endif
7502                 }
7503
7504                 /* Call it a bare word */
7505
7506                 if (PL_hints & HINT_STRICT_SUBS)
7507                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7508                 else {
7509                 bareword:
7510                     /* after "print" and similar functions (corresponding to
7511                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7512                      * a filehandle should be subject to "strict subs".
7513                      * Likewise for the optional indirect-object argument to system
7514                      * or exec, which can't be a bareword */
7515                     if ((PL_last_lop_op == OP_PRINT
7516                             || PL_last_lop_op == OP_PRTF
7517                             || PL_last_lop_op == OP_SAY
7518                             || PL_last_lop_op == OP_SYSTEM
7519                             || PL_last_lop_op == OP_EXEC)
7520                             && (PL_hints & HINT_STRICT_SUBS))
7521                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7522                     if (lastchar != '-') {
7523                         if (ckWARN(WARN_RESERVED)) {
7524                             d = PL_tokenbuf;
7525                             while (isLOWER(*d))
7526                                 d++;
7527                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7528                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7529                                        PL_tokenbuf);
7530                         }
7531                     }
7532                 }
7533                 op_free(rv2cv_op);
7534
7535             safe_bareword:
7536                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7537                  && saw_infix_sigil) {
7538                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7539                                      "Operator or semicolon missing before %c%"UTF8f,
7540                                      lastchar,
7541                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7542                                               PL_tokenbuf));
7543                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7544                                      "Ambiguous use of %c resolved as operator %c",
7545                                      lastchar, lastchar);
7546                 }
7547                 TOKEN(WORD);
7548             }
7549
7550         case KEY___FILE__:
7551             FUN0OP(
7552                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7553             );
7554
7555         case KEY___LINE__:
7556             FUN0OP(
7557                 (OP*)newSVOP(OP_CONST, 0,
7558                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7559             );
7560
7561         case KEY___PACKAGE__:
7562             FUN0OP(
7563                 (OP*)newSVOP(OP_CONST, 0,
7564                                         (PL_curstash
7565                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7566                                          : &PL_sv_undef))
7567             );
7568
7569         case KEY___DATA__:
7570         case KEY___END__: {
7571             GV *gv;
7572             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7573                 const char *pname = "main";
7574                 STRLEN plen = 4;
7575                 U32 putf8 = 0;
7576                 if (PL_tokenbuf[2] == 'D')
7577                 {
7578                     HV * const stash =
7579                         PL_curstash ? PL_curstash : PL_defstash;
7580                     pname = HvNAME_get(stash);
7581                     plen  = HvNAMELEN (stash);
7582                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7583                 }
7584                 gv = gv_fetchpvn_flags(
7585                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7586                         plen+6, GV_ADD|putf8, SVt_PVIO
7587                 );
7588                 GvMULTI_on(gv);
7589                 if (!GvIO(gv))
7590                     GvIOp(gv) = newIO();
7591                 IoIFP(GvIOp(gv)) = PL_rsfp;
7592 #if defined(HAS_FCNTL) && defined(F_SETFD)
7593                 {
7594                     const int fd = PerlIO_fileno(PL_rsfp);
7595                     fcntl(fd,F_SETFD,fd >= 3);
7596                 }
7597 #endif
7598                 /* Mark this internal pseudo-handle as clean */
7599                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7600                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7601                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7602                 else
7603                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7604 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7605                 /* if the script was opened in binmode, we need to revert
7606                  * it to text mode for compatibility; but only iff it has CRs
7607                  * XXX this is a questionable hack at best. */
7608                 if (PL_bufend-PL_bufptr > 2
7609                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7610                 {
7611                     Off_t loc = 0;
7612                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7613                         loc = PerlIO_tell(PL_rsfp);
7614                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7615                     }
7616 #ifdef NETWARE
7617                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7618 #else
7619                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7620 #endif  /* NETWARE */
7621                         if (loc > 0)
7622                             PerlIO_seek(PL_rsfp, loc, 0);
7623                     }
7624                 }
7625 #endif
7626 #ifdef PERLIO_LAYERS
7627                 if (!IN_BYTES) {
7628                     if (UTF)
7629                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7630                     else if (PL_encoding) {
7631                         SV *name;
7632                         dSP;
7633                         ENTER;
7634                         SAVETMPS;
7635                         PUSHMARK(sp);
7636                         EXTEND(SP, 1);
7637                         XPUSHs(PL_encoding);
7638                         PUTBACK;
7639                         call_method("name", G_SCALAR);
7640                         SPAGAIN;
7641                         name = POPs;
7642                         PUTBACK;
7643                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7644                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7645                                                       SVfARG(name)));
7646                         FREETMPS;
7647                         LEAVE;
7648                     }
7649                 }
7650 #endif
7651 #ifdef PERL_MAD
7652                 if (PL_madskills) {
7653                     if (PL_realtokenstart >= 0) {
7654                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7655                         if (!PL_endwhite)
7656                             PL_endwhite = newSVpvs("");
7657                         sv_catsv(PL_endwhite, PL_thiswhite);
7658                         PL_thiswhite = 0;
7659                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7660                         PL_realtokenstart = -1;
7661                     }
7662                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7663                            != NULL) ;
7664                 }
7665 #endif
7666                 PL_rsfp = NULL;
7667             }
7668             goto fake_eof;
7669         }
7670
7671         case KEY___SUB__:
7672             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7673
7674         case KEY_AUTOLOAD:
7675         case KEY_DESTROY:
7676         case KEY_BEGIN:
7677         case KEY_UNITCHECK:
7678         case KEY_CHECK:
7679         case KEY_INIT:
7680         case KEY_END:
7681             if (PL_expect == XSTATE) {
7682                 s = PL_bufptr;
7683                 goto really_sub;
7684             }
7685             goto just_a_word;
7686
7687         case KEY_CORE:
7688             if (*s == ':' && s[1] == ':') {
7689                 STRLEN olen = len;
7690                 d = s;
7691                 s += 2;
7692                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7693                 if ((*s == ':' && s[1] == ':')
7694                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7695                 {
7696                     s = d;
7697                     len = olen;
7698                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7699                     goto just_a_word;
7700                 }
7701                 if (!tmp)
7702                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7703                                       UTF8fARG(UTF, len, PL_tokenbuf));
7704                 if (tmp < 0)
7705                     tmp = -tmp;
7706                 else if (tmp == KEY_require || tmp == KEY_do
7707                       || tmp == KEY_glob)
7708                     /* that's a way to remember we saw "CORE::" */
7709                     orig_keyword = tmp;
7710                 goto reserved_word;
7711             }
7712             goto just_a_word;
7713
7714         case KEY_abs:
7715             UNI(OP_ABS);
7716
7717         case KEY_alarm:
7718             UNI(OP_ALARM);
7719
7720         case KEY_accept:
7721             LOP(OP_ACCEPT,XTERM);
7722
7723         case KEY_and:
7724             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7725                 return REPORT(0);
7726             OPERATOR(ANDOP);
7727
7728         case KEY_atan2:
7729             LOP(OP_ATAN2,XTERM);
7730
7731         case KEY_bind:
7732             LOP(OP_BIND,XTERM);
7733
7734         case KEY_binmode:
7735             LOP(OP_BINMODE,XTERM);
7736
7737         case KEY_bless:
7738             LOP(OP_BLESS,XTERM);
7739
7740         case KEY_break:
7741             FUN0(OP_BREAK);
7742
7743         case KEY_chop:
7744             UNI(OP_CHOP);
7745
7746         case KEY_continue:
7747                     /* We have to disambiguate the two senses of
7748                       "continue". If the next token is a '{' then
7749                       treat it as the start of a continue block;
7750                       otherwise treat it as a control operator.
7751                      */
7752                     s = skipspace(s);
7753                     if (*s == '{')
7754             PREBLOCK(CONTINUE);
7755                     else
7756                         FUN0(OP_CONTINUE);
7757
7758         case KEY_chdir:
7759             /* may use HOME */
7760             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7761             UNI(OP_CHDIR);
7762
7763         case KEY_close:
7764             UNI(OP_CLOSE);
7765
7766         case KEY_closedir:
7767             UNI(OP_CLOSEDIR);
7768
7769         case KEY_cmp:
7770             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7771                 return REPORT(0);
7772             Eop(OP_SCMP);
7773
7774         case KEY_caller:
7775             UNI(OP_CALLER);
7776
7777         case KEY_crypt:
7778 #ifdef FCRYPT
7779             if (!PL_cryptseen) {
7780                 PL_cryptseen = TRUE;
7781                 init_des();
7782             }
7783 #endif
7784             LOP(OP_CRYPT,XTERM);
7785
7786         case KEY_chmod:
7787             LOP(OP_CHMOD,XTERM);
7788
7789         case KEY_chown:
7790             LOP(OP_CHOWN,XTERM);
7791
7792         case KEY_connect:
7793             LOP(OP_CONNECT,XTERM);
7794
7795         case KEY_chr:
7796             UNI(OP_CHR);
7797
7798         case KEY_cos:
7799             UNI(OP_COS);
7800
7801         case KEY_chroot:
7802             UNI(OP_CHROOT);
7803
7804         case KEY_default:
7805             PREBLOCK(DEFAULT);
7806
7807         case KEY_do:
7808             s = SKIPSPACE1(s);
7809             if (*s == '{')
7810                 PRETERMBLOCK(DO);
7811             if (*s != '\'') {
7812                 *PL_tokenbuf = '&';
7813                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7814                               1, &len);
7815                 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7816                     d = SKIPSPACE1(d);
7817                     if (*d == '(') {
7818                         force_ident_maybe_lex('&');
7819                         s = d;
7820                     }
7821                 }
7822             }
7823             if (orig_keyword == KEY_do) {
7824                 orig_keyword = 0;
7825                 pl_yylval.ival = 1;
7826             }
7827             else
7828                 pl_yylval.ival = 0;
7829             OPERATOR(DO);
7830
7831         case KEY_die:
7832             PL_hints |= HINT_BLOCK_SCOPE;
7833             LOP(OP_DIE,XTERM);
7834
7835         case KEY_defined:
7836             UNI(OP_DEFINED);
7837
7838         case KEY_delete:
7839             UNI(OP_DELETE);
7840
7841         case KEY_dbmopen:
7842             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7843                               STR_WITH_LEN("NDBM_File::"),
7844                               STR_WITH_LEN("DB_File::"),
7845                               STR_WITH_LEN("GDBM_File::"),
7846                               STR_WITH_LEN("SDBM_File::"),
7847                               STR_WITH_LEN("ODBM_File::"),
7848                               NULL);
7849             LOP(OP_DBMOPEN,XTERM);
7850
7851         case KEY_dbmclose:
7852             UNI(OP_DBMCLOSE);
7853
7854         case KEY_dump:
7855             PL_expect = XOPERATOR;
7856             s = force_word(s,WORD,TRUE,FALSE);
7857             LOOPX(OP_DUMP);
7858
7859         case KEY_else:
7860             PREBLOCK(ELSE);
7861
7862         case KEY_elsif:
7863             pl_yylval.ival = CopLINE(PL_curcop);
7864             OPERATOR(ELSIF);
7865
7866         case KEY_eq:
7867             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7868                 return REPORT(0);
7869             Eop(OP_SEQ);
7870
7871         case KEY_exists:
7872             UNI(OP_EXISTS);
7873
7874         case KEY_exit:
7875             if (PL_madskills)
7876                 UNI(OP_INT);
7877             UNI(OP_EXIT);
7878
7879         case KEY_eval:
7880             s = SKIPSPACE1(s);
7881             if (*s == '{') { /* block eval */
7882                 PL_expect = XTERMBLOCK;
7883                 UNIBRACK(OP_ENTERTRY);
7884             }
7885             else { /* string eval */
7886                 PL_expect = XTERM;
7887                 UNIBRACK(OP_ENTEREVAL);
7888             }
7889
7890         case KEY_evalbytes:
7891             PL_expect = XTERM;
7892             UNIBRACK(-OP_ENTEREVAL);
7893
7894         case KEY_eof:
7895             UNI(OP_EOF);
7896
7897         case KEY_exp:
7898             UNI(OP_EXP);
7899
7900         case KEY_each:
7901             UNI(OP_EACH);
7902
7903         case KEY_exec:
7904             LOP(OP_EXEC,XREF);
7905
7906         case KEY_endhostent:
7907             FUN0(OP_EHOSTENT);
7908
7909         case KEY_endnetent:
7910             FUN0(OP_ENETENT);
7911
7912         case KEY_endservent:
7913             FUN0(OP_ESERVENT);
7914
7915         case KEY_endprotoent:
7916             FUN0(OP_EPROTOENT);
7917
7918         case KEY_endpwent:
7919             FUN0(OP_EPWENT);
7920
7921         case KEY_endgrent:
7922             FUN0(OP_EGRENT);
7923
7924         case KEY_for:
7925         case KEY_foreach:
7926             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7927                 return REPORT(0);
7928             pl_yylval.ival = CopLINE(PL_curcop);
7929             s = SKIPSPACE1(s);
7930             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7931                 char *p = s;
7932 #ifdef PERL_MAD
7933                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7934 #endif
7935
7936                 if ((PL_bufend - p) >= 3 &&
7937                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7938                     p += 2;
7939                 else if ((PL_bufend - p) >= 4 &&
7940                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7941                     p += 3;
7942                 p = PEEKSPACE(p);
7943                 if (isIDFIRST_lazy_if(p,UTF)) {
7944                     p = scan_ident(p, PL_bufend,
7945                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7946                     p = PEEKSPACE(p);
7947                 }
7948                 if (*p != '$')
7949                     Perl_croak(aTHX_ "Missing $ on loop variable");
7950 #ifdef PERL_MAD
7951                 s = SvPVX(PL_linestr) + soff;
7952 #endif
7953             }
7954             OPERATOR(FOR);
7955
7956         case KEY_formline:
7957             LOP(OP_FORMLINE,XTERM);
7958
7959         case KEY_fork:
7960             FUN0(OP_FORK);
7961
7962         case KEY_fc:
7963             UNI(OP_FC);
7964
7965         case KEY_fcntl:
7966             LOP(OP_FCNTL,XTERM);
7967
7968         case KEY_fileno:
7969             UNI(OP_FILENO);
7970
7971         case KEY_flock:
7972             LOP(OP_FLOCK,XTERM);
7973
7974         case KEY_gt:
7975             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7976                 return REPORT(0);
7977             Rop(OP_SGT);
7978
7979         case KEY_ge:
7980             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7981                 return REPORT(0);
7982             Rop(OP_SGE);
7983
7984         case KEY_grep:
7985             LOP(OP_GREPSTART, XREF);
7986
7987         case KEY_goto:
7988             PL_expect = XOPERATOR;
7989             s = force_word(s,WORD,TRUE,FALSE);
7990             LOOPX(OP_GOTO);
7991
7992         case KEY_gmtime:
7993             UNI(OP_GMTIME);
7994
7995         case KEY_getc:
7996             UNIDOR(OP_GETC);
7997
7998         case KEY_getppid:
7999             FUN0(OP_GETPPID);
8000
8001         case KEY_getpgrp:
8002             UNI(OP_GETPGRP);
8003
8004         case KEY_getpriority:
8005             LOP(OP_GETPRIORITY,XTERM);
8006
8007         case KEY_getprotobyname:
8008             UNI(OP_GPBYNAME);
8009
8010         case KEY_getprotobynumber:
8011             LOP(OP_GPBYNUMBER,XTERM);
8012
8013         case KEY_getprotoent:
8014             FUN0(OP_GPROTOENT);
8015
8016         case KEY_getpwent:
8017             FUN0(OP_GPWENT);
8018
8019         case KEY_getpwnam:
8020             UNI(OP_GPWNAM);
8021
8022         case KEY_getpwuid:
8023             UNI(OP_GPWUID);
8024
8025         case KEY_getpeername:
8026             UNI(OP_GETPEERNAME);
8027
8028         case KEY_gethostbyname:
8029             UNI(OP_GHBYNAME);
8030
8031         case KEY_gethostbyaddr:
8032             LOP(OP_GHBYADDR,XTERM);
8033
8034         case KEY_gethostent:
8035             FUN0(OP_GHOSTENT);
8036
8037         case KEY_getnetbyname:
8038             UNI(OP_GNBYNAME);
8039
8040         case KEY_getnetbyaddr:
8041             LOP(OP_GNBYADDR,XTERM);
8042
8043         case KEY_getnetent:
8044             FUN0(OP_GNETENT);
8045
8046         case KEY_getservbyname:
8047             LOP(OP_GSBYNAME,XTERM);
8048
8049         case KEY_getservbyport:
8050             LOP(OP_GSBYPORT,XTERM);
8051
8052         case KEY_getservent:
8053             FUN0(OP_GSERVENT);
8054
8055         case KEY_getsockname:
8056             UNI(OP_GETSOCKNAME);
8057
8058         case KEY_getsockopt:
8059             LOP(OP_GSOCKOPT,XTERM);
8060
8061         case KEY_getgrent:
8062             FUN0(OP_GGRENT);
8063
8064         case KEY_getgrnam:
8065             UNI(OP_GGRNAM);
8066
8067         case KEY_getgrgid:
8068             UNI(OP_GGRGID);
8069
8070         case KEY_getlogin:
8071             FUN0(OP_GETLOGIN);
8072
8073         case KEY_given:
8074             pl_yylval.ival = CopLINE(PL_curcop);
8075             Perl_ck_warner_d(aTHX_
8076                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8077                 "given is experimental");
8078             OPERATOR(GIVEN);
8079
8080         case KEY_glob:
8081             LOP(
8082              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
8083              XTERM
8084             );
8085
8086         case KEY_hex:
8087             UNI(OP_HEX);
8088
8089         case KEY_if:
8090             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8091                 return REPORT(0);
8092             pl_yylval.ival = CopLINE(PL_curcop);
8093             OPERATOR(IF);
8094
8095         case KEY_index:
8096             LOP(OP_INDEX,XTERM);
8097
8098         case KEY_int:
8099             UNI(OP_INT);
8100
8101         case KEY_ioctl:
8102             LOP(OP_IOCTL,XTERM);
8103
8104         case KEY_join:
8105             LOP(OP_JOIN,XTERM);
8106
8107         case KEY_keys:
8108             UNI(OP_KEYS);
8109
8110         case KEY_kill:
8111             LOP(OP_KILL,XTERM);
8112
8113         case KEY_last:
8114             PL_expect = XOPERATOR;
8115             s = force_word(s,WORD,TRUE,FALSE);
8116             LOOPX(OP_LAST);
8117
8118         case KEY_lc:
8119             UNI(OP_LC);
8120
8121         case KEY_lcfirst:
8122             UNI(OP_LCFIRST);
8123
8124         case KEY_local:
8125             pl_yylval.ival = 0;
8126             OPERATOR(LOCAL);
8127
8128         case KEY_length:
8129             UNI(OP_LENGTH);
8130
8131         case KEY_lt:
8132             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8133                 return REPORT(0);
8134             Rop(OP_SLT);
8135
8136         case KEY_le:
8137             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8138                 return REPORT(0);
8139             Rop(OP_SLE);
8140
8141         case KEY_localtime:
8142             UNI(OP_LOCALTIME);
8143
8144         case KEY_log:
8145             UNI(OP_LOG);
8146
8147         case KEY_link:
8148             LOP(OP_LINK,XTERM);
8149
8150         case KEY_listen:
8151             LOP(OP_LISTEN,XTERM);
8152
8153         case KEY_lock:
8154             UNI(OP_LOCK);
8155
8156         case KEY_lstat:
8157             UNI(OP_LSTAT);
8158
8159         case KEY_m:
8160             s = scan_pat(s,OP_MATCH);
8161             TERM(sublex_start());
8162
8163         case KEY_map:
8164             LOP(OP_MAPSTART, XREF);
8165
8166         case KEY_mkdir:
8167             LOP(OP_MKDIR,XTERM);
8168
8169         case KEY_msgctl:
8170             LOP(OP_MSGCTL,XTERM);
8171
8172         case KEY_msgget:
8173             LOP(OP_MSGGET,XTERM);
8174
8175         case KEY_msgrcv:
8176             LOP(OP_MSGRCV,XTERM);
8177
8178         case KEY_msgsnd:
8179             LOP(OP_MSGSND,XTERM);
8180
8181         case KEY_our:
8182         case KEY_my:
8183         case KEY_state:
8184             PL_in_my = (U16)tmp;
8185             s = SKIPSPACE1(s);
8186             if (isIDFIRST_lazy_if(s,UTF)) {
8187 #ifdef PERL_MAD
8188                 char* start = s;
8189 #endif
8190                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8191                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8192                 {
8193                     if (!FEATURE_LEXSUBS_IS_ENABLED)
8194                         Perl_croak(aTHX_
8195                                   "Experimental \"%s\" subs not enabled",
8196                                    tmp == KEY_my    ? "my"    :
8197                                    tmp == KEY_state ? "state" : "our");
8198                     Perl_ck_warner_d(aTHX_
8199                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8200                         "The lexical_subs feature is experimental");
8201                     goto really_sub;
8202                 }
8203                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8204                 if (!PL_in_my_stash) {
8205                     char tmpbuf[1024];
8206                     PL_bufptr = s;
8207                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8208                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8209                 }
8210 #ifdef PERL_MAD
8211                 if (PL_madskills) {     /* just add type to declarator token */
8212                     sv_catsv(PL_thistoken, PL_nextwhite);
8213                     PL_nextwhite = 0;
8214                     sv_catpvn(PL_thistoken, start, s - start);
8215                 }
8216 #endif
8217             }
8218             pl_yylval.ival = 1;
8219             OPERATOR(MY);
8220
8221         case KEY_next:
8222             PL_expect = XOPERATOR;
8223             s = force_word(s,WORD,TRUE,FALSE);
8224             LOOPX(OP_NEXT);
8225
8226         case KEY_ne:
8227             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8228                 return REPORT(0);
8229             Eop(OP_SNE);
8230
8231         case KEY_no:
8232             s = tokenize_use(0, s);
8233             TERM(USE);
8234
8235         case KEY_not:
8236             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8237                 FUN1(OP_NOT);
8238             else {
8239                 if (!PL_lex_allbrackets &&
8240                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8241                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8242                 OPERATOR(NOTOP);
8243             }
8244
8245         case KEY_open:
8246             s = SKIPSPACE1(s);
8247             if (isIDFIRST_lazy_if(s,UTF)) {
8248           const char *t;
8249           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8250               &len);
8251                 for (t=d; isSPACE(*t);)
8252                     t++;
8253                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8254                     /* [perl #16184] */
8255                     && !(t[0] == '=' && t[1] == '>')
8256                     && !(t[0] == ':' && t[1] == ':')
8257                     && !keyword(s, d-s, 0)
8258                 ) {
8259                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8260                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8261                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8262                 }
8263             }
8264             LOP(OP_OPEN,XTERM);
8265
8266         case KEY_or:
8267             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8268                 return REPORT(0);
8269             pl_yylval.ival = OP_OR;
8270             OPERATOR(OROP);
8271
8272         case KEY_ord:
8273             UNI(OP_ORD);
8274
8275         case KEY_oct:
8276             UNI(OP_OCT);
8277
8278         case KEY_opendir:
8279             LOP(OP_OPEN_DIR,XTERM);
8280
8281         case KEY_print:
8282             checkcomma(s,PL_tokenbuf,"filehandle");
8283             LOP(OP_PRINT,XREF);
8284
8285         case KEY_printf:
8286             checkcomma(s,PL_tokenbuf,"filehandle");
8287             LOP(OP_PRTF,XREF);
8288
8289         case KEY_prototype:
8290             UNI(OP_PROTOTYPE);
8291
8292         case KEY_push:
8293             LOP(OP_PUSH,XTERM);
8294
8295         case KEY_pop:
8296             UNIDOR(OP_POP);
8297
8298         case KEY_pos:
8299             UNIDOR(OP_POS);
8300
8301         case KEY_pack:
8302             LOP(OP_PACK,XTERM);
8303
8304         case KEY_package:
8305             s = force_word(s,WORD,FALSE,TRUE);
8306             s = SKIPSPACE1(s);
8307             s = force_strict_version(s);
8308             PL_lex_expect = XBLOCK;
8309             OPERATOR(PACKAGE);
8310
8311         case KEY_pipe:
8312             LOP(OP_PIPE_OP,XTERM);
8313
8314         case KEY_q:
8315             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8316             if (!s)
8317                 missingterm(NULL);
8318             pl_yylval.ival = OP_CONST;
8319             TERM(sublex_start());
8320
8321         case KEY_quotemeta:
8322             UNI(OP_QUOTEMETA);
8323
8324         case KEY_qw: {
8325             OP *words = NULL;
8326             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8327             if (!s)
8328                 missingterm(NULL);
8329             PL_expect = XOPERATOR;
8330             if (SvCUR(PL_lex_stuff)) {
8331                 int warned_comma = !ckWARN(WARN_QW);
8332                 int warned_comment = warned_comma;
8333                 d = SvPV_force(PL_lex_stuff, len);
8334                 while (len) {
8335                     for (; isSPACE(*d) && len; --len, ++d)
8336                         /**/;
8337                     if (len) {
8338                         SV *sv;
8339                         const char *b = d;
8340                         if (!warned_comma || !warned_comment) {
8341                             for (; !isSPACE(*d) && len; --len, ++d) {
8342                                 if (!warned_comma && *d == ',') {
8343                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8344                                         "Possible attempt to separate words with commas");
8345                                     ++warned_comma;
8346                                 }
8347                                 else if (!warned_comment && *d == '#') {
8348                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8349                                         "Possible attempt to put comments in qw() list");
8350                                     ++warned_comment;
8351                                 }
8352                             }
8353                         }
8354                         else {
8355                             for (; !isSPACE(*d) && len; --len, ++d)
8356                                 /**/;
8357                         }
8358                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8359                         words = op_append_elem(OP_LIST, words,
8360                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8361                     }
8362                 }
8363             }
8364             if (!words)
8365                 words = newNULLLIST();
8366             if (PL_lex_stuff) {
8367                 SvREFCNT_dec(PL_lex_stuff);
8368                 PL_lex_stuff = NULL;
8369             }
8370             PL_expect = XOPERATOR;
8371             pl_yylval.opval = sawparens(words);
8372             TOKEN(QWLIST);
8373         }
8374
8375         case KEY_qq:
8376             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8377             if (!s)
8378                 missingterm(NULL);
8379             pl_yylval.ival = OP_STRINGIFY;
8380             if (SvIVX(PL_lex_stuff) == '\'')
8381                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8382             TERM(sublex_start());
8383
8384         case KEY_qr:
8385             s = scan_pat(s,OP_QR);
8386             TERM(sublex_start());
8387
8388         case KEY_qx:
8389             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8390             if (!s)
8391                 missingterm(NULL);
8392             readpipe_override();
8393             TERM(sublex_start());
8394
8395         case KEY_return:
8396             OLDLOP(OP_RETURN);
8397
8398         case KEY_require:
8399             s = SKIPSPACE1(s);
8400             PL_expect = XOPERATOR;
8401             if (isDIGIT(*s)) {
8402                 s = force_version(s, FALSE);
8403             }
8404             else if (*s != 'v' || !isDIGIT(s[1])
8405                     || (s = force_version(s, TRUE), *s == 'v'))
8406             {
8407                 *PL_tokenbuf = '\0';
8408                 s = force_word(s,WORD,TRUE,TRUE);
8409                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8410                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8411                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8412                 else if (*s == '<')
8413                     yyerror("<> should be quotes");
8414             }
8415             if (orig_keyword == KEY_require) {
8416                 orig_keyword = 0;
8417                 pl_yylval.ival = 1;
8418             }
8419             else
8420                 pl_yylval.ival = 0;
8421             PL_expect = XTERM;
8422             PL_bufptr = s;
8423             PL_last_uni = PL_oldbufptr;
8424             PL_last_lop_op = OP_REQUIRE;
8425             s = skipspace(s);
8426             return REPORT( (int)REQUIRE );
8427
8428         case KEY_reset:
8429             UNI(OP_RESET);
8430
8431         case KEY_redo:
8432             PL_expect = XOPERATOR;
8433             s = force_word(s,WORD,TRUE,FALSE);
8434             LOOPX(OP_REDO);
8435
8436         case KEY_rename:
8437             LOP(OP_RENAME,XTERM);
8438
8439         case KEY_rand:
8440             UNI(OP_RAND);
8441
8442         case KEY_rmdir:
8443             UNI(OP_RMDIR);
8444
8445         case KEY_rindex:
8446             LOP(OP_RINDEX,XTERM);
8447
8448         case KEY_read:
8449             LOP(OP_READ,XTERM);
8450
8451         case KEY_readdir:
8452             UNI(OP_READDIR);
8453
8454         case KEY_readline:
8455             UNIDOR(OP_READLINE);
8456
8457         case KEY_readpipe:
8458             UNIDOR(OP_BACKTICK);
8459
8460         case KEY_rewinddir:
8461             UNI(OP_REWINDDIR);
8462
8463         case KEY_recv:
8464             LOP(OP_RECV,XTERM);
8465
8466         case KEY_reverse:
8467             LOP(OP_REVERSE,XTERM);
8468
8469         case KEY_readlink:
8470             UNIDOR(OP_READLINK);
8471
8472         case KEY_ref:
8473             UNI(OP_REF);
8474
8475         case KEY_s:
8476             s = scan_subst(s);
8477             if (pl_yylval.opval)
8478                 TERM(sublex_start());
8479             else
8480                 TOKEN(1);       /* force error */
8481
8482         case KEY_say:
8483             checkcomma(s,PL_tokenbuf,"filehandle");
8484             LOP(OP_SAY,XREF);
8485
8486         case KEY_chomp:
8487             UNI(OP_CHOMP);
8488
8489         case KEY_scalar:
8490             UNI(OP_SCALAR);
8491
8492         case KEY_select:
8493             LOP(OP_SELECT,XTERM);
8494
8495         case KEY_seek:
8496             LOP(OP_SEEK,XTERM);
8497
8498         case KEY_semctl:
8499             LOP(OP_SEMCTL,XTERM);
8500
8501         case KEY_semget:
8502             LOP(OP_SEMGET,XTERM);
8503
8504         case KEY_semop:
8505             LOP(OP_SEMOP,XTERM);
8506
8507         case KEY_send:
8508             LOP(OP_SEND,XTERM);
8509
8510         case KEY_setpgrp:
8511             LOP(OP_SETPGRP,XTERM);
8512
8513         case KEY_setpriority:
8514             LOP(OP_SETPRIORITY,XTERM);
8515
8516         case KEY_sethostent:
8517             UNI(OP_SHOSTENT);
8518
8519         case KEY_setnetent:
8520             UNI(OP_SNETENT);
8521
8522         case KEY_setservent:
8523             UNI(OP_SSERVENT);
8524
8525         case KEY_setprotoent:
8526             UNI(OP_SPROTOENT);
8527
8528         case KEY_setpwent:
8529             FUN0(OP_SPWENT);
8530
8531         case KEY_setgrent:
8532             FUN0(OP_SGRENT);
8533
8534         case KEY_seekdir:
8535             LOP(OP_SEEKDIR,XTERM);
8536
8537         case KEY_setsockopt:
8538             LOP(OP_SSOCKOPT,XTERM);
8539
8540         case KEY_shift:
8541             UNIDOR(OP_SHIFT);
8542
8543         case KEY_shmctl:
8544             LOP(OP_SHMCTL,XTERM);
8545
8546         case KEY_shmget:
8547             LOP(OP_SHMGET,XTERM);
8548
8549         case KEY_shmread:
8550             LOP(OP_SHMREAD,XTERM);
8551
8552         case KEY_shmwrite:
8553             LOP(OP_SHMWRITE,XTERM);
8554
8555         case KEY_shutdown:
8556             LOP(OP_SHUTDOWN,XTERM);
8557
8558         case KEY_sin:
8559             UNI(OP_SIN);
8560
8561         case KEY_sleep:
8562             UNI(OP_SLEEP);
8563
8564         case KEY_socket:
8565             LOP(OP_SOCKET,XTERM);
8566
8567         case KEY_socketpair:
8568             LOP(OP_SOCKPAIR,XTERM);
8569
8570         case KEY_sort:
8571             checkcomma(s,PL_tokenbuf,"subroutine name");
8572             s = SKIPSPACE1(s);
8573             PL_expect = XTERM;
8574             s = force_word(s,WORD,TRUE,TRUE);
8575             LOP(OP_SORT,XREF);
8576
8577         case KEY_split:
8578             LOP(OP_SPLIT,XTERM);
8579
8580         case KEY_sprintf:
8581             LOP(OP_SPRINTF,XTERM);
8582
8583         case KEY_splice:
8584             LOP(OP_SPLICE,XTERM);
8585
8586         case KEY_sqrt:
8587             UNI(OP_SQRT);
8588
8589         case KEY_srand:
8590             UNI(OP_SRAND);
8591
8592         case KEY_stat:
8593             UNI(OP_STAT);
8594
8595         case KEY_study:
8596             UNI(OP_STUDY);
8597
8598         case KEY_substr:
8599             LOP(OP_SUBSTR,XTERM);
8600
8601         case KEY_format:
8602         case KEY_sub:
8603           really_sub:
8604             {
8605                 char * const tmpbuf = PL_tokenbuf + 1;
8606                 expectation attrful;
8607                 bool have_name, have_proto;
8608                 const int key = tmp;
8609 #ifndef PERL_MAD
8610                 SV *format_name = NULL;
8611 #endif
8612
8613 #ifdef PERL_MAD
8614                 SV *tmpwhite = 0;
8615
8616                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8617                 SV *subtoken = PL_madskills
8618                    ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8619                    : NULL;
8620                 PL_thistoken = 0;
8621
8622                 d = s;
8623                 s = SKIPSPACE2(s,tmpwhite);
8624 #else
8625                 d = s;
8626                 s = skipspace(s);
8627 #endif
8628
8629                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8630                     (*s == ':' && s[1] == ':'))
8631                 {
8632 #ifdef PERL_MAD
8633                     SV *nametoke = NULL;
8634 #endif
8635
8636                     PL_expect = XBLOCK;
8637                     attrful = XATTRBLOCK;
8638                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8639                                   &len);
8640 #ifdef PERL_MAD
8641                     if (PL_madskills)
8642                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8643 #else
8644                     if (key == KEY_format)
8645                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8646 #endif
8647                     *PL_tokenbuf = '&';
8648                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8649                      || pad_findmy_pvn(
8650                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8651                         ) != NOT_IN_PAD)
8652                         sv_setpvn(PL_subname, tmpbuf, len);
8653                     else {
8654                         sv_setsv(PL_subname,PL_curstname);
8655                         sv_catpvs(PL_subname,"::");
8656                         sv_catpvn(PL_subname,tmpbuf,len);
8657                     }
8658                     if (SvUTF8(PL_linestr))
8659                         SvUTF8_on(PL_subname);
8660                     have_name = TRUE;
8661
8662
8663 #ifdef PERL_MAD
8664                     start_force(0);
8665                     CURMAD('X', nametoke);
8666                     CURMAD('_', tmpwhite);
8667                     force_ident_maybe_lex('&');
8668
8669                     s = SKIPSPACE2(d,tmpwhite);
8670 #else
8671                     s = skipspace(d);
8672 #endif
8673                 }
8674                 else {
8675                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8676                     {
8677                         *d = '\0';
8678                         /* diag_listed_as: Missing name in "%s sub" */
8679                         Perl_croak(aTHX_
8680                                   "Missing name in \"%s\"", PL_bufptr);
8681                     }
8682                     PL_expect = XTERMBLOCK;
8683                     attrful = XATTRTERM;
8684                     sv_setpvs(PL_subname,"?");
8685                     have_name = FALSE;
8686                 }
8687
8688                 if (key == KEY_format) {
8689 #ifdef PERL_MAD
8690                     PL_thistoken = subtoken;
8691                     s = d;
8692 #else
8693                     if (format_name) {
8694                         start_force(PL_curforce);
8695                         NEXTVAL_NEXTTOKE.opval
8696                             = (OP*)newSVOP(OP_CONST,0, format_name);
8697                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8698                         force_next(WORD);
8699                     }
8700 #endif
8701                     PREBLOCK(FORMAT);
8702                 }
8703
8704                 /* Look for a prototype */
8705                 if (*s == '(') {
8706                     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8707                     if (!s)
8708                         Perl_croak(aTHX_ "Prototype not terminated");
8709                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8710                     have_proto = TRUE;
8711
8712 #ifdef PERL_MAD
8713                     start_force(0);
8714                     CURMAD('q', PL_thisopen);
8715                     CURMAD('_', tmpwhite);
8716                     CURMAD('=', PL_thisstuff);
8717                     CURMAD('Q', PL_thisclose);
8718                     NEXTVAL_NEXTTOKE.opval =
8719                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8720                     PL_lex_stuff = NULL;
8721                     force_next(THING);
8722
8723                     s = SKIPSPACE2(s,tmpwhite);
8724 #else
8725                     s = skipspace(s);
8726 #endif
8727                 }
8728                 else
8729                     have_proto = FALSE;
8730
8731                 if (*s == ':' && s[1] != ':')
8732                     PL_expect = attrful;
8733                 else if (*s != '{' && key == KEY_sub) {
8734                     if (!have_name)
8735                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8736                     else if (*s != ';' && *s != '}')
8737                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8738                 }
8739
8740 #ifdef PERL_MAD
8741                 start_force(0);
8742                 if (tmpwhite) {
8743                     if (PL_madskills)
8744                         curmad('^', newSVpvs(""));
8745                     CURMAD('_', tmpwhite);
8746                 }
8747                 force_next(0);
8748
8749                 PL_thistoken = subtoken;
8750                 PERL_UNUSED_VAR(have_proto);
8751 #else
8752                 if (have_proto) {
8753                     NEXTVAL_NEXTTOKE.opval =
8754                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8755                     PL_lex_stuff = NULL;
8756                     force_next(THING);
8757                 }
8758 #endif
8759                 if (!have_name) {
8760                     if (PL_curstash)
8761                         sv_setpvs(PL_subname, "__ANON__");
8762                     else
8763                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8764                     TOKEN(ANONSUB);
8765                 }
8766 #ifndef PERL_MAD
8767                 force_ident_maybe_lex('&');
8768 #endif
8769                 TOKEN(SUB);
8770             }
8771
8772         case KEY_system:
8773             LOP(OP_SYSTEM,XREF);
8774
8775         case KEY_symlink:
8776             LOP(OP_SYMLINK,XTERM);
8777
8778         case KEY_syscall:
8779             LOP(OP_SYSCALL,XTERM);
8780
8781         case KEY_sysopen:
8782             LOP(OP_SYSOPEN,XTERM);
8783
8784         case KEY_sysseek:
8785             LOP(OP_SYSSEEK,XTERM);
8786
8787         case KEY_sysread:
8788             LOP(OP_SYSREAD,XTERM);
8789
8790         case KEY_syswrite:
8791             LOP(OP_SYSWRITE,XTERM);
8792
8793         case KEY_tr:
8794         case KEY_y:
8795             s = scan_trans(s);
8796             TERM(sublex_start());
8797
8798         case KEY_tell:
8799             UNI(OP_TELL);
8800
8801         case KEY_telldir:
8802             UNI(OP_TELLDIR);
8803
8804         case KEY_tie:
8805             LOP(OP_TIE,XTERM);
8806
8807         case KEY_tied:
8808             UNI(OP_TIED);
8809
8810         case KEY_time:
8811             FUN0(OP_TIME);
8812
8813         case KEY_times:
8814             FUN0(OP_TMS);
8815
8816         case KEY_truncate:
8817             LOP(OP_TRUNCATE,XTERM);
8818
8819         case KEY_uc:
8820             UNI(OP_UC);
8821
8822         case KEY_ucfirst:
8823             UNI(OP_UCFIRST);
8824
8825         case KEY_untie:
8826             UNI(OP_UNTIE);
8827
8828         case KEY_until:
8829             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8830                 return REPORT(0);
8831             pl_yylval.ival = CopLINE(PL_curcop);
8832             OPERATOR(UNTIL);
8833
8834         case KEY_unless:
8835             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8836                 return REPORT(0);
8837             pl_yylval.ival = CopLINE(PL_curcop);
8838             OPERATOR(UNLESS);
8839
8840         case KEY_unlink:
8841             LOP(OP_UNLINK,XTERM);
8842
8843         case KEY_undef:
8844             UNIDOR(OP_UNDEF);
8845
8846         case KEY_unpack:
8847             LOP(OP_UNPACK,XTERM);
8848
8849         case KEY_utime:
8850             LOP(OP_UTIME,XTERM);
8851
8852         case KEY_umask:
8853             UNIDOR(OP_UMASK);
8854
8855         case KEY_unshift:
8856             LOP(OP_UNSHIFT,XTERM);
8857
8858         case KEY_use:
8859             s = tokenize_use(1, s);
8860             OPERATOR(USE);
8861
8862         case KEY_values:
8863             UNI(OP_VALUES);
8864
8865         case KEY_vec:
8866             LOP(OP_VEC,XTERM);
8867
8868         case KEY_when:
8869             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8870                 return REPORT(0);
8871             pl_yylval.ival = CopLINE(PL_curcop);
8872             Perl_ck_warner_d(aTHX_
8873                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8874                 "when is experimental");
8875             OPERATOR(WHEN);
8876
8877         case KEY_while:
8878             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8879                 return REPORT(0);
8880             pl_yylval.ival = CopLINE(PL_curcop);
8881             OPERATOR(WHILE);
8882
8883         case KEY_warn:
8884             PL_hints |= HINT_BLOCK_SCOPE;
8885             LOP(OP_WARN,XTERM);
8886
8887         case KEY_wait:
8888             FUN0(OP_WAIT);
8889
8890         case KEY_waitpid:
8891             LOP(OP_WAITPID,XTERM);
8892
8893         case KEY_wantarray:
8894             FUN0(OP_WANTARRAY);
8895
8896         case KEY_write:
8897 #ifdef EBCDIC
8898         {
8899             char ctl_l[2];
8900             ctl_l[0] = toCTRL('L');
8901             ctl_l[1] = '\0';
8902             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8903         }
8904 #else
8905             /* Make sure $^L is defined */
8906             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8907 #endif
8908             UNI(OP_ENTERWRITE);
8909
8910         case KEY_x:
8911             if (PL_expect == XOPERATOR) {
8912                 if (*s == '=' && !PL_lex_allbrackets &&
8913                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8914                     return REPORT(0);
8915                 Mop(OP_REPEAT);
8916             }
8917             check_uni();
8918             goto just_a_word;
8919
8920         case KEY_xor:
8921             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8922                 return REPORT(0);
8923             pl_yylval.ival = OP_XOR;
8924             OPERATOR(OROP);
8925         }
8926     }}
8927 }
8928 #ifdef __SC__
8929 #pragma segment Main
8930 #endif
8931
8932 /*
8933   S_pending_ident
8934
8935   Looks up an identifier in the pad or in a package
8936
8937   Returns:
8938     PRIVATEREF if this is a lexical name.
8939     WORD       if this belongs to a package.
8940
8941   Structure:
8942       if we're in a my declaration
8943           croak if they tried to say my($foo::bar)
8944           build the ops for a my() declaration
8945       if it's an access to a my() variable
8946           build ops for access to a my() variable
8947       if in a dq string, and they've said @foo and we can't find @foo
8948           warn
8949       build ops for a bareword
8950 */
8951
8952 static int
8953 S_pending_ident(pTHX)
8954 {
8955     dVAR;
8956     PADOFFSET tmp = 0;
8957     const char pit = (char)pl_yylval.ival;
8958     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8959     /* All routes through this function want to know if there is a colon.  */
8960     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8961
8962     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8963           "### Pending identifier '%s'\n", PL_tokenbuf); });
8964
8965     /* if we're in a my(), we can't allow dynamics here.
8966        $foo'bar has already been turned into $foo::bar, so
8967        just check for colons.
8968
8969        if it's a legal name, the OP is a PADANY.
8970     */
8971     if (PL_in_my) {
8972         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8973             if (has_colon)
8974                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8975                                   "variable %s in \"our\"",
8976                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8977             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8978         }
8979         else {
8980             if (has_colon)
8981                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8982                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8983                             UTF ? SVf_UTF8 : 0);
8984
8985             pl_yylval.opval = newOP(OP_PADANY, 0);
8986             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8987                                                         UTF ? SVf_UTF8 : 0);
8988             return PRIVATEREF;
8989         }
8990     }
8991
8992     /*
8993        build the ops for accesses to a my() variable.
8994     */
8995
8996     if (!has_colon) {
8997         if (!PL_in_my)
8998             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8999                                     UTF ? SVf_UTF8 : 0);
9000         if (tmp != NOT_IN_PAD) {
9001             /* might be an "our" variable" */
9002             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9003                 /* build ops for a bareword */
9004                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9005                 HEK * const stashname = HvNAME_HEK(stash);
9006                 SV *  const sym = newSVhek(stashname);
9007                 sv_catpvs(sym, "::");
9008                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9009                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9010                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9011                 if (pit != '&')
9012                   gv_fetchsv(sym,
9013                     (PL_in_eval
9014                         ? (GV_ADDMULTI | GV_ADDINEVAL)
9015                         : GV_ADDMULTI
9016                     ),
9017                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9018                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9019                      : SVt_PVHV));
9020                 return WORD;
9021             }
9022
9023             pl_yylval.opval = newOP(OP_PADANY, 0);
9024             pl_yylval.opval->op_targ = tmp;
9025             return PRIVATEREF;
9026         }
9027     }
9028
9029     /*
9030        Whine if they've said @foo in a doublequoted string,
9031        and @foo isn't a variable we can find in the symbol
9032        table.
9033     */
9034     if (ckWARN(WARN_AMBIGUOUS) &&
9035         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9036         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9037                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9038         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9039                 /* DO NOT warn for @- and @+ */
9040                 && !( PL_tokenbuf[2] == '\0' &&
9041                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9042            )
9043         {
9044             /* Downgraded from fatal to warning 20000522 mjd */
9045             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9046                         "Possible unintended interpolation of %"UTF8f
9047                         " in string",
9048                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9049         }
9050     }
9051
9052     /* build ops for a bareword */
9053     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9054                                    newSVpvn_flags(PL_tokenbuf + 1,
9055                                                       tokenbuf_len - 1,
9056                                                       UTF ? SVf_UTF8 : 0 ));
9057     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9058     if (pit != '&')
9059         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9060                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9061                      | ( UTF ? SVf_UTF8 : 0 ),
9062                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9063                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9064                       : SVt_PVHV));
9065     return WORD;
9066 }
9067
9068 STATIC void
9069 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9070 {
9071     dVAR;
9072
9073     PERL_ARGS_ASSERT_CHECKCOMMA;
9074
9075     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9076         if (ckWARN(WARN_SYNTAX)) {
9077             int level = 1;
9078             const char *w;
9079             for (w = s+2; *w && level; w++) {
9080                 if (*w == '(')
9081                     ++level;
9082                 else if (*w == ')')
9083                     --level;
9084             }
9085             while (isSPACE(*w))
9086                 ++w;
9087             /* the list of chars below is for end of statements or
9088              * block / parens, boolean operators (&&, ||, //) and branch
9089              * constructs (or, and, if, until, unless, while, err, for).
9090              * Not a very solid hack... */
9091             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9092                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9093                             "%s (...) interpreted as function",name);
9094         }
9095     }
9096     while (s < PL_bufend && isSPACE(*s))
9097         s++;
9098     if (*s == '(')
9099         s++;
9100     while (s < PL_bufend && isSPACE(*s))
9101         s++;
9102     if (isIDFIRST_lazy_if(s,UTF)) {
9103         const char * const w = s;
9104         s += UTF ? UTF8SKIP(s) : 1;
9105         while (isWORDCHAR_lazy_if(s,UTF))
9106             s += UTF ? UTF8SKIP(s) : 1;
9107         while (s < PL_bufend && isSPACE(*s))
9108             s++;
9109         if (*s == ',') {
9110             GV* gv;
9111             if (keyword(w, s - w, 0))
9112                 return;
9113
9114             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9115             if (gv && GvCVu(gv))
9116                 return;
9117             Perl_croak(aTHX_ "No comma allowed after %s", what);
9118         }
9119     }
9120 }
9121
9122 /* S_new_constant(): do any overload::constant lookup.
9123
9124    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9125    Best used as sv=new_constant(..., sv, ...).
9126    If s, pv are NULL, calls subroutine with one argument,
9127    and <type> is used with error messages only.
9128    <type> is assumed to be well formed UTF-8 */
9129
9130 STATIC SV *
9131 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9132                SV *sv, SV *pv, const char *type, STRLEN typelen)
9133 {
9134     dVAR; dSP;
9135     HV * table = GvHV(PL_hintgv);                /* ^H */
9136     SV *res;
9137     SV *errsv = NULL;
9138     SV **cvp;
9139     SV *cv, *typesv;
9140     const char *why1 = "", *why2 = "", *why3 = "";
9141
9142     PERL_ARGS_ASSERT_NEW_CONSTANT;
9143     /* We assume that this is true: */
9144     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9145     assert(type || s);
9146
9147     /* charnames doesn't work well if there have been errors found */
9148     if (PL_error_count > 0 && *key == 'c')
9149     {
9150         SvREFCNT_dec_NN(sv);
9151         return &PL_sv_undef;
9152     }
9153
9154     sv_2mortal(sv);                     /* Parent created it permanently */
9155     if (!table
9156         || ! (PL_hints & HINT_LOCALIZE_HH)
9157         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9158         || ! SvOK(*cvp))
9159     {
9160         char *msg;
9161
9162         /* Here haven't found what we're looking for.  If it is charnames,
9163          * perhaps it needs to be loaded.  Try doing that before giving up */
9164         if (*key == 'c') {
9165             Perl_load_module(aTHX_
9166                             0,
9167                             newSVpvs("_charnames"),
9168                              /* version parameter; no need to specify it, as if
9169                               * we get too early a version, will fail anyway,
9170                               * not being able to find '_charnames' */
9171                             NULL,
9172                             newSVpvs(":full"),
9173                             newSVpvs(":short"),
9174                             NULL);
9175             SPAGAIN;
9176             table = GvHV(PL_hintgv);
9177             if (table
9178                 && (PL_hints & HINT_LOCALIZE_HH)
9179                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9180                 && SvOK(*cvp))
9181             {
9182                 goto now_ok;
9183             }
9184         }
9185         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9186             msg = Perl_form(aTHX_
9187                                "Constant(%.*s) unknown",
9188                                 (int)(type ? typelen : len),
9189                                 (type ? type: s));
9190         }
9191         else {
9192             why1 = "$^H{";
9193             why2 = key;
9194             why3 = "} is not defined";
9195         report:
9196             if (*key == 'c') {
9197                 msg = Perl_form(aTHX_
9198                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9199                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9200                       );
9201             }
9202             else {
9203                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9204                                     (int)(type ? typelen : len),
9205                                     (type ? type: s), why1, why2, why3);
9206             }
9207         }
9208         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9209         return SvREFCNT_inc_simple_NN(sv);
9210     }
9211 now_ok:
9212     cv = *cvp;
9213     if (!pv && s)
9214         pv = newSVpvn_flags(s, len, SVs_TEMP);
9215     if (type && pv)
9216         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9217     else
9218         typesv = &PL_sv_undef;
9219
9220     PUSHSTACKi(PERLSI_OVERLOAD);
9221     ENTER ;
9222     SAVETMPS;
9223
9224     PUSHMARK(SP) ;
9225     EXTEND(sp, 3);
9226     if (pv)
9227         PUSHs(pv);
9228     PUSHs(sv);
9229     if (pv)
9230         PUSHs(typesv);
9231     PUTBACK;
9232     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9233
9234     SPAGAIN ;
9235
9236     /* Check the eval first */
9237     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9238         STRLEN errlen;
9239         const char * errstr;
9240         sv_catpvs(errsv, "Propagated");
9241         errstr = SvPV_const(errsv, errlen);
9242         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9243         (void)POPs;
9244         res = SvREFCNT_inc_simple_NN(sv);
9245     }
9246     else {
9247         res = POPs;
9248         SvREFCNT_inc_simple_void_NN(res);
9249     }
9250
9251     PUTBACK ;
9252     FREETMPS ;
9253     LEAVE ;
9254     POPSTACK;
9255
9256     if (!SvOK(res)) {
9257         why1 = "Call to &{$^H{";
9258         why2 = key;
9259         why3 = "}} did not return a defined value";
9260         sv = res;
9261         (void)sv_2mortal(sv);
9262         goto report;
9263     }
9264
9265     return res;
9266 }
9267
9268 PERL_STATIC_INLINE void
9269 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9270     dVAR;
9271     PERL_ARGS_ASSERT_PARSE_IDENT;
9272
9273     for (;;) {
9274         if (*d >= e)
9275             Perl_croak(aTHX_ "%s", ident_too_long);
9276         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9277              /* The UTF-8 case must come first, otherwise things
9278              * like c\N{COMBINING TILDE} would start failing, as the
9279              * isWORDCHAR_A case below would gobble the 'c' up.
9280              */
9281
9282             char *t = *s + UTF8SKIP(*s);
9283             while (isIDCONT_utf8((U8*)t))
9284                 t += UTF8SKIP(t);
9285             if (*d + (t - *s) > e)
9286                 Perl_croak(aTHX_ "%s", ident_too_long);
9287             Copy(*s, *d, t - *s, char);
9288             *d += t - *s;
9289             *s = t;
9290         }
9291         else if ( isWORDCHAR_A(**s) ) {
9292             do {
9293                 *(*d)++ = *(*s)++;
9294             } while isWORDCHAR_A(**s);
9295         }
9296         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9297             *(*d)++ = ':';
9298             *(*d)++ = ':';
9299             (*s)++;
9300         }
9301         else if (allow_package && **s == ':' && (*s)[1] == ':'
9302            /* Disallow things like Foo::$bar. For the curious, this is
9303             * the code path that triggers the "Bad name after" warning
9304             * when looking for barewords.
9305             */
9306            && (*s)[2] != '$') {
9307             *(*d)++ = *(*s)++;
9308             *(*d)++ = *(*s)++;
9309         }
9310         else
9311             break;
9312     }
9313     return;
9314 }
9315
9316 /* Returns a NUL terminated string, with the length of the string written to
9317    *slp
9318    */
9319 STATIC char *
9320 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9321 {
9322     dVAR;
9323     char *d = dest;
9324     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9325     bool is_utf8 = cBOOL(UTF);
9326
9327     PERL_ARGS_ASSERT_SCAN_WORD;
9328
9329     parse_ident(&s, &d, e, allow_package, is_utf8);
9330     *d = '\0';
9331     *slp = d - dest;
9332     return s;
9333 }
9334
9335 STATIC char *
9336 S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9337 {
9338     dVAR;
9339     char *bracket = NULL;
9340     char funny = *s++;
9341     char *d = dest;
9342     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9343     bool is_utf8 = cBOOL(UTF);
9344
9345     PERL_ARGS_ASSERT_SCAN_IDENT;
9346
9347     if (isSPACE(*s))
9348         s = PEEKSPACE(s);
9349     if (isDIGIT(*s)) {
9350         while (isDIGIT(*s)) {
9351             if (d >= e)
9352                 Perl_croak(aTHX_ "%s", ident_too_long);
9353             *d++ = *s++;
9354         }
9355     }
9356     else {
9357         parse_ident(&s, &d, e, 1, is_utf8);
9358     }
9359     *d = '\0';
9360     d = dest;
9361     if (*d) {
9362         /* Either a digit variable, or parse_ident() found an identifier
9363            (anything valid as a bareword), so job done and return.  */
9364         if (PL_lex_state != LEX_NORMAL)
9365             PL_lex_state = LEX_INTERPENDMAYBE;
9366         return s;
9367     }
9368     if (*s == '$' && s[1] &&
9369       (isIDFIRST_lazy_if(s+1,is_utf8)
9370          || isDIGIT_A((U8)s[1])
9371          || s[1] == '$'
9372          || s[1] == '{'
9373          || strnEQ(s+1,"::",2)) )
9374     {
9375         /* Dereferencing a value in a scalar variable.
9376            The alternatives are different syntaxes for a scalar variable.
9377            Using ' as a leading package separator isn't allowed. :: is.   */
9378         return s;
9379     }
9380     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9381     if (*s == '{') {
9382         bracket = s;
9383         s++;
9384         while (s < send && SPACE_OR_TAB(*s))
9385            s++;
9386     }
9387
9388 #define VALID_LEN_ONE_IDENT(d, u)     (isPUNCT_A((U8)(d))     \
9389                                         || isCNTRL_A((U8)(d)) \
9390                                         || isDIGIT_A((U8)(d)) \
9391                                         || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
9392     if (s < send
9393         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9394     {
9395         if (is_utf8) {
9396             const STRLEN skip = UTF8SKIP(s);
9397             STRLEN i;
9398             d[skip] = '\0';
9399             for ( i = 0; i < skip; i++ )
9400                 d[i] = *s++;
9401         }
9402         else {
9403             *d = *s++;
9404             d[1] = '\0';
9405         }
9406     }
9407     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9408     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9409         *d = toCTRL(*s);
9410         s++;
9411     }
9412     /* Warn about ambiguous code after unary operators if {...} notation isn't
9413        used.  There's no difference in ambiguity; it's merely a heuristic
9414        about when not to warn.  */
9415     else if (ck_uni && !bracket)
9416         check_uni();
9417     if (bracket) {
9418         /* If we were processing {...} notation then...  */
9419         if (isIDFIRST_lazy_if(d,is_utf8)) {
9420             /* if it starts as a valid identifier, assume that it is one.
9421                (the later check for } being at the expected point will trap
9422                cases where this doesn't pan out.)  */
9423         d += is_utf8 ? UTF8SKIP(d) : 1;
9424         parse_ident(&s, &d, e, 1, is_utf8);
9425             *d = '\0';
9426             while (s < send && SPACE_OR_TAB(*s))
9427                 s++;
9428             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9429                 /* ${foo[0]} and ${foo{bar}} notation.  */
9430                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9431                     const char * const brack =
9432                         (const char *)
9433                         ((*s == '[') ? "[...]" : "{...}");
9434    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9435                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9436                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9437                         funny, dest, brack, funny, dest, brack);
9438                 }
9439                 bracket++;
9440                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9441                 PL_lex_allbrackets++;
9442                 return s;
9443             }
9444         }
9445         /* Handle extended ${^Foo} variables
9446          * 1999-02-27 mjd-perl-patch@plover.com */
9447         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9448                  && isWORDCHAR(*s))
9449         {
9450             d++;
9451             while (isWORDCHAR(*s) && d < e) {
9452                 *d++ = *s++;
9453             }
9454             if (d >= e)
9455                 Perl_croak(aTHX_ "%s", ident_too_long);
9456             *d = '\0';
9457         }
9458
9459         while (s < send && SPACE_OR_TAB(*s))
9460             s++;
9461
9462         /* Expect to find a closing } after consuming any trailing whitespace.
9463          */
9464         if (*s == '}') {
9465             s++;
9466             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9467                 PL_lex_state = LEX_INTERPEND;
9468                 PL_expect = XREF;
9469             }
9470             if (PL_lex_state == LEX_NORMAL) {
9471                 if (ckWARN(WARN_AMBIGUOUS) &&
9472                     (keyword(dest, d - dest, 0)
9473                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9474                 {
9475                     SV *tmp = newSVpvn_flags( dest, d - dest,
9476                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9477                     if (funny == '#')
9478                         funny = '@';
9479                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9480                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9481                         funny, tmp, funny, tmp);
9482                 }
9483             }
9484         }
9485         else {
9486             /* Didn't find the closing } at the point we expected, so restore
9487                state such that the next thing to process is the opening { and */
9488             s = bracket;                /* let the parser handle it */
9489             *dest = '\0';
9490         }
9491     }
9492     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9493         PL_lex_state = LEX_INTERPEND;
9494     return s;
9495 }
9496
9497 static bool
9498 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9499
9500     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9501      * the parse starting at 's', based on the subset that are valid in this
9502      * context input to this routine in 'valid_flags'. Advances s.  Returns
9503      * TRUE if the input should be treated as a valid flag, so the next char
9504      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9505      * first call on the current regex.  This routine will set it to any
9506      * charset modifier found.  The caller shouldn't change it.  This way,
9507      * another charset modifier encountered in the parse can be detected as an
9508      * error, as we have decided to allow only one */
9509
9510     const char c = **s;
9511     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9512
9513     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9514         if (isWORDCHAR_lazy_if(*s, UTF)) {
9515             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9516                        UTF ? SVf_UTF8 : 0);
9517             (*s) += charlen;
9518             /* Pretend that it worked, so will continue processing before
9519              * dieing */
9520             return TRUE;
9521         }
9522         return FALSE;
9523     }
9524
9525     switch (c) {
9526
9527         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9528         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9529         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9530         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9531         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9532         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9533         case LOCALE_PAT_MOD:
9534             if (*charset) {
9535                 goto multiple_charsets;
9536             }
9537             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9538             *charset = c;
9539             break;
9540         case UNICODE_PAT_MOD:
9541             if (*charset) {
9542                 goto multiple_charsets;
9543             }
9544             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9545             *charset = c;
9546             break;
9547         case ASCII_RESTRICT_PAT_MOD:
9548             if (! *charset) {
9549                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9550             }
9551             else {
9552
9553                 /* Error if previous modifier wasn't an 'a', but if it was, see
9554                  * if, and accept, a second occurrence (only) */
9555                 if (*charset != 'a'
9556                     || get_regex_charset(*pmfl)
9557                         != REGEX_ASCII_RESTRICTED_CHARSET)
9558                 {
9559                         goto multiple_charsets;
9560                 }
9561                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9562             }
9563             *charset = c;
9564             break;
9565         case DEPENDS_PAT_MOD:
9566             if (*charset) {
9567                 goto multiple_charsets;
9568             }
9569             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9570             *charset = c;
9571             break;
9572     }
9573
9574     (*s)++;
9575     return TRUE;
9576
9577     multiple_charsets:
9578         if (*charset != c) {
9579             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9580         }
9581         else if (c == 'a') {
9582             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9583         }
9584         else {
9585             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9586         }
9587
9588         /* Pretend that it worked, so will continue processing before dieing */
9589         (*s)++;
9590         return TRUE;
9591 }
9592
9593 STATIC char *
9594 S_scan_pat(pTHX_ char *start, I32 type)
9595 {
9596     dVAR;
9597     PMOP *pm;
9598     char *s;
9599     const char * const valid_flags =
9600         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9601     char charset = '\0';    /* character set modifier */
9602 #ifdef PERL_MAD
9603     char *modstart;
9604 #endif
9605
9606     PERL_ARGS_ASSERT_SCAN_PAT;
9607
9608     s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9609                        TRUE /* look for escaped bracketed metas */ );
9610
9611     if (!s) {
9612         const char * const delimiter = skipspace(start);
9613         Perl_croak(aTHX_
9614                    (const char *)
9615                    (*delimiter == '?'
9616                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9617                     : "Search pattern not terminated" ));
9618     }
9619
9620     pm = (PMOP*)newPMOP(type, 0);
9621     if (PL_multi_open == '?') {
9622         /* This is the only point in the code that sets PMf_ONCE:  */
9623         pm->op_pmflags |= PMf_ONCE;
9624
9625         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9626            allows us to restrict the list needed by reset to just the ??
9627            matches.  */
9628         assert(type != OP_TRANS);
9629         if (PL_curstash) {
9630             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9631             U32 elements;
9632             if (!mg) {
9633                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9634                                  0);
9635             }
9636             elements = mg->mg_len / sizeof(PMOP**);
9637             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9638             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9639             mg->mg_len = elements * sizeof(PMOP**);
9640             PmopSTASH_set(pm,PL_curstash);
9641         }
9642     }
9643 #ifdef PERL_MAD
9644     modstart = s;
9645 #endif
9646
9647     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9648      * anon CV. False positives like qr/[(?{]/ are harmless */
9649
9650     if (type == OP_QR) {
9651         STRLEN len;
9652         char *e, *p = SvPV(PL_lex_stuff, len);
9653         e = p + len;
9654         for (; p < e; p++) {
9655             if (p[0] == '(' && p[1] == '?'
9656                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9657             {
9658                 pm->op_pmflags |= PMf_HAS_CV;
9659                 break;
9660             }
9661         }
9662         pm->op_pmflags |= PMf_IS_QR;
9663     }
9664
9665     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9666 #ifdef PERL_MAD
9667     if (PL_madskills && modstart != s) {
9668         SV* tmptoken = newSVpvn(modstart, s - modstart);
9669         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9670     }
9671 #endif
9672     /* issue a warning if /c is specified,but /g is not */
9673     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9674     {
9675         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9676                        "Use of /c modifier is meaningless without /g" );
9677     }
9678
9679     PL_lex_op = (OP*)pm;
9680     pl_yylval.ival = OP_MATCH;
9681     return s;
9682 }
9683
9684 STATIC char *
9685 S_scan_subst(pTHX_ char *start)
9686 {
9687     dVAR;
9688     char *s;
9689     PMOP *pm;
9690     I32 first_start;
9691     I32 es = 0;
9692     char charset = '\0';    /* character set modifier */
9693 #ifdef PERL_MAD
9694     char *modstart;
9695 #endif
9696
9697     PERL_ARGS_ASSERT_SCAN_SUBST;
9698
9699     pl_yylval.ival = OP_NULL;
9700
9701     s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9702                  TRUE /* look for escaped bracketed metas */ );
9703
9704     if (!s)
9705         Perl_croak(aTHX_ "Substitution pattern not terminated");
9706
9707     if (s[-1] == PL_multi_open)
9708         s--;
9709 #ifdef PERL_MAD
9710     if (PL_madskills) {
9711         CURMAD('q', PL_thisopen);
9712         CURMAD('_', PL_thiswhite);
9713         CURMAD('E', PL_thisstuff);
9714         CURMAD('Q', PL_thisclose);
9715         PL_realtokenstart = s - SvPVX(PL_linestr);
9716     }
9717 #endif
9718
9719     first_start = PL_multi_start;
9720     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9721     if (!s) {
9722         if (PL_lex_stuff) {
9723             SvREFCNT_dec(PL_lex_stuff);
9724             PL_lex_stuff = NULL;
9725         }
9726         Perl_croak(aTHX_ "Substitution replacement not terminated");
9727     }
9728     PL_multi_start = first_start;       /* so whole substitution is taken together */
9729
9730     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9731
9732 #ifdef PERL_MAD
9733     if (PL_madskills) {
9734         CURMAD('z', PL_thisopen);
9735         CURMAD('R', PL_thisstuff);
9736         CURMAD('Z', PL_thisclose);
9737     }
9738     modstart = s;
9739 #endif
9740
9741     while (*s) {
9742         if (*s == EXEC_PAT_MOD) {
9743             s++;
9744             es++;
9745         }
9746         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9747         {
9748             break;
9749         }
9750     }
9751
9752 #ifdef PERL_MAD
9753     if (PL_madskills) {
9754         if (modstart != s)
9755             curmad('m', newSVpvn(modstart, s - modstart));
9756         append_madprops(PL_thismad, (OP*)pm, 0);
9757         PL_thismad = 0;
9758     }
9759 #endif
9760     if ((pm->op_pmflags & PMf_CONTINUE)) {
9761         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9762     }
9763
9764     if (es) {
9765         SV * const repl = newSVpvs("");
9766
9767         PL_multi_end = 0;
9768         pm->op_pmflags |= PMf_EVAL;
9769         while (es-- > 0) {
9770             if (es)
9771                 sv_catpvs(repl, "eval ");
9772             else
9773                 sv_catpvs(repl, "do ");
9774         }
9775         sv_catpvs(repl, "{");
9776         sv_catsv(repl, PL_sublex_info.repl);
9777         sv_catpvs(repl, "}");
9778         SvEVALED_on(repl);
9779         SvREFCNT_dec(PL_sublex_info.repl);
9780         PL_sublex_info.repl = repl;
9781     }
9782
9783     PL_lex_op = (OP*)pm;
9784     pl_yylval.ival = OP_SUBST;
9785     return s;
9786 }
9787
9788 STATIC char *
9789 S_scan_trans(pTHX_ char *start)
9790 {
9791     dVAR;
9792     char* s;
9793     OP *o;
9794     U8 squash;
9795     U8 del;
9796     U8 complement;
9797     bool nondestruct = 0;
9798 #ifdef PERL_MAD
9799     char *modstart;
9800 #endif
9801
9802     PERL_ARGS_ASSERT_SCAN_TRANS;
9803
9804     pl_yylval.ival = OP_NULL;
9805
9806     s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9807     if (!s)
9808         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9809
9810     if (s[-1] == PL_multi_open)
9811         s--;
9812 #ifdef PERL_MAD
9813     if (PL_madskills) {
9814         CURMAD('q', PL_thisopen);
9815         CURMAD('_', PL_thiswhite);
9816         CURMAD('E', PL_thisstuff);
9817         CURMAD('Q', PL_thisclose);
9818         PL_realtokenstart = s - SvPVX(PL_linestr);
9819     }
9820 #endif
9821
9822     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9823     if (!s) {
9824         if (PL_lex_stuff) {
9825             SvREFCNT_dec(PL_lex_stuff);
9826             PL_lex_stuff = NULL;
9827         }
9828         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9829     }
9830     if (PL_madskills) {
9831         CURMAD('z', PL_thisopen);
9832         CURMAD('R', PL_thisstuff);
9833         CURMAD('Z', PL_thisclose);
9834     }
9835
9836     complement = del = squash = 0;
9837 #ifdef PERL_MAD
9838     modstart = s;
9839 #endif
9840     while (1) {
9841         switch (*s) {
9842         case 'c':
9843             complement = OPpTRANS_COMPLEMENT;
9844             break;
9845         case 'd':
9846             del = OPpTRANS_DELETE;
9847             break;
9848         case 's':
9849             squash = OPpTRANS_SQUASH;
9850             break;
9851         case 'r':
9852             nondestruct = 1;
9853             break;
9854         default:
9855             goto no_more;
9856         }
9857         s++;
9858     }
9859   no_more:
9860
9861     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9862     o->op_private &= ~OPpTRANS_ALL;
9863     o->op_private |= del|squash|complement|
9864       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9865       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9866
9867     PL_lex_op = o;
9868     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9869
9870 #ifdef PERL_MAD
9871     if (PL_madskills) {
9872         if (modstart != s)
9873             curmad('m', newSVpvn(modstart, s - modstart));
9874         append_madprops(PL_thismad, o, 0);
9875         PL_thismad = 0;
9876     }
9877 #endif
9878
9879     return s;
9880 }
9881
9882 /* scan_heredoc
9883    Takes a pointer to the first < in <<FOO.
9884    Returns a pointer to the byte following <<FOO.
9885
9886    This function scans a heredoc, which involves different methods
9887    depending on whether we are in a string eval, quoted construct, etc.
9888    This is because PL_linestr could containing a single line of input, or
9889    a whole string being evalled, or the contents of the current quote-
9890    like operator.
9891
9892    The two basic methods are:
9893     - Steal lines from the input stream
9894     - Scan the heredoc in PL_linestr and remove it therefrom
9895
9896    In a file scope or filtered eval, the first method is used; in a
9897    string eval, the second.
9898
9899    In a quote-like operator, we have to choose between the two,
9900    depending on where we can find a newline.  We peek into outer lex-
9901    ing scopes until we find one with a newline in it.  If we reach the
9902    outermost lexing scope and it is a file, we use the stream method.
9903    Otherwise it is treated as an eval.
9904 */
9905
9906 STATIC char *
9907 S_scan_heredoc(pTHX_ char *s)
9908 {
9909     dVAR;
9910     I32 op_type = OP_SCALAR;
9911     I32 len;
9912     SV *tmpstr;
9913     char term;
9914     char *d;
9915     char *e;
9916     char *peek;
9917     const bool infile = PL_rsfp || PL_parser->filtered;
9918     LEXSHARED *shared = PL_parser->lex_shared;
9919 #ifdef PERL_MAD
9920     I32 stuffstart = s - SvPVX(PL_linestr);
9921     char *tstart;
9922
9923     PL_realtokenstart = -1;
9924 #endif
9925
9926     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9927
9928     s += 2;
9929     d = PL_tokenbuf + 1;
9930     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9931     *PL_tokenbuf = '\n';
9932     peek = s;
9933     while (SPACE_OR_TAB(*peek))
9934         peek++;
9935     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9936         s = peek;
9937         term = *s++;
9938         s = delimcpy(d, e, s, PL_bufend, term, &len);
9939         if (s == PL_bufend)
9940             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9941         d += len;
9942         s++;
9943     }
9944     else {
9945         if (*s == '\\')
9946             /* <<\FOO is equivalent to <<'FOO' */
9947             s++, term = '\'';
9948         else
9949             term = '"';
9950         if (!isWORDCHAR_lazy_if(s,UTF))
9951             deprecate("bare << to mean <<\"\"");
9952         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9953             if (d < e)
9954                 *d++ = *s;
9955         }
9956     }
9957     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9958         Perl_croak(aTHX_ "Delimiter for here document is too long");
9959     *d++ = '\n';
9960     *d = '\0';
9961     len = d - PL_tokenbuf;
9962
9963 #ifdef PERL_MAD
9964     if (PL_madskills) {
9965         tstart = PL_tokenbuf + 1;
9966         PL_thisclose = newSVpvn(tstart, len - 1);
9967         tstart = SvPVX(PL_linestr) + stuffstart;
9968         PL_thisopen = newSVpvn(tstart, s - tstart);
9969         stuffstart = s - SvPVX(PL_linestr);
9970     }
9971 #endif
9972 #ifndef PERL_STRICT_CR
9973     d = strchr(s, '\r');
9974     if (d) {
9975         char * const olds = s;
9976         s = d;
9977         while (s < PL_bufend) {
9978             if (*s == '\r') {
9979                 *d++ = '\n';
9980                 if (*++s == '\n')
9981                     s++;
9982             }
9983             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9984                 *d++ = *s++;
9985                 s++;
9986             }
9987             else
9988                 *d++ = *s++;
9989         }
9990         *d = '\0';
9991         PL_bufend = d;
9992         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9993         s = olds;
9994     }
9995 #endif
9996 #ifdef PERL_MAD
9997     if (PL_madskills) {
9998         tstart = SvPVX(PL_linestr) + stuffstart;
9999         if (PL_thisstuff)
10000             sv_catpvn(PL_thisstuff, tstart, s - tstart);
10001         else
10002             PL_thisstuff = newSVpvn(tstart, s - tstart);
10003     }
10004
10005     stuffstart = s - SvPVX(PL_linestr);
10006 #endif
10007
10008     tmpstr = newSV_type(SVt_PVIV);
10009     SvGROW(tmpstr, 80);
10010     if (term == '\'') {
10011         op_type = OP_CONST;
10012         SvIV_set(tmpstr, -1);
10013     }
10014     else if (term == '`') {
10015         op_type = OP_BACKTICK;
10016         SvIV_set(tmpstr, '\\');
10017     }
10018
10019     PL_multi_start = CopLINE(PL_curcop) + 1;
10020     PL_multi_open = PL_multi_close = '<';
10021     /* inside a string eval or quote-like operator */
10022     if (!infile || PL_lex_inwhat) {
10023         SV *linestr;
10024         char *bufend;
10025         char * const olds = s;
10026         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10027         /* These two fields are not set until an inner lexing scope is
10028            entered.  But we need them set here. */
10029         shared->ls_bufptr  = s;
10030         shared->ls_linestr = PL_linestr;
10031         if (PL_lex_inwhat)
10032           /* Look for a newline.  If the current buffer does not have one,
10033              peek into the line buffer of the parent lexing scope, going
10034              up as many levels as necessary to find one with a newline
10035              after bufptr.
10036            */
10037           while (!(s = (char *)memchr(
10038                     (void *)shared->ls_bufptr, '\n',
10039                     SvEND(shared->ls_linestr)-shared->ls_bufptr
10040                 ))) {
10041             shared = shared->ls_prev;
10042             /* shared is only null if we have gone beyond the outermost
10043                lexing scope.  In a file, we will have broken out of the
10044                loop in the previous iteration.  In an eval, the string buf-
10045                fer ends with "\n;", so the while condition below will have
10046                evaluated to false.  So shared can never be null. */
10047             assert(shared);
10048             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10049                most lexing scope.  In a file, shared->ls_linestr at that
10050                level is just one line, so there is no body to steal. */
10051             if (infile && !shared->ls_prev) {
10052                 s = olds;
10053                 goto streaming;
10054             }
10055           }
10056         else {  /* eval */
10057             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10058             assert(s);
10059         }
10060         linestr = shared->ls_linestr;
10061         bufend = SvEND(linestr);
10062         d = s;
10063         while (s < bufend - len + 1 &&
10064           memNE(s,PL_tokenbuf,len) ) {
10065             if (*s++ == '\n')
10066                 ++shared->herelines;
10067         }
10068         if (s >= bufend - len + 1) {
10069             goto interminable;
10070         }
10071         sv_setpvn(tmpstr,d+1,s-d);
10072 #ifdef PERL_MAD
10073         if (PL_madskills) {
10074             if (PL_thisstuff)
10075                 sv_catpvn(PL_thisstuff, d + 1, s - d);
10076             else
10077                 PL_thisstuff = newSVpvn(d + 1, s - d);
10078             stuffstart = s - SvPVX(PL_linestr);
10079         }
10080 #endif
10081         s += len - 1;
10082         /* the preceding stmt passes a newline */
10083         shared->herelines++;
10084
10085         /* s now points to the newline after the heredoc terminator.
10086            d points to the newline before the body of the heredoc.
10087          */
10088
10089         /* We are going to modify linestr in place here, so set
10090            aside copies of the string if necessary for re-evals or
10091            (caller $n)[6]. */
10092         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10093            check shared->re_eval_str. */
10094         if (shared->re_eval_start || shared->re_eval_str) {
10095             /* Set aside the rest of the regexp */
10096             if (!shared->re_eval_str)
10097                 shared->re_eval_str =
10098                        newSVpvn(shared->re_eval_start,
10099                                 bufend - shared->re_eval_start);
10100             shared->re_eval_start -= s-d;
10101         }
10102         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10103             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10104             cx->blk_eval.cur_text == linestr)
10105         {
10106             cx->blk_eval.cur_text = newSVsv(linestr);
10107             SvSCREAM_on(cx->blk_eval.cur_text);
10108         }
10109         /* Copy everything from s onwards back to d. */
10110         Move(s,d,bufend-s + 1,char);
10111         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10112         /* Setting PL_bufend only applies when we have not dug deeper
10113            into other scopes, because sublex_done sets PL_bufend to
10114            SvEND(PL_linestr). */
10115         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10116         s = olds;
10117     }
10118     else
10119     {
10120       SV *linestr_save;
10121      streaming:
10122       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
10123       term = PL_tokenbuf[1];
10124       len--;
10125       linestr_save = PL_linestr; /* must restore this afterwards */
10126       d = s;                     /* and this */
10127       PL_linestr = newSVpvs("");
10128       PL_bufend = SvPVX(PL_linestr);
10129       while (1) {
10130 #ifdef PERL_MAD
10131         if (PL_madskills) {
10132             tstart = SvPVX(PL_linestr) + stuffstart;
10133             if (PL_thisstuff)
10134                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10135             else
10136                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10137         }
10138 #endif
10139         PL_bufptr = PL_bufend;
10140         CopLINE_set(PL_curcop,
10141                     PL_multi_start + shared->herelines);
10142         if (!lex_next_chunk(LEX_NO_TERM)
10143          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10144             SvREFCNT_dec(linestr_save);
10145             goto interminable;
10146         }
10147         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10148         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10149             lex_grow_linestr(SvCUR(PL_linestr) + 2);
10150             sv_catpvs(PL_linestr, "\n\0");
10151         }
10152         s = PL_bufptr;
10153 #ifdef PERL_MAD
10154         stuffstart = s - SvPVX(PL_linestr);
10155 #endif
10156         shared->herelines++;
10157         PL_last_lop = PL_last_uni = NULL;
10158 #ifndef PERL_STRICT_CR
10159         if (PL_bufend - PL_linestart >= 2) {
10160             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10161                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10162             {
10163                 PL_bufend[-2] = '\n';
10164                 PL_bufend--;
10165                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10166             }
10167             else if (PL_bufend[-1] == '\r')
10168                 PL_bufend[-1] = '\n';
10169         }
10170         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10171             PL_bufend[-1] = '\n';
10172 #endif
10173         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10174             SvREFCNT_dec(PL_linestr);
10175             PL_linestr = linestr_save;
10176             PL_linestart = SvPVX(linestr_save);
10177             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10178             s = d;
10179             break;
10180         }
10181         else {
10182             sv_catsv(tmpstr,PL_linestr);
10183         }
10184       }
10185     }
10186     PL_multi_end = CopLINE(PL_curcop);
10187     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10188         SvPV_shrink_to_cur(tmpstr);
10189     }
10190     if (!IN_BYTES) {
10191         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10192             SvUTF8_on(tmpstr);
10193         else if (PL_encoding)
10194             sv_recode_to_utf8(tmpstr, PL_encoding);
10195     }
10196     PL_lex_stuff = tmpstr;
10197     pl_yylval.ival = op_type;
10198     return s;
10199
10200   interminable:
10201     SvREFCNT_dec(tmpstr);
10202     CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10203     missingterm(PL_tokenbuf + 1);
10204 }
10205
10206 /* scan_inputsymbol
10207    takes: current position in input buffer
10208    returns: new position in input buffer
10209    side-effects: pl_yylval and lex_op are set.
10210
10211    This code handles:
10212
10213    <>           read from ARGV
10214    <FH>         read from filehandle
10215    <pkg::FH>    read from package qualified filehandle
10216    <pkg'FH>     read from package qualified filehandle
10217    <$fh>        read from filehandle in $fh
10218    <*.h>        filename glob
10219
10220 */
10221
10222 STATIC char *
10223 S_scan_inputsymbol(pTHX_ char *start)
10224 {
10225     dVAR;
10226     char *s = start;            /* current position in buffer */
10227     char *end;
10228     I32 len;
10229     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10230     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10231
10232     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10233
10234     end = strchr(s, '\n');
10235     if (!end)
10236         end = PL_bufend;
10237     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
10238
10239     /* die if we didn't have space for the contents of the <>,
10240        or if it didn't end, or if we see a newline
10241     */
10242
10243     if (len >= (I32)sizeof PL_tokenbuf)
10244         Perl_croak(aTHX_ "Excessively long <> operator");
10245     if (s >= end)
10246         Perl_croak(aTHX_ "Unterminated <> operator");
10247
10248     s++;
10249
10250     /* check for <$fh>
10251        Remember, only scalar variables are interpreted as filehandles by
10252        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10253        treated as a glob() call.
10254        This code makes use of the fact that except for the $ at the front,
10255        a scalar variable and a filehandle look the same.
10256     */
10257     if (*d == '$' && d[1]) d++;
10258
10259     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10260     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10261         d += UTF ? UTF8SKIP(d) : 1;
10262
10263     /* If we've tried to read what we allow filehandles to look like, and
10264        there's still text left, then it must be a glob() and not a getline.
10265        Use scan_str to pull out the stuff between the <> and treat it
10266        as nothing more than a string.
10267     */
10268
10269     if (d - PL_tokenbuf != len) {
10270         pl_yylval.ival = OP_GLOB;
10271         s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10272         if (!s)
10273            Perl_croak(aTHX_ "Glob not terminated");
10274         return s;
10275     }
10276     else {
10277         bool readline_overriden = FALSE;
10278         GV *gv_readline;
10279         GV **gvp;
10280         /* we're in a filehandle read situation */
10281         d = PL_tokenbuf;
10282
10283         /* turn <> into <ARGV> */
10284         if (!len)
10285             Copy("ARGV",d,5,char);
10286
10287         /* Check whether readline() is overriden */
10288         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10289         if ((gv_readline
10290                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10291                 ||
10292                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10293                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10294                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10295             readline_overriden = TRUE;
10296
10297         /* if <$fh>, create the ops to turn the variable into a
10298            filehandle
10299         */
10300         if (*d == '$') {
10301             /* try to find it in the pad for this block, otherwise find
10302                add symbol table ops
10303             */
10304             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10305             if (tmp != NOT_IN_PAD) {
10306                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10307                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10308                     HEK * const stashname = HvNAME_HEK(stash);
10309                     SV * const sym = sv_2mortal(newSVhek(stashname));
10310                     sv_catpvs(sym, "::");
10311                     sv_catpv(sym, d+1);
10312                     d = SvPVX(sym);
10313                     goto intro_sym;
10314                 }
10315                 else {
10316                     OP * const o = newOP(OP_PADSV, 0);
10317                     o->op_targ = tmp;
10318                     PL_lex_op = readline_overriden
10319                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10320                                 op_append_elem(OP_LIST, o,
10321                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10322                         : (OP*)newUNOP(OP_READLINE, 0, o);
10323                 }
10324             }
10325             else {
10326                 GV *gv;
10327                 ++d;
10328 intro_sym:
10329                 gv = gv_fetchpv(d,
10330                                 (PL_in_eval
10331                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10332                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10333                                 SVt_PV);
10334                 PL_lex_op = readline_overriden
10335                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10336                             op_append_elem(OP_LIST,
10337                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10338                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10339                     : (OP*)newUNOP(OP_READLINE, 0,
10340                             newUNOP(OP_RV2SV, 0,
10341                                 newGVOP(OP_GV, 0, gv)));
10342             }
10343             if (!readline_overriden)
10344                 PL_lex_op->op_flags |= OPf_SPECIAL;
10345             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10346             pl_yylval.ival = OP_NULL;
10347         }
10348
10349         /* If it's none of the above, it must be a literal filehandle
10350            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10351         else {
10352             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10353             PL_lex_op = readline_overriden
10354                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10355                         op_append_elem(OP_LIST,
10356                             newGVOP(OP_GV, 0, gv),
10357                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10358                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10359             pl_yylval.ival = OP_NULL;
10360         }
10361     }
10362
10363     return s;
10364 }
10365
10366
10367 /* scan_str
10368    takes: start position in buffer
10369           keep_quoted preserve \ on the embedded delimiter(s)
10370           keep_delims preserve the delimiters around the string
10371           re_reparse  compiling a run-time /(?{})/:
10372                         collapse // to /,  and skip encoding src
10373    returns: position to continue reading from buffer
10374    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10375         updates the read buffer.
10376
10377    This subroutine pulls a string out of the input.  It is called for:
10378         q               single quotes           q(literal text)
10379         '               single quotes           'literal text'
10380         qq              double quotes           qq(interpolate $here please)
10381         "               double quotes           "interpolate $here please"
10382         qx              backticks               qx(/bin/ls -l)
10383         `               backticks               `/bin/ls -l`
10384         qw              quote words             @EXPORT_OK = qw( func() $spam )
10385         m//             regexp match            m/this/
10386         s///            regexp substitute       s/this/that/
10387         tr///           string transliterate    tr/this/that/
10388         y///            string transliterate    y/this/that/
10389         ($*@)           sub prototypes          sub foo ($)
10390         (stuff)         sub attr parameters     sub foo : attr(stuff)
10391         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10392
10393    In most of these cases (all but <>, patterns and transliterate)
10394    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10395    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10396    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10397    calls scan_str().
10398
10399    It skips whitespace before the string starts, and treats the first
10400    character as the delimiter.  If the delimiter is one of ([{< then
10401    the corresponding "close" character )]}> is used as the closing
10402    delimiter.  It allows quoting of delimiters, and if the string has
10403    balanced delimiters ([{<>}]) it allows nesting.
10404
10405    On success, the SV with the resulting string is put into lex_stuff or,
10406    if that is already non-NULL, into lex_repl. The second case occurs only
10407    when parsing the RHS of the special constructs s/// and tr/// (y///).
10408    For convenience, the terminating delimiter character is stuffed into
10409    SvIVX of the SV.
10410 */
10411
10412 STATIC char *
10413 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10414         bool deprecate_escaped_meta /* Should we issue a deprecation warning
10415                                        for certain paired metacharacters that
10416                                        appear escaped within it */
10417     )
10418 {
10419     dVAR;
10420     SV *sv;                     /* scalar value: string */
10421     const char *tmps;           /* temp string, used for delimiter matching */
10422     char *s = start;            /* current position in the buffer */
10423     char term;                  /* terminating character */
10424     char *to;                   /* current position in the sv's data */
10425     I32 brackets = 1;           /* bracket nesting level */
10426     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10427     I32 termcode;               /* terminating char. code */
10428     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10429     STRLEN termlen;             /* length of terminating string */
10430     int last_off = 0;           /* last position for nesting bracket */
10431     char *escaped_open = NULL;
10432 #ifdef PERL_MAD
10433     int stuffstart;
10434     char *tstart;
10435 #endif
10436
10437     PERL_ARGS_ASSERT_SCAN_STR;
10438
10439     /* skip space before the delimiter */
10440     if (isSPACE(*s)) {
10441         s = PEEKSPACE(s);
10442     }
10443
10444 #ifdef PERL_MAD
10445     if (PL_realtokenstart >= 0) {
10446         stuffstart = PL_realtokenstart;
10447         PL_realtokenstart = -1;
10448     }
10449     else
10450         stuffstart = start - SvPVX(PL_linestr);
10451 #endif
10452     /* mark where we are, in case we need to report errors */
10453     CLINE;
10454
10455     /* after skipping whitespace, the next character is the terminator */
10456     term = *s;
10457     if (!UTF) {
10458         termcode = termstr[0] = term;
10459         termlen = 1;
10460     }
10461     else {
10462         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10463         Copy(s, termstr, termlen, U8);
10464         if (!UTF8_IS_INVARIANT(term))
10465             has_utf8 = TRUE;
10466     }
10467
10468     /* mark where we are */
10469     PL_multi_start = CopLINE(PL_curcop);
10470     PL_multi_open = term;
10471
10472     /* find corresponding closing delimiter */
10473     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10474         termcode = termstr[0] = term = tmps[5];
10475
10476     PL_multi_close = term;
10477
10478     /* A warning is raised if the input parameter requires it for escaped (by a
10479      * backslash) paired metacharacters {} [] and () when the delimiters are
10480      * those same characters, and the backslash is ineffective.  This doesn't
10481      * happen for <>, as they aren't metas. */
10482     if (deprecate_escaped_meta
10483         && (PL_multi_open == PL_multi_close
10484             || ! ckWARN_d(WARN_DEPRECATED)
10485             || PL_multi_open == '<'))
10486     {
10487         deprecate_escaped_meta = FALSE;
10488     }
10489
10490     /* create a new SV to hold the contents.  79 is the SV's initial length.
10491        What a random number. */
10492     sv = newSV_type(SVt_PVIV);
10493     SvGROW(sv, 80);
10494     SvIV_set(sv, termcode);
10495     (void)SvPOK_only(sv);               /* validate pointer */
10496
10497     /* move past delimiter and try to read a complete string */
10498     if (keep_delims)
10499         sv_catpvn(sv, s, termlen);
10500     s += termlen;
10501 #ifdef PERL_MAD
10502     tstart = SvPVX(PL_linestr) + stuffstart;
10503     if (PL_madskills && !PL_thisopen && !keep_delims) {
10504         PL_thisopen = newSVpvn(tstart, s - tstart);
10505         stuffstart = s - SvPVX(PL_linestr);
10506     }
10507 #endif
10508     for (;;) {
10509         if (PL_encoding && !UTF && !re_reparse) {
10510             bool cont = TRUE;
10511
10512             while (cont) {
10513                 int offset = s - SvPVX_const(PL_linestr);
10514                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10515                                            &offset, (char*)termstr, termlen);
10516                 const char * const ns = SvPVX_const(PL_linestr) + offset;
10517                 char * const svlast = SvEND(sv) - 1;
10518
10519                 for (; s < ns; s++) {
10520                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10521                         COPLINE_INC_WITH_HERELINES;
10522                 }
10523                 if (!found)
10524                     goto read_more_line;
10525                 else {
10526                     /* handle quoted delimiters */
10527                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10528                         const char *t;
10529                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10530                             t--;
10531                         if ((svlast-1 - t) % 2) {
10532                             if (!keep_quoted) {
10533                                 *(svlast-1) = term;
10534                                 *svlast = '\0';
10535                                 SvCUR_set(sv, SvCUR(sv) - 1);
10536                             }
10537                             continue;
10538                         }
10539                     }
10540                     if (PL_multi_open == PL_multi_close) {
10541                         cont = FALSE;
10542                     }
10543                     else {
10544                         const char *t;
10545                         char *w;
10546                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10547                             /* At here, all closes are "was quoted" one,
10548                                so we don't check PL_multi_close. */
10549                             if (*t == '\\') {
10550                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10551                                     t++;
10552                                 else
10553                                     *w++ = *t++;
10554                             }
10555                             else if (*t == PL_multi_open)
10556                                 brackets++;
10557
10558                             *w = *t;
10559                         }
10560                         if (w < t) {
10561                             *w++ = term;
10562                             *w = '\0';
10563                             SvCUR_set(sv, w - SvPVX_const(sv));
10564                         }
10565                         last_off = w - SvPVX(sv);
10566                         if (--brackets <= 0)
10567                             cont = FALSE;
10568                     }
10569                 }
10570             }
10571             if (!keep_delims) {
10572                 SvCUR_set(sv, SvCUR(sv) - 1);
10573                 *SvEND(sv) = '\0';
10574             }
10575             break;
10576         }
10577
10578         /* extend sv if need be */
10579         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10580         /* set 'to' to the next character in the sv's string */
10581         to = SvPVX(sv)+SvCUR(sv);
10582
10583         /* if open delimiter is the close delimiter read unbridle */
10584         if (PL_multi_open == PL_multi_close) {
10585             for (; s < PL_bufend; s++,to++) {
10586                 /* embedded newlines increment the current line number */
10587                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10588                     COPLINE_INC_WITH_HERELINES;
10589                 /* handle quoted delimiters */
10590                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10591                     if (!keep_quoted
10592                         && (s[1] == term
10593                             || (re_reparse && s[1] == '\\'))
10594                     )
10595                         s++;
10596                     /* any other quotes are simply copied straight through */
10597                     else
10598                         *to++ = *s++;
10599                 }
10600                 /* terminate when run out of buffer (the for() condition), or
10601                    have found the terminator */
10602                 else if (*s == term) {
10603                     if (termlen == 1)
10604                         break;
10605                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10606                         break;
10607                 }
10608                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10609                     has_utf8 = TRUE;
10610                 *to = *s;
10611             }
10612         }
10613
10614         /* if the terminator isn't the same as the start character (e.g.,
10615            matched brackets), we have to allow more in the quoting, and
10616            be prepared for nested brackets.
10617         */
10618         else {
10619             /* read until we run out of string, or we find the terminator */
10620             for (; s < PL_bufend; s++,to++) {
10621                 /* embedded newlines increment the line count */
10622                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10623                     COPLINE_INC_WITH_HERELINES;
10624                 /* backslashes can escape the open or closing characters */
10625                 if (*s == '\\' && s+1 < PL_bufend) {
10626                     if (!keep_quoted &&
10627                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10628                     {
10629                         s++;
10630
10631                         /* Here, 'deprecate_escaped_meta' is true iff the
10632                          * delimiters are paired metacharacters, and 's' points
10633                          * to an occurrence of one of them within the string,
10634                          * which was preceded by a backslash.  If this is a
10635                          * context where the delimiter is also a metacharacter,
10636                          * the backslash is useless, and deprecated.  () and []
10637                          * are meta in any context. {} are meta only when
10638                          * appearing in a quantifier or in things like '\p{'.
10639                          * They also aren't meta unless there is a matching
10640                          * closed, escaped char later on within the string.
10641                          * If 's' points to an open, set a flag; if to a close,
10642                          * test that flag, and raise a warning if it was set */
10643
10644                         if (deprecate_escaped_meta) {
10645                             if (*s == PL_multi_open) {
10646                                 if (*s != '{') {
10647                                     escaped_open = s;
10648                                 }
10649                                 else if (regcurly(s,
10650                                                   TRUE /* Look for a closing
10651                                                           '\}' */)
10652                                          || (s - start > 2  /* Look for e.g.
10653                                                                '\x{' */
10654                                              && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
10655                                 {
10656                                     escaped_open = s;
10657                                 }
10658                             }
10659                             else if (escaped_open) {
10660                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10661                                     "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10662                                 escaped_open = NULL;
10663                             }
10664                         }
10665                     }
10666                     else
10667                         *to++ = *s++;
10668                 }
10669                 /* allow nested opens and closes */
10670                 else if (*s == PL_multi_close && --brackets <= 0)
10671                     break;
10672                 else if (*s == PL_multi_open)
10673                     brackets++;
10674                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10675                     has_utf8 = TRUE;
10676                 *to = *s;
10677             }
10678         }
10679         /* terminate the copied string and update the sv's end-of-string */
10680         *to = '\0';
10681         SvCUR_set(sv, to - SvPVX_const(sv));
10682
10683         /*
10684          * this next chunk reads more into the buffer if we're not done yet
10685          */
10686
10687         if (s < PL_bufend)
10688             break;              /* handle case where we are done yet :-) */
10689
10690 #ifndef PERL_STRICT_CR
10691         if (to - SvPVX_const(sv) >= 2) {
10692             if ((to[-2] == '\r' && to[-1] == '\n') ||
10693                 (to[-2] == '\n' && to[-1] == '\r'))
10694             {
10695                 to[-2] = '\n';
10696                 to--;
10697                 SvCUR_set(sv, to - SvPVX_const(sv));
10698             }
10699             else if (to[-1] == '\r')
10700                 to[-1] = '\n';
10701         }
10702         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10703             to[-1] = '\n';
10704 #endif
10705
10706      read_more_line:
10707         /* if we're out of file, or a read fails, bail and reset the current
10708            line marker so we can report where the unterminated string began
10709         */
10710 #ifdef PERL_MAD
10711         if (PL_madskills) {
10712             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10713             if (PL_thisstuff)
10714                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10715             else
10716                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10717         }
10718 #endif
10719         COPLINE_INC_WITH_HERELINES;
10720         PL_bufptr = PL_bufend;
10721         if (!lex_next_chunk(0)) {
10722             sv_free(sv);
10723             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10724             return NULL;
10725         }
10726         s = PL_bufptr;
10727 #ifdef PERL_MAD
10728         stuffstart = 0;
10729 #endif
10730     }
10731
10732     /* at this point, we have successfully read the delimited string */
10733
10734     if (!PL_encoding || UTF || re_reparse) {
10735 #ifdef PERL_MAD
10736         if (PL_madskills) {
10737             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10738             const int len = s - tstart;
10739             if (PL_thisstuff)
10740                 sv_catpvn(PL_thisstuff, tstart, len);
10741             else
10742                 PL_thisstuff = newSVpvn(tstart, len);
10743             if (!PL_thisclose && !keep_delims)
10744                 PL_thisclose = newSVpvn(s,termlen);
10745         }
10746 #endif
10747
10748         if (keep_delims)
10749             sv_catpvn(sv, s, termlen);
10750         s += termlen;
10751     }
10752 #ifdef PERL_MAD
10753     else {
10754         if (PL_madskills) {
10755             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10756             const int len = s - tstart - termlen;
10757             if (PL_thisstuff)
10758                 sv_catpvn(PL_thisstuff, tstart, len);
10759             else
10760                 PL_thisstuff = newSVpvn(tstart, len);
10761             if (!PL_thisclose && !keep_delims)
10762                 PL_thisclose = newSVpvn(s - termlen,termlen);
10763         }
10764     }
10765 #endif
10766     if (has_utf8 || (PL_encoding && !re_reparse))
10767         SvUTF8_on(sv);
10768
10769     PL_multi_end = CopLINE(PL_curcop);
10770
10771     /* if we allocated too much space, give some back */
10772     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10773         SvLEN_set(sv, SvCUR(sv) + 1);
10774         SvPV_renew(sv, SvLEN(sv));
10775     }
10776
10777     /* decide whether this is the first or second quoted string we've read
10778        for this op
10779     */
10780
10781     if (PL_lex_stuff)
10782         PL_sublex_info.repl = sv;
10783     else
10784         PL_lex_stuff = sv;
10785     return s;
10786 }
10787
10788 /*
10789   scan_num
10790   takes: pointer to position in buffer
10791   returns: pointer to new position in buffer
10792   side-effects: builds ops for the constant in pl_yylval.op
10793
10794   Read a number in any of the formats that Perl accepts:
10795
10796   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10797   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10798   0b[01](_?[01])*
10799   0[0-7](_?[0-7])*
10800   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10801
10802   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10803   thing it reads.
10804
10805   If it reads a number without a decimal point or an exponent, it will
10806   try converting the number to an integer and see if it can do so
10807   without loss of precision.
10808 */
10809
10810 char *
10811 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10812 {
10813     dVAR;
10814     const char *s = start;      /* current position in buffer */
10815     char *d;                    /* destination in temp buffer */
10816     char *e;                    /* end of temp buffer */
10817     NV nv;                              /* number read, as a double */
10818     SV *sv = NULL;                      /* place to put the converted number */
10819     bool floatit;                       /* boolean: int or float? */
10820     const char *lastub = NULL;          /* position of last underbar */
10821     static const char* const number_too_long = "Number too long";
10822
10823     PERL_ARGS_ASSERT_SCAN_NUM;
10824
10825     /* We use the first character to decide what type of number this is */
10826
10827     switch (*s) {
10828     default:
10829         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10830
10831     /* if it starts with a 0, it could be an octal number, a decimal in
10832        0.13 disguise, or a hexadecimal number, or a binary number. */
10833     case '0':
10834         {
10835           /* variables:
10836              u          holds the "number so far"
10837              shift      the power of 2 of the base
10838                         (hex == 4, octal == 3, binary == 1)
10839              overflowed was the number more than we can hold?
10840
10841              Shift is used when we add a digit.  It also serves as an "are
10842              we in octal/hex/binary?" indicator to disallow hex characters
10843              when in octal mode.
10844            */
10845             NV n = 0.0;
10846             UV u = 0;
10847             I32 shift;
10848             bool overflowed = FALSE;
10849             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10850             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10851             static const char* const bases[5] =
10852               { "", "binary", "", "octal", "hexadecimal" };
10853             static const char* const Bases[5] =
10854               { "", "Binary", "", "Octal", "Hexadecimal" };
10855             static const char* const maxima[5] =
10856               { "",
10857                 "0b11111111111111111111111111111111",
10858                 "",
10859                 "037777777777",
10860                 "0xffffffff" };
10861             const char *base, *Base, *max;
10862
10863             /* check for hex */
10864             if (s[1] == 'x' || s[1] == 'X') {
10865                 shift = 4;
10866                 s += 2;
10867                 just_zero = FALSE;
10868             } else if (s[1] == 'b' || s[1] == 'B') {
10869                 shift = 1;
10870                 s += 2;
10871                 just_zero = FALSE;
10872             }
10873             /* check for a decimal in disguise */
10874             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10875                 goto decimal;
10876             /* so it must be octal */
10877             else {
10878                 shift = 3;
10879                 s++;
10880             }
10881
10882             if (*s == '_') {
10883                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10884                                "Misplaced _ in number");
10885                lastub = s++;
10886             }
10887
10888             base = bases[shift];
10889             Base = Bases[shift];
10890             max  = maxima[shift];
10891
10892             /* read the rest of the number */
10893             for (;;) {
10894                 /* x is used in the overflow test,
10895                    b is the digit we're adding on. */
10896                 UV x, b;
10897
10898                 switch (*s) {
10899
10900                 /* if we don't mention it, we're done */
10901                 default:
10902                     goto out;
10903
10904                 /* _ are ignored -- but warned about if consecutive */
10905                 case '_':
10906                     if (lastub && s == lastub + 1)
10907                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10908                                        "Misplaced _ in number");
10909                     lastub = s++;
10910                     break;
10911
10912                 /* 8 and 9 are not octal */
10913                 case '8': case '9':
10914                     if (shift == 3)
10915                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10916                     /* FALL THROUGH */
10917
10918                 /* octal digits */
10919                 case '2': case '3': case '4':
10920                 case '5': case '6': case '7':
10921                     if (shift == 1)
10922                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10923                     /* FALL THROUGH */
10924
10925                 case '0': case '1':
10926                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10927                     goto digit;
10928
10929                 /* hex digits */
10930                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10931                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10932                     /* make sure they said 0x */
10933                     if (shift != 4)
10934                         goto out;
10935                     b = (*s++ & 7) + 9;
10936
10937                     /* Prepare to put the digit we have onto the end
10938                        of the number so far.  We check for overflows.
10939                     */
10940
10941                   digit:
10942                     just_zero = FALSE;
10943                     if (!overflowed) {
10944                         x = u << shift; /* make room for the digit */
10945
10946                         if ((x >> shift) != u
10947                             && !(PL_hints & HINT_NEW_BINARY)) {
10948                             overflowed = TRUE;
10949                             n = (NV) u;
10950                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10951                                              "Integer overflow in %s number",
10952                                              base);
10953                         } else
10954                             u = x | b;          /* add the digit to the end */
10955                     }
10956                     if (overflowed) {
10957                         n *= nvshift[shift];
10958                         /* If an NV has not enough bits in its
10959                          * mantissa to represent an UV this summing of
10960                          * small low-order numbers is a waste of time
10961                          * (because the NV cannot preserve the
10962                          * low-order bits anyway): we could just
10963                          * remember when did we overflow and in the
10964                          * end just multiply n by the right
10965                          * amount. */
10966                         n += (NV) b;
10967                     }
10968                     break;
10969                 }
10970             }
10971
10972           /* if we get here, we had success: make a scalar value from
10973              the number.
10974           */
10975           out:
10976
10977             /* final misplaced underbar check */
10978             if (s[-1] == '_') {
10979                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10980             }
10981
10982             if (overflowed) {
10983                 if (n > 4294967295.0)
10984                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10985                                    "%s number > %s non-portable",
10986                                    Base, max);
10987                 sv = newSVnv(n);
10988             }
10989             else {
10990 #if UVSIZE > 4
10991                 if (u > 0xffffffff)
10992                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10993                                    "%s number > %s non-portable",
10994                                    Base, max);
10995 #endif
10996                 sv = newSVuv(u);
10997             }
10998             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10999                 sv = new_constant(start, s - start, "integer",
11000                                   sv, NULL, NULL, 0);
11001             else if (PL_hints & HINT_NEW_BINARY)
11002                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11003         }
11004         break;
11005
11006     /*
11007       handle decimal numbers.
11008       we're also sent here when we read a 0 as the first digit
11009     */
11010     case '1': case '2': case '3': case '4': case '5':
11011     case '6': case '7': case '8': case '9': case '.':
11012       decimal:
11013         d = PL_tokenbuf;
11014         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11015         floatit = FALSE;
11016
11017         /* read next group of digits and _ and copy into d */
11018         while (isDIGIT(*s) || *s == '_') {
11019             /* skip underscores, checking for misplaced ones
11020                if -w is on
11021             */
11022             if (*s == '_') {
11023                 if (lastub && s == lastub + 1)
11024                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11025                                    "Misplaced _ in number");
11026                 lastub = s++;
11027             }
11028             else {
11029                 /* check for end of fixed-length buffer */
11030                 if (d >= e)
11031                     Perl_croak(aTHX_ "%s", number_too_long);
11032                 /* if we're ok, copy the character */
11033                 *d++ = *s++;
11034             }
11035         }
11036
11037         /* final misplaced underbar check */
11038         if (lastub && s == lastub + 1) {
11039             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11040         }
11041
11042         /* read a decimal portion if there is one.  avoid
11043            3..5 being interpreted as the number 3. followed
11044            by .5
11045         */
11046         if (*s == '.' && s[1] != '.') {
11047             floatit = TRUE;
11048             *d++ = *s++;
11049
11050             if (*s == '_') {
11051                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11052                                "Misplaced _ in number");
11053                 lastub = s;
11054             }
11055
11056             /* copy, ignoring underbars, until we run out of digits.
11057             */
11058             for (; isDIGIT(*s) || *s == '_'; s++) {
11059                 /* fixed length buffer check */
11060                 if (d >= e)
11061                     Perl_croak(aTHX_ "%s", number_too_long);
11062                 if (*s == '_') {
11063                    if (lastub && s == lastub + 1)
11064                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11065                                       "Misplaced _ in number");
11066                    lastub = s;
11067                 }
11068                 else
11069                     *d++ = *s;
11070             }
11071             /* fractional part ending in underbar? */
11072             if (s[-1] == '_') {
11073                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11074                                "Misplaced _ in number");
11075             }
11076             if (*s == '.' && isDIGIT(s[1])) {
11077                 /* oops, it's really a v-string, but without the "v" */
11078                 s = start;
11079                 goto vstring;
11080             }
11081         }
11082
11083         /* read exponent part, if present */
11084         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11085             floatit = TRUE;
11086             s++;
11087
11088             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11089             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
11090
11091             /* stray preinitial _ */
11092             if (*s == '_') {
11093                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11094                                "Misplaced _ in number");
11095                 lastub = s++;
11096             }
11097
11098             /* allow positive or negative exponent */
11099             if (*s == '+' || *s == '-')
11100                 *d++ = *s++;
11101
11102             /* stray initial _ */
11103             if (*s == '_') {
11104                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11105                                "Misplaced _ in number");
11106                 lastub = s++;
11107             }
11108
11109             /* read digits of exponent */
11110             while (isDIGIT(*s) || *s == '_') {
11111                 if (isDIGIT(*s)) {
11112                     if (d >= e)
11113                         Perl_croak(aTHX_ "%s", number_too_long);
11114                     *d++ = *s++;
11115                 }
11116                 else {
11117                    if (((lastub && s == lastub + 1) ||
11118                         (!isDIGIT(s[1]) && s[1] != '_')))
11119                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11120                                       "Misplaced _ in number");
11121                    lastub = s++;
11122                 }
11123             }
11124         }
11125
11126
11127         /*
11128            We try to do an integer conversion first if no characters
11129            indicating "float" have been found.
11130          */
11131
11132         if (!floatit) {
11133             UV uv;
11134             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11135
11136             if (flags == IS_NUMBER_IN_UV) {
11137               if (uv <= IV_MAX)
11138                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11139               else
11140                 sv = newSVuv(uv);
11141             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11142               if (uv <= (UV) IV_MIN)
11143                 sv = newSViv(-(IV)uv);
11144               else
11145                 floatit = TRUE;
11146             } else
11147               floatit = TRUE;
11148         }
11149         if (floatit) {
11150             /* terminate the string */
11151             *d = '\0';
11152             nv = Atof(PL_tokenbuf);
11153             sv = newSVnv(nv);
11154         }
11155
11156         if ( floatit
11157              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11158             const char *const key = floatit ? "float" : "integer";
11159             const STRLEN keylen = floatit ? 5 : 7;
11160             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11161                                 key, keylen, sv, NULL, NULL, 0);
11162         }
11163         break;
11164
11165     /* if it starts with a v, it could be a v-string */
11166     case 'v':
11167 vstring:
11168                 sv = newSV(5); /* preallocate storage space */
11169                 ENTER_with_name("scan_vstring");
11170                 SAVEFREESV(sv);
11171                 s = scan_vstring(s, PL_bufend, sv);
11172                 SvREFCNT_inc_simple_void_NN(sv);
11173                 LEAVE_with_name("scan_vstring");
11174         break;
11175     }
11176
11177     /* make the op for the constant and return */
11178
11179     if (sv)
11180         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11181     else
11182         lvalp->opval = NULL;
11183
11184     return (char *)s;
11185 }
11186
11187 STATIC char *
11188 S_scan_formline(pTHX_ char *s)
11189 {
11190     dVAR;
11191     char *eol;
11192     char *t;
11193     SV * const stuff = newSVpvs("");
11194     bool needargs = FALSE;
11195     bool eofmt = FALSE;
11196 #ifdef PERL_MAD
11197     char *tokenstart = s;
11198     SV* savewhite = NULL;
11199
11200     if (PL_madskills) {
11201         savewhite = PL_thiswhite;
11202         PL_thiswhite = 0;
11203     }
11204 #endif
11205
11206     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11207
11208     while (!needargs) {
11209         if (*s == '.') {
11210             t = s+1;
11211 #ifdef PERL_STRICT_CR
11212             while (SPACE_OR_TAB(*t))
11213                 t++;
11214 #else
11215             while (SPACE_OR_TAB(*t) || *t == '\r')
11216                 t++;
11217 #endif
11218             if (*t == '\n' || t == PL_bufend) {
11219                 eofmt = TRUE;
11220                 break;
11221             }
11222         }
11223         eol = (char *) memchr(s,'\n',PL_bufend-s);
11224         if (!eol++)
11225                 eol = PL_bufend;
11226         if (*s != '#') {
11227             for (t = s; t < eol; t++) {
11228                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11229                     needargs = FALSE;
11230                     goto enough;        /* ~~ must be first line in formline */
11231                 }
11232                 if (*t == '@' || *t == '^')
11233                     needargs = TRUE;
11234             }
11235             if (eol > s) {
11236                 sv_catpvn(stuff, s, eol-s);
11237 #ifndef PERL_STRICT_CR
11238                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11239                     char *end = SvPVX(stuff) + SvCUR(stuff);
11240                     end[-2] = '\n';
11241                     end[-1] = '\0';
11242                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11243                 }
11244 #endif
11245             }
11246             else
11247               break;
11248         }
11249         s = (char*)eol;
11250         if ((PL_rsfp || PL_parser->filtered)
11251          && PL_parser->form_lex_state == LEX_NORMAL) {
11252             bool got_some;
11253 #ifdef PERL_MAD
11254             if (PL_madskills) {
11255                 if (PL_thistoken)
11256                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11257                 else
11258                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11259             }
11260 #endif
11261             PL_bufptr = PL_bufend;
11262             COPLINE_INC_WITH_HERELINES;
11263             got_some = lex_next_chunk(0);
11264             CopLINE_dec(PL_curcop);
11265             s = PL_bufptr;
11266 #ifdef PERL_MAD
11267             tokenstart = PL_bufptr;
11268 #endif
11269             if (!got_some)
11270                 break;
11271         }
11272         incline(s);
11273     }
11274   enough:
11275     if (!SvCUR(stuff) || needargs)
11276         PL_lex_state = PL_parser->form_lex_state;
11277     if (SvCUR(stuff)) {
11278         PL_expect = XSTATE;
11279         if (needargs) {
11280             start_force(PL_curforce);
11281             NEXTVAL_NEXTTOKE.ival = 0;
11282             force_next(FORMLBRACK);
11283         }
11284         if (!IN_BYTES) {
11285             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11286                 SvUTF8_on(stuff);
11287             else if (PL_encoding)
11288                 sv_recode_to_utf8(stuff, PL_encoding);
11289         }
11290         start_force(PL_curforce);
11291         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11292         force_next(THING);
11293     }
11294     else {
11295         SvREFCNT_dec(stuff);
11296         if (eofmt)
11297             PL_lex_formbrack = 0;
11298     }
11299 #ifdef PERL_MAD
11300     if (PL_madskills) {
11301         if (PL_thistoken)
11302             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11303         else
11304             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11305         PL_thiswhite = savewhite;
11306     }
11307 #endif
11308     return s;
11309 }
11310
11311 I32
11312 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11313 {
11314     dVAR;
11315     const I32 oldsavestack_ix = PL_savestack_ix;
11316     CV* const outsidecv = PL_compcv;
11317
11318     SAVEI32(PL_subline);
11319     save_item(PL_subname);
11320     SAVESPTR(PL_compcv);
11321
11322     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11323     CvFLAGS(PL_compcv) |= flags;
11324
11325     PL_subline = CopLINE(PL_curcop);
11326     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11327     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11328     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11329     if (outsidecv && CvPADLIST(outsidecv))
11330         CvPADLIST(PL_compcv)->xpadl_outid =
11331             PadlistNAMES(CvPADLIST(outsidecv));
11332
11333     return oldsavestack_ix;
11334 }
11335
11336 #ifdef __SC__
11337 #pragma segment Perl_yylex
11338 #endif
11339 static int
11340 S_yywarn(pTHX_ const char *const s, U32 flags)
11341 {
11342     dVAR;
11343
11344     PERL_ARGS_ASSERT_YYWARN;
11345
11346     PL_in_eval |= EVAL_WARNONLY;
11347     yyerror_pv(s, flags);
11348     PL_in_eval &= ~EVAL_WARNONLY;
11349     return 0;
11350 }
11351
11352 int
11353 Perl_yyerror(pTHX_ const char *const s)
11354 {
11355     PERL_ARGS_ASSERT_YYERROR;
11356     return yyerror_pvn(s, strlen(s), 0);
11357 }
11358
11359 int
11360 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11361 {
11362     PERL_ARGS_ASSERT_YYERROR_PV;
11363     return yyerror_pvn(s, strlen(s), flags);
11364 }
11365
11366 int
11367 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11368 {
11369     dVAR;
11370     const char *context = NULL;
11371     int contlen = -1;
11372     SV *msg;
11373     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11374     int yychar  = PL_parser->yychar;
11375
11376     PERL_ARGS_ASSERT_YYERROR_PVN;
11377
11378     if (!yychar || (yychar == ';' && !PL_rsfp))
11379         sv_catpvs(where_sv, "at EOF");
11380     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11381       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11382       PL_oldbufptr != PL_bufptr) {
11383         /*
11384                 Only for NetWare:
11385                 The code below is removed for NetWare because it abends/crashes on NetWare
11386                 when the script has error such as not having the closing quotes like:
11387                     if ($var eq "value)
11388                 Checking of white spaces is anyway done in NetWare code.
11389         */
11390 #ifndef NETWARE
11391         while (isSPACE(*PL_oldoldbufptr))
11392             PL_oldoldbufptr++;
11393 #endif
11394         context = PL_oldoldbufptr;
11395         contlen = PL_bufptr - PL_oldoldbufptr;
11396     }
11397     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11398       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11399         /*
11400                 Only for NetWare:
11401                 The code below is removed for NetWare because it abends/crashes on NetWare
11402                 when the script has error such as not having the closing quotes like:
11403                     if ($var eq "value)
11404                 Checking of white spaces is anyway done in NetWare code.
11405         */
11406 #ifndef NETWARE
11407         while (isSPACE(*PL_oldbufptr))
11408             PL_oldbufptr++;
11409 #endif
11410         context = PL_oldbufptr;
11411         contlen = PL_bufptr - PL_oldbufptr;
11412     }
11413     else if (yychar > 255)
11414         sv_catpvs(where_sv, "next token ???");
11415     else if (yychar == -2) { /* YYEMPTY */
11416         if (PL_lex_state == LEX_NORMAL ||
11417            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11418             sv_catpvs(where_sv, "at end of line");
11419         else if (PL_lex_inpat)
11420             sv_catpvs(where_sv, "within pattern");
11421         else
11422             sv_catpvs(where_sv, "within string");
11423     }
11424     else {
11425         sv_catpvs(where_sv, "next char ");
11426         if (yychar < 32)
11427             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11428         else if (isPRINT_LC(yychar)) {
11429             const char string = yychar;
11430             sv_catpvn(where_sv, &string, 1);
11431         }
11432         else
11433             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11434     }
11435     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11436     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11437         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11438     if (context)
11439         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11440                              UTF8fARG(UTF, contlen, context));
11441     else
11442         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11443     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11444         Perl_sv_catpvf(aTHX_ msg,
11445         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11446                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11447         PL_multi_end = 0;
11448     }
11449     if (PL_in_eval & EVAL_WARNONLY) {
11450         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11451     }
11452     else
11453         qerror(msg);
11454     if (PL_error_count >= 10) {
11455         SV * errsv;
11456         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11457             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11458                        SVfARG(errsv), OutCopFILE(PL_curcop));
11459         else
11460             Perl_croak(aTHX_ "%s has too many errors.\n",
11461             OutCopFILE(PL_curcop));
11462     }
11463     PL_in_my = 0;
11464     PL_in_my_stash = NULL;
11465     return 0;
11466 }
11467 #ifdef __SC__
11468 #pragma segment Main
11469 #endif
11470
11471 STATIC char*
11472 S_swallow_bom(pTHX_ U8 *s)
11473 {
11474     dVAR;
11475     const STRLEN slen = SvCUR(PL_linestr);
11476
11477     PERL_ARGS_ASSERT_SWALLOW_BOM;
11478
11479     switch (s[0]) {
11480     case 0xFF:
11481         if (s[1] == 0xFE) {
11482             /* UTF-16 little-endian? (or UTF-32LE?) */
11483             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11484                 /* diag_listed_as: Unsupported script encoding %s */
11485                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11486 #ifndef PERL_NO_UTF16_FILTER
11487             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11488             s += 2;
11489             if (PL_bufend > (char*)s) {
11490                 s = add_utf16_textfilter(s, TRUE);
11491             }
11492 #else
11493             /* diag_listed_as: Unsupported script encoding %s */
11494             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11495 #endif
11496         }
11497         break;
11498     case 0xFE:
11499         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11500 #ifndef PERL_NO_UTF16_FILTER
11501             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11502             s += 2;
11503             if (PL_bufend > (char *)s) {
11504                 s = add_utf16_textfilter(s, FALSE);
11505             }
11506 #else
11507             /* diag_listed_as: Unsupported script encoding %s */
11508             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11509 #endif
11510         }
11511         break;
11512     case 0xEF:
11513         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11514             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11515             s += 3;                      /* UTF-8 */
11516         }
11517         break;
11518     case 0:
11519         if (slen > 3) {
11520              if (s[1] == 0) {
11521                   if (s[2] == 0xFE && s[3] == 0xFF) {
11522                        /* UTF-32 big-endian */
11523                        /* diag_listed_as: Unsupported script encoding %s */
11524                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11525                   }
11526              }
11527              else if (s[2] == 0 && s[3] != 0) {
11528                   /* Leading bytes
11529                    * 00 xx 00 xx
11530                    * are a good indicator of UTF-16BE. */
11531 #ifndef PERL_NO_UTF16_FILTER
11532                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11533                   s = add_utf16_textfilter(s, FALSE);
11534 #else
11535                   /* diag_listed_as: Unsupported script encoding %s */
11536                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11537 #endif
11538              }
11539         }
11540 #ifdef EBCDIC
11541     case 0xDD:
11542         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11543             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11544             s += 4;                      /* UTF-8 */
11545         }
11546         break;
11547 #endif
11548
11549     default:
11550          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11551                   /* Leading bytes
11552                    * xx 00 xx 00
11553                    * are a good indicator of UTF-16LE. */
11554 #ifndef PERL_NO_UTF16_FILTER
11555               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11556               s = add_utf16_textfilter(s, TRUE);
11557 #else
11558               /* diag_listed_as: Unsupported script encoding %s */
11559               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11560 #endif
11561          }
11562     }
11563     return (char*)s;
11564 }
11565
11566
11567 #ifndef PERL_NO_UTF16_FILTER
11568 static I32
11569 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11570 {
11571     dVAR;
11572     SV *const filter = FILTER_DATA(idx);
11573     /* We re-use this each time round, throwing the contents away before we
11574        return.  */
11575     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11576     SV *const utf8_buffer = filter;
11577     IV status = IoPAGE(filter);
11578     const bool reverse = cBOOL(IoLINES(filter));
11579     I32 retval;
11580
11581     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11582
11583     /* As we're automatically added, at the lowest level, and hence only called
11584        from this file, we can be sure that we're not called in block mode. Hence
11585        don't bother writing code to deal with block mode.  */
11586     if (maxlen) {
11587         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11588     }
11589     if (status < 0) {
11590         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11591     }
11592     DEBUG_P(PerlIO_printf(Perl_debug_log,
11593                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11594                           FPTR2DPTR(void *, S_utf16_textfilter),
11595                           reverse ? 'l' : 'b', idx, maxlen, status,
11596                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11597
11598     while (1) {
11599         STRLEN chars;
11600         STRLEN have;
11601         I32 newlen;
11602         U8 *end;
11603         /* First, look in our buffer of existing UTF-8 data:  */
11604         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11605
11606         if (nl) {
11607             ++nl;
11608         } else if (status == 0) {
11609             /* EOF */
11610             IoPAGE(filter) = 0;
11611             nl = SvEND(utf8_buffer);
11612         }
11613         if (nl) {
11614             STRLEN got = nl - SvPVX(utf8_buffer);
11615             /* Did we have anything to append?  */
11616             retval = got != 0;
11617             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11618             /* Everything else in this code works just fine if SVp_POK isn't
11619                set.  This, however, needs it, and we need it to work, else
11620                we loop infinitely because the buffer is never consumed.  */
11621             sv_chop(utf8_buffer, nl);
11622             break;
11623         }
11624
11625         /* OK, not a complete line there, so need to read some more UTF-16.
11626            Read an extra octect if the buffer currently has an odd number. */
11627         while (1) {
11628             if (status <= 0)
11629                 break;
11630             if (SvCUR(utf16_buffer) >= 2) {
11631                 /* Location of the high octet of the last complete code point.
11632                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11633                    *coupled* with all the benefits of partial reads and
11634                    endianness.  */
11635                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11636                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11637
11638                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11639                     break;
11640                 }
11641
11642                 /* We have the first half of a surrogate. Read more.  */
11643                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11644             }
11645
11646             status = FILTER_READ(idx + 1, utf16_buffer,
11647                                  160 + (SvCUR(utf16_buffer) & 1));
11648             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11649             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11650             if (status < 0) {
11651                 /* Error */
11652                 IoPAGE(filter) = status;
11653                 return status;
11654             }
11655         }
11656
11657         chars = SvCUR(utf16_buffer) >> 1;
11658         have = SvCUR(utf8_buffer);
11659         SvGROW(utf8_buffer, have + chars * 3 + 1);
11660
11661         if (reverse) {
11662             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11663                                          (U8*)SvPVX_const(utf8_buffer) + have,
11664                                          chars * 2, &newlen);
11665         } else {
11666             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11667                                 (U8*)SvPVX_const(utf8_buffer) + have,
11668                                 chars * 2, &newlen);
11669         }
11670         SvCUR_set(utf8_buffer, have + newlen);
11671         *end = '\0';
11672
11673         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11674            it's private to us, and utf16_to_utf8{,reversed} take a
11675            (pointer,length) pair, rather than a NUL-terminated string.  */
11676         if(SvCUR(utf16_buffer) & 1) {
11677             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11678             SvCUR_set(utf16_buffer, 1);
11679         } else {
11680             SvCUR_set(utf16_buffer, 0);
11681         }
11682     }
11683     DEBUG_P(PerlIO_printf(Perl_debug_log,
11684                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11685                           status,
11686                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11687     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11688     return retval;
11689 }
11690
11691 static U8 *
11692 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11693 {
11694     SV *filter = filter_add(S_utf16_textfilter, NULL);
11695
11696     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11697
11698     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11699     sv_setpvs(filter, "");
11700     IoLINES(filter) = reversed;
11701     IoPAGE(filter) = 1; /* Not EOF */
11702
11703     /* Sadly, we have to return a valid pointer, come what may, so we have to
11704        ignore any error return from this.  */
11705     SvCUR_set(PL_linestr, 0);
11706     if (FILTER_READ(0, PL_linestr, 0)) {
11707         SvUTF8_on(PL_linestr);
11708     } else {
11709         SvUTF8_on(PL_linestr);
11710     }
11711     PL_bufend = SvEND(PL_linestr);
11712     return (U8*)SvPVX(PL_linestr);
11713 }
11714 #endif
11715
11716 /*
11717 Returns a pointer to the next character after the parsed
11718 vstring, as well as updating the passed in sv.
11719
11720 Function must be called like
11721
11722         sv = sv_2mortal(newSV(5));
11723         s = scan_vstring(s,e,sv);
11724
11725 where s and e are the start and end of the string.
11726 The sv should already be large enough to store the vstring
11727 passed in, for performance reasons.
11728
11729 This function may croak if fatal warnings are enabled in the
11730 calling scope, hence the sv_2mortal in the example (to prevent
11731 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11732 sv_2mortal.
11733
11734 */
11735
11736 char *
11737 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11738 {
11739     dVAR;
11740     const char *pos = s;
11741     const char *start = s;
11742
11743     PERL_ARGS_ASSERT_SCAN_VSTRING;
11744
11745     if (*pos == 'v') pos++;  /* get past 'v' */
11746     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11747         pos++;
11748     if ( *pos != '.') {
11749         /* this may not be a v-string if followed by => */
11750         const char *next = pos;
11751         while (next < e && isSPACE(*next))
11752             ++next;
11753         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11754             /* return string not v-string */
11755             sv_setpvn(sv,(char *)s,pos-s);
11756             return (char *)pos;
11757         }
11758     }
11759
11760     if (!isALPHA(*pos)) {
11761         U8 tmpbuf[UTF8_MAXBYTES+1];
11762
11763         if (*s == 'v')
11764             s++;  /* get past 'v' */
11765
11766         sv_setpvs(sv, "");
11767
11768         for (;;) {
11769             /* this is atoi() that tolerates underscores */
11770             U8 *tmpend;
11771             UV rev = 0;
11772             const char *end = pos;
11773             UV mult = 1;
11774             while (--end >= s) {
11775                 if (*end != '_') {
11776                     const UV orev = rev;
11777                     rev += (*end - '0') * mult;
11778                     mult *= 10;
11779                     if (orev > rev)
11780                         /* diag_listed_as: Integer overflow in %s number */
11781                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11782                                          "Integer overflow in decimal number");
11783                 }
11784             }
11785 #ifdef EBCDIC
11786             if (rev > 0x7FFFFFFF)
11787                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11788 #endif
11789             /* Append native character for the rev point */
11790             tmpend = uvchr_to_utf8(tmpbuf, rev);
11791             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11792             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11793                  SvUTF8_on(sv);
11794             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11795                  s = ++pos;
11796             else {
11797                  s = pos;
11798                  break;
11799             }
11800             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11801                  pos++;
11802         }
11803         SvPOK_on(sv);
11804         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11805         SvRMAGICAL_on(sv);
11806     }
11807     return (char *)s;
11808 }
11809
11810 int
11811 Perl_keyword_plugin_standard(pTHX_
11812         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11813 {
11814     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11815     PERL_UNUSED_CONTEXT;
11816     PERL_UNUSED_ARG(keyword_ptr);
11817     PERL_UNUSED_ARG(keyword_len);
11818     PERL_UNUSED_ARG(op_ptr);
11819     return KEYWORD_PLUGIN_DECLINE;
11820 }
11821
11822 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11823 static void
11824 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11825 {
11826     SAVEI32(PL_lex_brackets);
11827     if (PL_lex_brackets > 100)
11828         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11829     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11830     SAVEI32(PL_lex_allbrackets);
11831     PL_lex_allbrackets = 0;
11832     SAVEI8(PL_lex_fakeeof);
11833     PL_lex_fakeeof = (U8)fakeeof;
11834     if(yyparse(gramtype) && !PL_parser->error_count)
11835         qerror(Perl_mess(aTHX_ "Parse error"));
11836 }
11837
11838 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11839 static OP *
11840 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11841 {
11842     OP *o;
11843     ENTER;
11844     SAVEVPTR(PL_eval_root);
11845     PL_eval_root = NULL;
11846     parse_recdescent(gramtype, fakeeof);
11847     o = PL_eval_root;
11848     LEAVE;
11849     return o;
11850 }
11851
11852 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11853 static OP *
11854 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11855 {
11856     OP *exprop;
11857     if (flags & ~PARSE_OPTIONAL)
11858         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11859     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11860     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11861         if (!PL_parser->error_count)
11862             qerror(Perl_mess(aTHX_ "Parse error"));
11863         exprop = newOP(OP_NULL, 0);
11864     }
11865     return exprop;
11866 }
11867
11868 /*
11869 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11870
11871 Parse a Perl arithmetic expression.  This may contain operators of precedence
11872 down to the bit shift operators.  The expression must be followed (and thus
11873 terminated) either by a comparison or lower-precedence operator or by
11874 something that would normally terminate an expression such as semicolon.
11875 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11876 otherwise it is mandatory.  It is up to the caller to ensure that the
11877 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11878 the source of the code to be parsed and the lexical context for the
11879 expression.
11880
11881 The op tree representing the expression is returned.  If an optional
11882 expression is absent, a null pointer is returned, otherwise the pointer
11883 will be non-null.
11884
11885 If an error occurs in parsing or compilation, in most cases a valid op
11886 tree is returned anyway.  The error is reflected in the parser state,
11887 normally resulting in a single exception at the top level of parsing
11888 which covers all the compilation errors that occurred.  Some compilation
11889 errors, however, will throw an exception immediately.
11890
11891 =cut
11892 */
11893
11894 OP *
11895 Perl_parse_arithexpr(pTHX_ U32 flags)
11896 {
11897     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11898 }
11899
11900 /*
11901 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11902
11903 Parse a Perl term expression.  This may contain operators of precedence
11904 down to the assignment operators.  The expression must be followed (and thus
11905 terminated) either by a comma or lower-precedence operator or by
11906 something that would normally terminate an expression such as semicolon.
11907 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11908 otherwise it is mandatory.  It is up to the caller to ensure that the
11909 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11910 the source of the code to be parsed and the lexical context for the
11911 expression.
11912
11913 The op tree representing the expression is returned.  If an optional
11914 expression is absent, a null pointer is returned, otherwise the pointer
11915 will be non-null.
11916
11917 If an error occurs in parsing or compilation, in most cases a valid op
11918 tree is returned anyway.  The error is reflected in the parser state,
11919 normally resulting in a single exception at the top level of parsing
11920 which covers all the compilation errors that occurred.  Some compilation
11921 errors, however, will throw an exception immediately.
11922
11923 =cut
11924 */
11925
11926 OP *
11927 Perl_parse_termexpr(pTHX_ U32 flags)
11928 {
11929     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11930 }
11931
11932 /*
11933 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11934
11935 Parse a Perl list expression.  This may contain operators of precedence
11936 down to the comma operator.  The expression must be followed (and thus
11937 terminated) either by a low-precedence logic operator such as C<or> or by
11938 something that would normally terminate an expression such as semicolon.
11939 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11940 otherwise it is mandatory.  It is up to the caller to ensure that the
11941 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11942 the source of the code to be parsed and the lexical context for the
11943 expression.
11944
11945 The op tree representing the expression is returned.  If an optional
11946 expression is absent, a null pointer is returned, otherwise the pointer
11947 will be non-null.
11948
11949 If an error occurs in parsing or compilation, in most cases a valid op
11950 tree is returned anyway.  The error is reflected in the parser state,
11951 normally resulting in a single exception at the top level of parsing
11952 which covers all the compilation errors that occurred.  Some compilation
11953 errors, however, will throw an exception immediately.
11954
11955 =cut
11956 */
11957
11958 OP *
11959 Perl_parse_listexpr(pTHX_ U32 flags)
11960 {
11961     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11962 }
11963
11964 /*
11965 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11966
11967 Parse a single complete Perl expression.  This allows the full
11968 expression grammar, including the lowest-precedence operators such
11969 as C<or>.  The expression must be followed (and thus terminated) by a
11970 token that an expression would normally be terminated by: end-of-file,
11971 closing bracketing punctuation, semicolon, or one of the keywords that
11972 signals a postfix expression-statement modifier.  If I<flags> includes
11973 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11974 mandatory.  It is up to the caller to ensure that the dynamic parser
11975 state (L</PL_parser> et al) is correctly set to reflect the source of
11976 the code to be parsed and the lexical context for the expression.
11977
11978 The op tree representing the expression is returned.  If an optional
11979 expression is absent, a null pointer is returned, otherwise the pointer
11980 will be non-null.
11981
11982 If an error occurs in parsing or compilation, in most cases a valid op
11983 tree is returned anyway.  The error is reflected in the parser state,
11984 normally resulting in a single exception at the top level of parsing
11985 which covers all the compilation errors that occurred.  Some compilation
11986 errors, however, will throw an exception immediately.
11987
11988 =cut
11989 */
11990
11991 OP *
11992 Perl_parse_fullexpr(pTHX_ U32 flags)
11993 {
11994     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11995 }
11996
11997 /*
11998 =for apidoc Amx|OP *|parse_block|U32 flags
11999
12000 Parse a single complete Perl code block.  This consists of an opening
12001 brace, a sequence of statements, and a closing brace.  The block
12002 constitutes a lexical scope, so C<my> variables and various compile-time
12003 effects can be contained within it.  It is up to the caller to ensure
12004 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12005 reflect the source of the code to be parsed and the lexical context for
12006 the statement.
12007
12008 The op tree representing the code block is returned.  This is always a
12009 real op, never a null pointer.  It will normally be a C<lineseq> list,
12010 including C<nextstate> or equivalent ops.  No ops to construct any kind
12011 of runtime scope are included by virtue of it being a block.
12012
12013 If an error occurs in parsing or compilation, in most cases a valid op
12014 tree (most likely null) is returned anyway.  The error is reflected in
12015 the parser state, normally resulting in a single exception at the top
12016 level of parsing which covers all the compilation errors that occurred.
12017 Some compilation errors, however, will throw an exception immediately.
12018
12019 The I<flags> parameter is reserved for future use, and must always
12020 be zero.
12021
12022 =cut
12023 */
12024
12025 OP *
12026 Perl_parse_block(pTHX_ U32 flags)
12027 {
12028     if (flags)
12029         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12030     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12031 }
12032
12033 /*
12034 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12035
12036 Parse a single unadorned Perl statement.  This may be a normal imperative
12037 statement or a declaration that has compile-time effect.  It does not
12038 include any label or other affixture.  It is up to the caller to ensure
12039 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12040 reflect the source of the code to be parsed and the lexical context for
12041 the statement.
12042
12043 The op tree representing the statement is returned.  This may be a
12044 null pointer if the statement is null, for example if it was actually
12045 a subroutine definition (which has compile-time side effects).  If not
12046 null, it will be ops directly implementing the statement, suitable to
12047 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12048 equivalent op (except for those embedded in a scope contained entirely
12049 within the statement).
12050
12051 If an error occurs in parsing or compilation, in most cases a valid op
12052 tree (most likely null) is returned anyway.  The error is reflected in
12053 the parser state, normally resulting in a single exception at the top
12054 level of parsing which covers all the compilation errors that occurred.
12055 Some compilation errors, however, will throw an exception immediately.
12056
12057 The I<flags> parameter is reserved for future use, and must always
12058 be zero.
12059
12060 =cut
12061 */
12062
12063 OP *
12064 Perl_parse_barestmt(pTHX_ U32 flags)
12065 {
12066     if (flags)
12067         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12068     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12069 }
12070
12071 /*
12072 =for apidoc Amx|SV *|parse_label|U32 flags
12073
12074 Parse a single label, possibly optional, of the type that may prefix a
12075 Perl statement.  It is up to the caller to ensure that the dynamic parser
12076 state (L</PL_parser> et al) is correctly set to reflect the source of
12077 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
12078 label is optional, otherwise it is mandatory.
12079
12080 The name of the label is returned in the form of a fresh scalar.  If an
12081 optional label is absent, a null pointer is returned.
12082
12083 If an error occurs in parsing, which can only occur if the label is
12084 mandatory, a valid label is returned anyway.  The error is reflected in
12085 the parser state, normally resulting in a single exception at the top
12086 level of parsing which covers all the compilation errors that occurred.
12087
12088 =cut
12089 */
12090
12091 SV *
12092 Perl_parse_label(pTHX_ U32 flags)
12093 {
12094     if (flags & ~PARSE_OPTIONAL)
12095         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12096     if (PL_lex_state == LEX_KNOWNEXT) {
12097         PL_parser->yychar = yylex();
12098         if (PL_parser->yychar == LABEL) {
12099             char * const lpv = pl_yylval.pval;
12100             STRLEN llen = strlen(lpv);
12101             PL_parser->yychar = YYEMPTY;
12102             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12103         } else {
12104             yyunlex();
12105             goto no_label;
12106         }
12107     } else {
12108         char *s, *t;
12109         STRLEN wlen, bufptr_pos;
12110         lex_read_space(0);
12111         t = s = PL_bufptr;
12112         if (!isIDFIRST_lazy_if(s, UTF))
12113             goto no_label;
12114         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12115         if (word_takes_any_delimeter(s, wlen))
12116             goto no_label;
12117         bufptr_pos = s - SvPVX(PL_linestr);
12118         PL_bufptr = t;
12119         lex_read_space(LEX_KEEP_PREVIOUS);
12120         t = PL_bufptr;
12121         s = SvPVX(PL_linestr) + bufptr_pos;
12122         if (t[0] == ':' && t[1] != ':') {
12123             PL_oldoldbufptr = PL_oldbufptr;
12124             PL_oldbufptr = s;
12125             PL_bufptr = t+1;
12126             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12127         } else {
12128             PL_bufptr = s;
12129             no_label:
12130             if (flags & PARSE_OPTIONAL) {
12131                 return NULL;
12132             } else {
12133                 qerror(Perl_mess(aTHX_ "Parse error"));
12134                 return newSVpvs("x");
12135             }
12136         }
12137     }
12138 }
12139
12140 /*
12141 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12142
12143 Parse a single complete Perl statement.  This may be a normal imperative
12144 statement or a declaration that has compile-time effect, and may include
12145 optional labels.  It is up to the caller to ensure that the dynamic
12146 parser state (L</PL_parser> et al) is correctly set to reflect the source
12147 of the code to be parsed and the lexical context for the statement.
12148
12149 The op tree representing the statement is returned.  This may be a
12150 null pointer if the statement is null, for example if it was actually
12151 a subroutine definition (which has compile-time side effects).  If not
12152 null, it will be the result of a L</newSTATEOP> call, normally including
12153 a C<nextstate> or equivalent op.
12154
12155 If an error occurs in parsing or compilation, in most cases a valid op
12156 tree (most likely null) is returned anyway.  The error is reflected in
12157 the parser state, normally resulting in a single exception at the top
12158 level of parsing which covers all the compilation errors that occurred.
12159 Some compilation errors, however, will throw an exception immediately.
12160
12161 The I<flags> parameter is reserved for future use, and must always
12162 be zero.
12163
12164 =cut
12165 */
12166
12167 OP *
12168 Perl_parse_fullstmt(pTHX_ U32 flags)
12169 {
12170     if (flags)
12171         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12172     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12173 }
12174
12175 /*
12176 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12177
12178 Parse a sequence of zero or more Perl statements.  These may be normal
12179 imperative statements, including optional labels, or declarations
12180 that have compile-time effect, or any mixture thereof.  The statement
12181 sequence ends when a closing brace or end-of-file is encountered in a
12182 place where a new statement could have validly started.  It is up to
12183 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12184 is correctly set to reflect the source of the code to be parsed and the
12185 lexical context for the statements.
12186
12187 The op tree representing the statement sequence is returned.  This may
12188 be a null pointer if the statements were all null, for example if there
12189 were no statements or if there were only subroutine definitions (which
12190 have compile-time side effects).  If not null, it will be a C<lineseq>
12191 list, normally including C<nextstate> or equivalent ops.
12192
12193 If an error occurs in parsing or compilation, in most cases a valid op
12194 tree is returned anyway.  The error is reflected in the parser state,
12195 normally resulting in a single exception at the top level of parsing
12196 which covers all the compilation errors that occurred.  Some compilation
12197 errors, however, will throw an exception immediately.
12198
12199 The I<flags> parameter is reserved for future use, and must always
12200 be zero.
12201
12202 =cut
12203 */
12204
12205 OP *
12206 Perl_parse_stmtseq(pTHX_ U32 flags)
12207 {
12208     OP *stmtseqop;
12209     I32 c;
12210     if (flags)
12211         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12212     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12213     c = lex_peek_unichar(0);
12214     if (c != -1 && c != /*{*/'}')
12215         qerror(Perl_mess(aTHX_ "Parse error"));
12216     return stmtseqop;
12217 }
12218
12219 /*
12220  * Local variables:
12221  * c-indentation-style: bsd
12222  * c-basic-offset: 4
12223  * indent-tabs-mode: nil
12224  * End:
12225  *
12226  * ex: set ts=8 sts=4 sw=4 et:
12227  */