toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 static const char* const ident_too_long = "Identifier too long";
 114
 115 #ifdef PERL_MAD
 116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 118 #else
 119 #  define CURMAD(slot,sv)
 120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 121 #endif
 122
 123 #define XENUMMASK  0x3f
 124 #define XFAKEEOF   0x40
 125 #define XFAKEBRACK 0x80
 126
 127 #ifdef USE_UTF8_SCRIPTS
 128 #   define UTF (!IN_BYTES)
 129 #else
 130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 131 #endif
 132
 133 /* The maximum number of characters preceding the unrecognized one to display */
 134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) isBLANK_A(c)
 141
 142 /* LEX_* are values for PL_lex_state, the state of the lexer.
 143  * They are arranged oddly so that the guard on the switch statement
 144  * can get by with a single comparison (if the compiler is smart enough).
 145  *
 146  * These values refer to the various states within a sublex parse,
 147  * i.e. within a double quotish string
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #ifdef ff_next
 186 #undef ff_next
 187 #endif
 188
 189 #include "keywords.h"
 190
 191 /* CLINE is a macro that ensures PL_copline has a sane value */
 192
 193 #ifdef CLINE
 194 #undef CLINE
 195 #endif
 196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 197
 198 #ifdef PERL_MAD
 199 #  define SKIPSPACE0(s) skipspace0(s)
 200 #  define SKIPSPACE1(s) skipspace1(s)
 201 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 202 #  define PEEKSPACE(s) skipspace2(s,0)
 203 #else
 204 #  define SKIPSPACE0(s) skipspace(s)
 205 #  define SKIPSPACE1(s) skipspace(s)
 206 #  define SKIPSPACE2(s,tsv) skipspace(s)
 207 #  define PEEKSPACE(s) skipspace(s)
 208 #endif
 209
 210 /*
 211  * Convenience functions to return different tokens and prime the
 212  * lexer for the next token.  They all take an argument.
 213  *
 214  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 215  * OPERATOR     : generic operator
 216  * AOPERATOR    : assignment operator
 217  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 218  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 219  * PREREF       : *EXPR where EXPR is not a simple identifier
 220  * TERM         : expression term
 221  * LOOPX        : loop exiting command (goto, last, dump, etc)
 222  * FTST         : file test operator
 223  * FUN0         : zero-argument function
 224  * FUN0OP       : zero-argument function, with its op created in this file
 225  * FUN1         : not used, except for not, which isn't a UNIOP
 226  * BOop         : bitwise or or xor
 227  * BAop         : bitwise and
 228  * SHop         : shift operator
 229  * PWop         : power operator
 230  * PMop         : pattern-matching operator
 231  * Aop          : addition-level operator
 232  * Mop          : multiplication-level operator
 233  * Eop          : equality-testing operator
 234  * Rop          : relational operator <= != gt
 235  *
 236  * Also see LOP and lop() below.
 237  */
 238
 239 #ifdef DEBUGGING /* Serve -DT. */
 240 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 241 #else
 242 #   define REPORT(retval) (retval)
 243 #endif
 244
 245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 253 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 254 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 255 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI3(f,x,have_x) { \
 273         pl_yylval.ival = f; \
 274         if (have_x) PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI3(f,XTERM,1)
 284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 285 #define UNIPROTO(f,optional) { \
 286         if (optional) PL_last_uni = PL_oldbufptr; \
 287         OPERATOR(f); \
 288         }
 289
 290 #define UNIBRACK(f) UNI3(f,0,0)
 291
 292 /* grandfather return to old style */
 293 #define OLDLOP(f) \
 294         do { \
 295             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 296                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 297             pl_yylval.ival = (f); \
 298             PL_expect = XTERM; \
 299             PL_bufptr = s; \
 300             return (int)LSTOP; \
 301         } while(0)
 302
 303 #define COPLINE_INC_WITH_HERELINES                  \
 304     STMT_START {                                     \
 305         CopLINE_inc(PL_curcop);                       \
 306         if (PL_parser->lex_shared->herelines)          \
 307             CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
 308             PL_parser->lex_shared->herelines = 0;                    \
 309     } STMT_END
 310
 311
 312 #ifdef DEBUGGING
 313
 314 /* how to interpret the pl_yylval associated with the token */
 315 enum token_type {
 316     TOKENTYPE_NONE,
 317     TOKENTYPE_IVAL,
 318     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 319     TOKENTYPE_PVAL,
 320     TOKENTYPE_OPVAL
 321 };
 322
 323 static struct debug_tokens {
 324     const int token;
 325     enum token_type type;
 326     const char *name;
 327 } const debug_tokens[] =
 328 {
 329     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 330     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 331     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 332     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 333     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 334     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 335     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 336     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 337     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 338     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 339     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 340     { DO,               TOKENTYPE_NONE,         "DO" },
 341     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 342     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 343     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 344     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 345     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 346     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 347     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 348     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 349     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 350     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 351     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 352     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 353     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 354     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 355     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 356     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 357     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 358     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 359     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 360     { IF,               TOKENTYPE_IVAL,         "IF" },
 361     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 362     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 363     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 364     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 365     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 366     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 367     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 368     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 369     { MY,               TOKENTYPE_IVAL,         "MY" },
 370     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 371     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 372     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 373     { OROR,             TOKENTYPE_NONE,         "OROR" },
 374     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 375     { PEG,              TOKENTYPE_NONE,         "PEG" },
 376     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 377     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 378     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 379     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 380     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 381     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 382     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 383     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 384     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 385     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 386     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 387     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 388     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 389     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 390     { SUB,              TOKENTYPE_NONE,         "SUB" },
 391     { THING,            TOKENTYPE_OPVAL,        "THING" },
 392     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 393     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 394     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 395     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 396     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 397     { USE,              TOKENTYPE_IVAL,         "USE" },
 398     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 399     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 400     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 401     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 402     { 0,                TOKENTYPE_NONE,         NULL }
 403 };
 404
 405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 406
 407 STATIC int
 408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 409 {
 410     dVAR;
 411
 412     PERL_ARGS_ASSERT_TOKEREPORT;
 413
 414     if (DEBUG_T_TEST) {
 415         const char *name = NULL;
 416         enum token_type type = TOKENTYPE_NONE;
 417         const struct debug_tokens *p;
 418         SV* const report = newSVpvs("<== ");
 419
 420         for (p = debug_tokens; p->token; p++) {
 421             if (p->token == (int)rv) {
 422                 name = p->name;
 423                 type = p->type;
 424                 break;
 425             }
 426         }
 427         if (name)
 428             Perl_sv_catpv(aTHX_ report, name);
 429         else if ((char)rv > ' ' && (char)rv <= '~')
 430         {
 431             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 432             if ((char)rv == 'p')
 433                 sv_catpvs(report, " (pending identifier)");
 434         }
 435         else if (!rv)
 436             sv_catpvs(report, "EOF");
 437         else
 438             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 439         switch (type) {
 440         case TOKENTYPE_NONE:
 441             break;
 442         case TOKENTYPE_IVAL:
 443             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 444             break;
 445         case TOKENTYPE_OPNUM:
 446             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 447                                     PL_op_name[lvalp->ival]);
 448             break;
 449         case TOKENTYPE_PVAL:
 450             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 451             break;
 452         case TOKENTYPE_OPVAL:
 453             if (lvalp->opval) {
 454                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 455                                     PL_op_name[lvalp->opval->op_type]);
 456                 if (lvalp->opval->op_type == OP_CONST) {
 457                     Perl_sv_catpvf(aTHX_ report, " %s",
 458                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 459                 }
 460
 461             }
 462             else
 463                 sv_catpvs(report, "(opval=null)");
 464             break;
 465         }
 466         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 467     };
 468     return (int)rv;
 469 }
 470
 471
 472 /* print the buffer with suitable escapes */
 473
 474 STATIC void
 475 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 476 {
 477     SV* const tmp = newSVpvs("");
 478
 479     PERL_ARGS_ASSERT_PRINTBUF;
 480
 481     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 482     SvREFCNT_dec(tmp);
 483 }
 484
 485 #endif
 486
 487 static int
 488 S_deprecate_commaless_var_list(pTHX) {
 489     PL_expect = XTERM;
 490     deprecate("comma-less variable list");
 491     return REPORT(','); /* grandfather non-comma-format format */
 492 }
 493
 494 /*
 495  * S_ao
 496  *
 497  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 498  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 499  */
 500
 501 STATIC int
 502 S_ao(pTHX_ int toketype)
 503 {
 504     dVAR;
 505     if (*PL_bufptr == '=') {
 506         PL_bufptr++;
 507         if (toketype == ANDAND)
 508             pl_yylval.ival = OP_ANDASSIGN;
 509         else if (toketype == OROR)
 510             pl_yylval.ival = OP_ORASSIGN;
 511         else if (toketype == DORDOR)
 512             pl_yylval.ival = OP_DORASSIGN;
 513         toketype = ASSIGNOP;
 514     }
 515     return toketype;
 516 }
 517
 518 /*
 519  * S_no_op
 520  * When Perl expects an operator and finds something else, no_op
 521  * prints the warning.  It always prints "<something> found where
 522  * operator expected.  It prints "Missing semicolon on previous line?"
 523  * if the surprise occurs at the start of the line.  "do you need to
 524  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 525  * where the compiler doesn't know if foo is a method call or a function.
 526  * It prints "Missing operator before end of line" if there's nothing
 527  * after the missing operator, or "... before <...>" if there is something
 528  * after the missing operator.
 529  */
 530
 531 STATIC void
 532 S_no_op(pTHX_ const char *const what, char *s)
 533 {
 534     dVAR;
 535     char * const oldbp = PL_bufptr;
 536     const bool is_first = (PL_oldbufptr == PL_linestart);
 537
 538     PERL_ARGS_ASSERT_NO_OP;
 539
 540     if (!s)
 541         s = oldbp;
 542     else
 543         PL_bufptr = s;
 544     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 545     if (ckWARN_d(WARN_SYNTAX)) {
 546         if (is_first)
 547             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 548                     "\t(Missing semicolon on previous line?)\n");
 549         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 550             const char *t;
 551             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 552                                                             t += UTF ? UTF8SKIP(t) : 1)
 553                 NOOP;
 554             if (t < PL_bufptr && isSPACE(*t))
 555                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 556                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 557                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 558         }
 559         else {
 560             assert(s >= oldbp);
 561             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 562                     "\t(Missing operator before %"UTF8f"?)\n",
 563                      UTF8fARG(UTF, s - oldbp, oldbp));
 564         }
 565     }
 566     PL_bufptr = oldbp;
 567 }
 568
 569 /*
 570  * S_missingterm
 571  * Complain about missing quote/regexp/heredoc terminator.
 572  * If it's called with NULL then it cauterizes the line buffer.
 573  * If we're in a delimited string and the delimiter is a control
 574  * character, it's reformatted into a two-char sequence like ^C.
 575  * This is fatal.
 576  */
 577
 578 STATIC void
 579 S_missingterm(pTHX_ char *s)
 580 {
 581     dVAR;
 582     char tmpbuf[3];
 583     char q;
 584     if (s) {
 585         char * const nl = strrchr(s,'\n');
 586         if (nl)
 587             *nl = '\0';
 588     }
 589     else if (isCNTRL(PL_multi_close)) {
 590         *tmpbuf = '^';
 591         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 592         tmpbuf[2] = '\0';
 593         s = tmpbuf;
 594     }
 595     else {
 596         *tmpbuf = (char)PL_multi_close;
 597         tmpbuf[1] = '\0';
 598         s = tmpbuf;
 599     }
 600     q = strchr(s,'"') ? '\'' : '"';
 601     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 602 }
 603
 604 #include "feature.h"
 605
 606 /*
 607  * Check whether the named feature is enabled.
 608  */
 609 bool
 610 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 611 {
 612     dVAR;
 613     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 614
 615     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 616
 617     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 618
 619     if (namelen > MAX_FEATURE_LEN)
 620         return FALSE;
 621     memcpy(&he_name[8], name, namelen);
 622
 623     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 624                                      REFCOUNTED_HE_EXISTS));
 625 }
 626
 627 /*
 628  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 629  * utf16-to-utf8-reversed.
 630  */
 631
 632 #ifdef PERL_CR_FILTER
 633 static void
 634 strip_return(SV *sv)
 635 {
 636     const char *s = SvPVX_const(sv);
 637     const char * const e = s + SvCUR(sv);
 638
 639     PERL_ARGS_ASSERT_STRIP_RETURN;
 640
 641     /* outer loop optimized to do nothing if there are no CR-LFs */
 642     while (s < e) {
 643         if (*s++ == '\r' && *s == '\n') {
 644             /* hit a CR-LF, need to copy the rest */
 645             char *d = s - 1;
 646             *d++ = *s++;
 647             while (s < e) {
 648                 if (*s == '\r' && s[1] == '\n')
 649                     s++;
 650                 *d++ = *s++;
 651             }
 652             SvCUR(sv) -= s - d;
 653             return;
 654         }
 655     }
 656 }
 657
 658 STATIC I32
 659 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 660 {
 661     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 662     if (count > 0 && !maxlen)
 663         strip_return(sv);
 664     return count;
 665 }
 666 #endif
 667
 668 /*
 669 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 670
 671 Creates and initialises a new lexer/parser state object, supplying
 672 a context in which to lex and parse from a new source of Perl code.
 673 A pointer to the new state object is placed in L</PL_parser>.  An entry
 674 is made on the save stack so that upon unwinding the new state object
 675 will be destroyed and the former value of L</PL_parser> will be restored.
 676 Nothing else need be done to clean up the parsing context.
 677
 678 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 679 non-null, provides a string (in SV form) containing code to be parsed.
 680 A copy of the string is made, so subsequent modification of I<line>
 681 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 682 from which code will be read to be parsed.  If both are non-null, the
 683 code in I<line> comes first and must consist of complete lines of input,
 684 and I<rsfp> supplies the remainder of the source.
 685
 686 The I<flags> parameter is reserved for future use.  Currently it is only
 687 used by perl internally, so extensions should always pass zero.
 688
 689 =cut
 690 */
 691
 692 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 693    can share filters with the current parser.
 694    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 695    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 696    destruction. This is used to handle the case of defaulting to reading the
 697    script from the standard input because no filename was given on the command
 698    line (without getting confused by situation where STDIN has been closed, so
 699    the script handle is opened on fd 0)  */
 700
 701 void
 702 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 703 {
 704     dVAR;
 705     const char *s = NULL;
 706     yy_parser *parser, *oparser;
 707     if (flags && flags & ~LEX_START_FLAGS)
 708         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 709
 710     /* create and initialise a parser */
 711
 712     Newxz(parser, 1, yy_parser);
 713     parser->old_parser = oparser = PL_parser;
 714     PL_parser = parser;
 715
 716     parser->stack = NULL;
 717     parser->ps = NULL;
 718     parser->stack_size = 0;
 719
 720     /* on scope exit, free this parser and restore any outer one */
 721     SAVEPARSER(parser);
 722     parser->saved_curcop = PL_curcop;
 723
 724     /* initialise lexer state */
 725
 726 #ifdef PERL_MAD
 727     parser->curforce = -1;
 728 #else
 729     parser->nexttoke = 0;
 730 #endif
 731     parser->error_count = oparser ? oparser->error_count : 0;
 732     parser->copline = NOLINE;
 733     parser->lex_state = LEX_NORMAL;
 734     parser->expect = XSTATE;
 735     parser->rsfp = rsfp;
 736     parser->rsfp_filters =
 737       !(flags & LEX_START_SAME_FILTER) || !oparser
 738         ? NULL
 739         : MUTABLE_AV(SvREFCNT_inc(
 740             oparser->rsfp_filters
 741              ? oparser->rsfp_filters
 742              : (oparser->rsfp_filters = newAV())
 743           ));
 744
 745     Newx(parser->lex_brackstack, 120, char);
 746     Newx(parser->lex_casestack, 12, char);
 747     *parser->lex_casestack = '\0';
 748     Newxz(parser->lex_shared, 1, LEXSHARED);
 749
 750     if (line) {
 751         STRLEN len;
 752         s = SvPV_const(line, len);
 753         parser->linestr = flags & LEX_START_COPIED
 754                             ? SvREFCNT_inc_simple_NN(line)
 755                             : newSVpvn_flags(s, len, SvUTF8(line));
 756         sv_catpvs(parser->linestr, "\n;");
 757     } else {
 758         parser->linestr = newSVpvs("\n;");
 759     }
 760     parser->oldoldbufptr =
 761         parser->oldbufptr =
 762         parser->bufptr =
 763         parser->linestart = SvPVX(parser->linestr);
 764     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 765     parser->last_lop = parser->last_uni = NULL;
 766     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 767                                  |LEX_DONT_CLOSE_RSFP);
 768
 769     parser->in_pod = parser->filtered = 0;
 770 }
 771
 772
 773 /* delete a parser object */
 774
 775 void
 776 Perl_parser_free(pTHX_  const yy_parser *parser)
 777 {
 778     PERL_ARGS_ASSERT_PARSER_FREE;
 779
 780     PL_curcop = parser->saved_curcop;
 781     SvREFCNT_dec(parser->linestr);
 782
 783     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 784         PerlIO_clearerr(parser->rsfp);
 785     else if (parser->rsfp && (!parser->old_parser ||
 786                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 787         PerlIO_close(parser->rsfp);
 788     SvREFCNT_dec(parser->rsfp_filters);
 789     SvREFCNT_dec(parser->lex_stuff);
 790     SvREFCNT_dec(parser->sublex_info.repl);
 791
 792     Safefree(parser->lex_brackstack);
 793     Safefree(parser->lex_casestack);
 794     Safefree(parser->lex_shared);
 795     PL_parser = parser->old_parser;
 796     Safefree(parser);
 797 }
 798
 799 void
 800 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 801 {
 802 #ifdef PERL_MAD
 803     I32 nexttoke = parser->lasttoke;
 804 #else
 805     I32 nexttoke = parser->nexttoke;
 806 #endif
 807     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 808     while (nexttoke--) {
 809 #ifdef PERL_MAD
 810         if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
 811                                 & 0xffff)
 812          && parser->nexttoke[nexttoke].next_val.opval
 813          && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
 814          && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
 815                 op_free(parser->nexttoke[nexttoke].next_val.opval);
 816                 parser->nexttoke[nexttoke].next_val.opval = NULL;
 817         }
 818 #else
 819         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 820          && parser->nextval[nexttoke].opval
 821          && parser->nextval[nexttoke].opval->op_slabbed
 822          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 823             op_free(parser->nextval[nexttoke].opval);
 824             parser->nextval[nexttoke].opval = NULL;
 825         }
 826 #endif
 827     }
 828 }
 829
 830
 831 /*
 832 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 833
 834 Buffer scalar containing the chunk currently under consideration of the
 835 text currently being lexed.  This is always a plain string scalar (for
 836 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 837 normal scalar means; instead refer to the buffer directly by the pointer
 838 variables described below.
 839
 840 The lexer maintains various C<char*> pointers to things in the
 841 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 842 reallocated, all of these pointers must be updated.  Don't attempt to
 843 do this manually, but rather use L</lex_grow_linestr> if you need to
 844 reallocate the buffer.
 845
 846 The content of the text chunk in the buffer is commonly exactly one
 847 complete line of input, up to and including a newline terminator,
 848 but there are situations where it is otherwise.  The octets of the
 849 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 850 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 851 flag on this scalar, which may disagree with it.
 852
 853 For direct examination of the buffer, the variable
 854 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 855 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 856 of these pointers is usually preferable to examination of the scalar
 857 through normal scalar means.
 858
 859 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 860
 861 Direct pointer to the end of the chunk of text currently being lexed, the
 862 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 863 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 864 always located at the end of the buffer, and does not count as part of
 865 the buffer's contents.
 866
 867 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 868
 869 Points to the current position of lexing inside the lexer buffer.
 870 Characters around this point may be freely examined, within
 871 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 872 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 873 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 874
 875 Lexing code (whether in the Perl core or not) moves this pointer past
 876 the characters that it consumes.  It is also expected to perform some
 877 bookkeeping whenever a newline character is consumed.  This movement
 878 can be more conveniently performed by the function L</lex_read_to>,
 879 which handles newlines appropriately.
 880
 881 Interpretation of the buffer's octets can be abstracted out by
 882 using the slightly higher-level functions L</lex_peek_unichar> and
 883 L</lex_read_unichar>.
 884
 885 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 886
 887 Points to the start of the current line inside the lexer buffer.
 888 This is useful for indicating at which column an error occurred, and
 889 not much else.  This must be updated by any lexing code that consumes
 890 a newline; the function L</lex_read_to> handles this detail.
 891
 892 =cut
 893 */
 894
 895 /*
 896 =for apidoc Amx|bool|lex_bufutf8
 897
 898 Indicates whether the octets in the lexer buffer
 899 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 900 of Unicode characters.  If not, they should be interpreted as Latin-1
 901 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 902
 903 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 904 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 905 encoding.
 906
 907 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 908 is significant, but not the whole story regarding the input character
 909 encoding.  Normally, when a file is being read, the scalar contains octets
 910 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 911 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 912 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 913 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 914 is in effect.  This logic may change in the future; use this function
 915 instead of implementing the logic yourself.
 916
 917 =cut
 918 */
 919
 920 bool
 921 Perl_lex_bufutf8(pTHX)
 922 {
 923     return UTF;
 924 }
 925
 926 /*
 927 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 928
 929 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 930 at least I<len> octets (including terminating NUL).  Returns a
 931 pointer to the reallocated buffer.  This is necessary before making
 932 any direct modification of the buffer that would increase its length.
 933 L</lex_stuff_pvn> provides a more convenient way to insert text into
 934 the buffer.
 935
 936 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 937 this function updates all of the lexer's variables that point directly
 938 into the buffer.
 939
 940 =cut
 941 */
 942
 943 char *
 944 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 945 {
 946     SV *linestr;
 947     char *buf;
 948     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 949     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 950     linestr = PL_parser->linestr;
 951     buf = SvPVX(linestr);
 952     if (len <= SvLEN(linestr))
 953         return buf;
 954     bufend_pos = PL_parser->bufend - buf;
 955     bufptr_pos = PL_parser->bufptr - buf;
 956     oldbufptr_pos = PL_parser->oldbufptr - buf;
 957     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 958     linestart_pos = PL_parser->linestart - buf;
 959     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 960     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 961     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 962                             PL_parser->lex_shared->re_eval_start - buf : 0;
 963
 964     buf = sv_grow(linestr, len);
 965
 966     PL_parser->bufend = buf + bufend_pos;
 967     PL_parser->bufptr = buf + bufptr_pos;
 968     PL_parser->oldbufptr = buf + oldbufptr_pos;
 969     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 970     PL_parser->linestart = buf + linestart_pos;
 971     if (PL_parser->last_uni)
 972         PL_parser->last_uni = buf + last_uni_pos;
 973     if (PL_parser->last_lop)
 974         PL_parser->last_lop = buf + last_lop_pos;
 975     if (PL_parser->lex_shared->re_eval_start)
 976         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 977     return buf;
 978 }
 979
 980 /*
 981 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 982
 983 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 984 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 985 reallocating the buffer if necessary.  This means that lexing code that
 986 runs later will see the characters as if they had appeared in the input.
 987 It is not recommended to do this as part of normal parsing, and most
 988 uses of this facility run the risk of the inserted characters being
 989 interpreted in an unintended manner.
 990
 991 The string to be inserted is represented by I<len> octets starting
 992 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 993 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 994 The characters are recoded for the lexer buffer, according to how the
 995 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 996 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 997 function is more convenient.
 998
 999 =cut
1000 */
1001
1002 void
1003 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1004 {
1005     dVAR;
1006     char *bufptr;
1007     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1008     if (flags & ~(LEX_STUFF_UTF8))
1009         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1010     if (UTF) {
1011         if (flags & LEX_STUFF_UTF8) {
1012             goto plain_copy;
1013         } else {
1014             STRLEN highhalf = 0;    /* Count of variants */
1015             const char *p, *e = pv+len;
1016             for (p = pv; p != e; p++) {
1017                 if (! UTF8_IS_INVARIANT(*p)) {
1018                     highhalf++;
1019                 }
1020             }
1021             if (!highhalf)
1022                 goto plain_copy;
1023             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1024             bufptr = PL_parser->bufptr;
1025             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1026             SvCUR_set(PL_parser->linestr,
1027                 SvCUR(PL_parser->linestr) + len+highhalf);
1028             PL_parser->bufend += len+highhalf;
1029             for (p = pv; p != e; p++) {
1030                 U8 c = (U8)*p;
1031                 if (! UTF8_IS_INVARIANT(c)) {
1032                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1033                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1034                 } else {
1035                     *bufptr++ = (char)c;
1036                 }
1037             }
1038         }
1039     } else {
1040         if (flags & LEX_STUFF_UTF8) {
1041             STRLEN highhalf = 0;
1042             const char *p, *e = pv+len;
1043             for (p = pv; p != e; p++) {
1044                 U8 c = (U8)*p;
1045                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1046                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1047                                 "non-Latin-1 character into Latin-1 input");
1048                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1049                     p++;
1050                     highhalf++;
1051                 } else if (! UTF8_IS_INVARIANT(c)) {
1052                     /* malformed UTF-8 */
1053                     ENTER;
1054                     SAVESPTR(PL_warnhook);
1055                     PL_warnhook = PERL_WARNHOOK_FATAL;
1056                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1057                     LEAVE;
1058                 }
1059             }
1060             if (!highhalf)
1061                 goto plain_copy;
1062             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1063             bufptr = PL_parser->bufptr;
1064             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1065             SvCUR_set(PL_parser->linestr,
1066                 SvCUR(PL_parser->linestr) + len-highhalf);
1067             PL_parser->bufend += len-highhalf;
1068             p = pv;
1069             while (p < e) {
1070                 if (UTF8_IS_INVARIANT(*p)) {
1071                     *bufptr++ = *p;
1072                     p++;
1073                 }
1074                 else {
1075                     assert(p < e -1 );
1076                     *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1077                     p += 2;
1078                 }
1079             }
1080         } else {
1081           plain_copy:
1082             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1083             bufptr = PL_parser->bufptr;
1084             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1085             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1086             PL_parser->bufend += len;
1087             Copy(pv, bufptr, len, char);
1088         }
1089     }
1090 }
1091
1092 /*
1093 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1094
1095 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1096 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1097 reallocating the buffer if necessary.  This means that lexing code that
1098 runs later will see the characters as if they had appeared in the input.
1099 It is not recommended to do this as part of normal parsing, and most
1100 uses of this facility run the risk of the inserted characters being
1101 interpreted in an unintended manner.
1102
1103 The string to be inserted is represented by octets starting at I<pv>
1104 and continuing to the first nul.  These octets are interpreted as either
1105 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1106 in I<flags>.  The characters are recoded for the lexer buffer, according
1107 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1108 If it is not convenient to nul-terminate a string to be inserted, the
1109 L</lex_stuff_pvn> function is more appropriate.
1110
1111 =cut
1112 */
1113
1114 void
1115 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1116 {
1117     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1118     lex_stuff_pvn(pv, strlen(pv), flags);
1119 }
1120
1121 /*
1122 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1123
1124 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1125 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1126 reallocating the buffer if necessary.  This means that lexing code that
1127 runs later will see the characters as if they had appeared in the input.
1128 It is not recommended to do this as part of normal parsing, and most
1129 uses of this facility run the risk of the inserted characters being
1130 interpreted in an unintended manner.
1131
1132 The string to be inserted is the string value of I<sv>.  The characters
1133 are recoded for the lexer buffer, according to how the buffer is currently
1134 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1135 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1136 need to construct a scalar.
1137
1138 =cut
1139 */
1140
1141 void
1142 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1143 {
1144     char *pv;
1145     STRLEN len;
1146     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1147     if (flags)
1148         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1149     pv = SvPV(sv, len);
1150     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1151 }
1152
1153 /*
1154 =for apidoc Amx|void|lex_unstuff|char *ptr
1155
1156 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1157 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1158 This hides the discarded text from any lexing code that runs later,
1159 as if the text had never appeared.
1160
1161 This is not the normal way to consume lexed text.  For that, use
1162 L</lex_read_to>.
1163
1164 =cut
1165 */
1166
1167 void
1168 Perl_lex_unstuff(pTHX_ char *ptr)
1169 {
1170     char *buf, *bufend;
1171     STRLEN unstuff_len;
1172     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1173     buf = PL_parser->bufptr;
1174     if (ptr < buf)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176     if (ptr == buf)
1177         return;
1178     bufend = PL_parser->bufend;
1179     if (ptr > bufend)
1180         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1181     unstuff_len = ptr - buf;
1182     Move(ptr, buf, bufend+1-ptr, char);
1183     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1184     PL_parser->bufend = bufend - unstuff_len;
1185 }
1186
1187 /*
1188 =for apidoc Amx|void|lex_read_to|char *ptr
1189
1190 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1191 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1192 performing the correct bookkeeping whenever a newline character is passed.
1193 This is the normal way to consume lexed text.
1194
1195 Interpretation of the buffer's octets can be abstracted out by
1196 using the slightly higher-level functions L</lex_peek_unichar> and
1197 L</lex_read_unichar>.
1198
1199 =cut
1200 */
1201
1202 void
1203 Perl_lex_read_to(pTHX_ char *ptr)
1204 {
1205     char *s;
1206     PERL_ARGS_ASSERT_LEX_READ_TO;
1207     s = PL_parser->bufptr;
1208     if (ptr < s || ptr > PL_parser->bufend)
1209         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1210     for (; s != ptr; s++)
1211         if (*s == '\n') {
1212             COPLINE_INC_WITH_HERELINES;
1213             PL_parser->linestart = s+1;
1214         }
1215     PL_parser->bufptr = ptr;
1216 }
1217
1218 /*
1219 =for apidoc Amx|void|lex_discard_to|char *ptr
1220
1221 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1222 up to I<ptr>.  The remaining content of the buffer will be moved, and
1223 all pointers into the buffer updated appropriately.  I<ptr> must not
1224 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1225 it is not permitted to discard text that has yet to be lexed.
1226
1227 Normally it is not necessarily to do this directly, because it suffices to
1228 use the implicit discarding behaviour of L</lex_next_chunk> and things
1229 based on it.  However, if a token stretches across multiple lines,
1230 and the lexing code has kept multiple lines of text in the buffer for
1231 that purpose, then after completion of the token it would be wise to
1232 explicitly discard the now-unneeded earlier lines, to avoid future
1233 multi-line tokens growing the buffer without bound.
1234
1235 =cut
1236 */
1237
1238 void
1239 Perl_lex_discard_to(pTHX_ char *ptr)
1240 {
1241     char *buf;
1242     STRLEN discard_len;
1243     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1244     buf = SvPVX(PL_parser->linestr);
1245     if (ptr < buf)
1246         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1247     if (ptr == buf)
1248         return;
1249     if (ptr > PL_parser->bufptr)
1250         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1251     discard_len = ptr - buf;
1252     if (PL_parser->oldbufptr < ptr)
1253         PL_parser->oldbufptr = ptr;
1254     if (PL_parser->oldoldbufptr < ptr)
1255         PL_parser->oldoldbufptr = ptr;
1256     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1257         PL_parser->last_uni = NULL;
1258     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1259         PL_parser->last_lop = NULL;
1260     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1261     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1262     PL_parser->bufend -= discard_len;
1263     PL_parser->bufptr -= discard_len;
1264     PL_parser->oldbufptr -= discard_len;
1265     PL_parser->oldoldbufptr -= discard_len;
1266     if (PL_parser->last_uni)
1267         PL_parser->last_uni -= discard_len;
1268     if (PL_parser->last_lop)
1269         PL_parser->last_lop -= discard_len;
1270 }
1271
1272 /*
1273 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1274
1275 Reads in the next chunk of text to be lexed, appending it to
1276 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1277 looked to the end of the current chunk and wants to know more.  It is
1278 usual, but not necessary, for lexing to have consumed the entirety of
1279 the current chunk at this time.
1280
1281 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1282 chunk (i.e., the current chunk has been entirely consumed), normally the
1283 current chunk will be discarded at the same time that the new chunk is
1284 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1285 will not be discarded.  If the current chunk has not been entirely
1286 consumed, then it will not be discarded regardless of the flag.
1287
1288 Returns true if some new text was added to the buffer, or false if the
1289 buffer has reached the end of the input text.
1290
1291 =cut
1292 */
1293
1294 #define LEX_FAKE_EOF 0x80000000
1295 #define LEX_NO_TERM  0x40000000
1296
1297 bool
1298 Perl_lex_next_chunk(pTHX_ U32 flags)
1299 {
1300     SV *linestr;
1301     char *buf;
1302     STRLEN old_bufend_pos, new_bufend_pos;
1303     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1304     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1305     bool got_some_for_debugger = 0;
1306     bool got_some;
1307     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1308         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1309     linestr = PL_parser->linestr;
1310     buf = SvPVX(linestr);
1311     if (!(flags & LEX_KEEP_PREVIOUS) &&
1312             PL_parser->bufptr == PL_parser->bufend) {
1313         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1314         linestart_pos = 0;
1315         if (PL_parser->last_uni != PL_parser->bufend)
1316             PL_parser->last_uni = NULL;
1317         if (PL_parser->last_lop != PL_parser->bufend)
1318             PL_parser->last_lop = NULL;
1319         last_uni_pos = last_lop_pos = 0;
1320         *buf = 0;
1321         SvCUR(linestr) = 0;
1322     } else {
1323         old_bufend_pos = PL_parser->bufend - buf;
1324         bufptr_pos = PL_parser->bufptr - buf;
1325         oldbufptr_pos = PL_parser->oldbufptr - buf;
1326         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1327         linestart_pos = PL_parser->linestart - buf;
1328         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1329         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1330     }
1331     if (flags & LEX_FAKE_EOF) {
1332         goto eof;
1333     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1334         got_some = 0;
1335     } else if (filter_gets(linestr, old_bufend_pos)) {
1336         got_some = 1;
1337         got_some_for_debugger = 1;
1338     } else if (flags & LEX_NO_TERM) {
1339         got_some = 0;
1340     } else {
1341         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1342             sv_setpvs(linestr, "");
1343         eof:
1344         /* End of real input.  Close filehandle (unless it was STDIN),
1345          * then add implicit termination.
1346          */
1347         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1348             PerlIO_clearerr(PL_parser->rsfp);
1349         else if (PL_parser->rsfp)
1350             (void)PerlIO_close(PL_parser->rsfp);
1351         PL_parser->rsfp = NULL;
1352         PL_parser->in_pod = PL_parser->filtered = 0;
1353 #ifdef PERL_MAD
1354         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1355             PL_faketokens = 1;
1356 #endif
1357         if (!PL_in_eval && PL_minus_p) {
1358             sv_catpvs(linestr,
1359                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1360             PL_minus_n = PL_minus_p = 0;
1361         } else if (!PL_in_eval && PL_minus_n) {
1362             sv_catpvs(linestr, /*{*/";}");
1363             PL_minus_n = 0;
1364         } else
1365             sv_catpvs(linestr, ";");
1366         got_some = 1;
1367     }
1368     buf = SvPVX(linestr);
1369     new_bufend_pos = SvCUR(linestr);
1370     PL_parser->bufend = buf + new_bufend_pos;
1371     PL_parser->bufptr = buf + bufptr_pos;
1372     PL_parser->oldbufptr = buf + oldbufptr_pos;
1373     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1374     PL_parser->linestart = buf + linestart_pos;
1375     if (PL_parser->last_uni)
1376         PL_parser->last_uni = buf + last_uni_pos;
1377     if (PL_parser->last_lop)
1378         PL_parser->last_lop = buf + last_lop_pos;
1379     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1380             PL_curstash != PL_debstash) {
1381         /* debugger active and we're not compiling the debugger code,
1382          * so store the line into the debugger's array of lines
1383          */
1384         update_debugger_info(NULL, buf+old_bufend_pos,
1385             new_bufend_pos-old_bufend_pos);
1386     }
1387     return got_some;
1388 }
1389
1390 /*
1391 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1392
1393 Looks ahead one (Unicode) character in the text currently being lexed.
1394 Returns the codepoint (unsigned integer value) of the next character,
1395 or -1 if lexing has reached the end of the input text.  To consume the
1396 peeked character, use L</lex_read_unichar>.
1397
1398 If the next character is in (or extends into) the next chunk of input
1399 text, the next chunk will be read in.  Normally the current chunk will be
1400 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1401 then the current chunk will not be discarded.
1402
1403 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1404 is encountered, an exception is generated.
1405
1406 =cut
1407 */
1408
1409 I32
1410 Perl_lex_peek_unichar(pTHX_ U32 flags)
1411 {
1412     dVAR;
1413     char *s, *bufend;
1414     if (flags & ~(LEX_KEEP_PREVIOUS))
1415         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1416     s = PL_parser->bufptr;
1417     bufend = PL_parser->bufend;
1418     if (UTF) {
1419         U8 head;
1420         I32 unichar;
1421         STRLEN len, retlen;
1422         if (s == bufend) {
1423             if (!lex_next_chunk(flags))
1424                 return -1;
1425             s = PL_parser->bufptr;
1426             bufend = PL_parser->bufend;
1427         }
1428         head = (U8)*s;
1429         if (UTF8_IS_INVARIANT(head))
1430             return head;
1431         if (UTF8_IS_START(head)) {
1432             len = UTF8SKIP(&head);
1433             while ((STRLEN)(bufend-s) < len) {
1434                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1435                     break;
1436                 s = PL_parser->bufptr;
1437                 bufend = PL_parser->bufend;
1438             }
1439         }
1440         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1441         if (retlen == (STRLEN)-1) {
1442             /* malformed UTF-8 */
1443             ENTER;
1444             SAVESPTR(PL_warnhook);
1445             PL_warnhook = PERL_WARNHOOK_FATAL;
1446             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1447             LEAVE;
1448         }
1449         return unichar;
1450     } else {
1451         if (s == bufend) {
1452             if (!lex_next_chunk(flags))
1453                 return -1;
1454             s = PL_parser->bufptr;
1455         }
1456         return (U8)*s;
1457     }
1458 }
1459
1460 /*
1461 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1462
1463 Reads the next (Unicode) character in the text currently being lexed.
1464 Returns the codepoint (unsigned integer value) of the character read,
1465 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1466 if lexing has reached the end of the input text.  To non-destructively
1467 examine the next character, use L</lex_peek_unichar> instead.
1468
1469 If the next character is in (or extends into) the next chunk of input
1470 text, the next chunk will be read in.  Normally the current chunk will be
1471 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1472 then the current chunk will not be discarded.
1473
1474 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1475 is encountered, an exception is generated.
1476
1477 =cut
1478 */
1479
1480 I32
1481 Perl_lex_read_unichar(pTHX_ U32 flags)
1482 {
1483     I32 c;
1484     if (flags & ~(LEX_KEEP_PREVIOUS))
1485         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1486     c = lex_peek_unichar(flags);
1487     if (c != -1) {
1488         if (c == '\n')
1489             COPLINE_INC_WITH_HERELINES;
1490         if (UTF)
1491             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1492         else
1493             ++(PL_parser->bufptr);
1494     }
1495     return c;
1496 }
1497
1498 /*
1499 =for apidoc Amx|void|lex_read_space|U32 flags
1500
1501 Reads optional spaces, in Perl style, in the text currently being
1502 lexed.  The spaces may include ordinary whitespace characters and
1503 Perl-style comments.  C<#line> directives are processed if encountered.
1504 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1505 at a non-space character (or the end of the input text).
1506
1507 If spaces extend into the next chunk of input text, the next chunk will
1508 be read in.  Normally the current chunk will be discarded at the same
1509 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1510 chunk will not be discarded.
1511
1512 =cut
1513 */
1514
1515 #define LEX_NO_NEXT_CHUNK 0x80000000
1516
1517 void
1518 Perl_lex_read_space(pTHX_ U32 flags)
1519 {
1520     char *s, *bufend;
1521     bool need_incline = 0;
1522     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1523         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1524 #ifdef PERL_MAD
1525     if (PL_skipwhite) {
1526         sv_free(PL_skipwhite);
1527         PL_skipwhite = NULL;
1528     }
1529     if (PL_madskills)
1530         PL_skipwhite = newSVpvs("");
1531 #endif /* PERL_MAD */
1532     s = PL_parser->bufptr;
1533     bufend = PL_parser->bufend;
1534     while (1) {
1535         char c = *s;
1536         if (c == '#') {
1537             do {
1538                 c = *++s;
1539             } while (!(c == '\n' || (c == 0 && s == bufend)));
1540         } else if (c == '\n') {
1541             s++;
1542             PL_parser->linestart = s;
1543             if (s == bufend)
1544                 need_incline = 1;
1545             else
1546                 incline(s);
1547         } else if (isSPACE(c)) {
1548             s++;
1549         } else if (c == 0 && s == bufend) {
1550             bool got_more;
1551 #ifdef PERL_MAD
1552             if (PL_madskills)
1553                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1554 #endif /* PERL_MAD */
1555             if (flags & LEX_NO_NEXT_CHUNK)
1556                 break;
1557             PL_parser->bufptr = s;
1558             COPLINE_INC_WITH_HERELINES;
1559             got_more = lex_next_chunk(flags);
1560             CopLINE_dec(PL_curcop);
1561             s = PL_parser->bufptr;
1562             bufend = PL_parser->bufend;
1563             if (!got_more)
1564                 break;
1565             if (need_incline && PL_parser->rsfp) {
1566                 incline(s);
1567                 need_incline = 0;
1568             }
1569         } else {
1570             break;
1571         }
1572     }
1573 #ifdef PERL_MAD
1574     if (PL_madskills)
1575         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1576 #endif /* PERL_MAD */
1577     PL_parser->bufptr = s;
1578 }
1579
1580 /*
1581
1582 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1583
1584 This function performs syntax checking on a prototype, C<proto>.
1585 If C<warn> is true, any illegal characters or mismatched brackets
1586 will trigger illegalproto warnings, declaring that they were
1587 detected in the prototype for C<name>.
1588
1589 The return value is C<true> if this is a valid prototype, and
1590 C<false> if it is not, regardless of whether C<warn> was C<true> or
1591 C<false>.
1592
1593 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1594
1595 =cut
1596
1597  */
1598
1599 bool
1600 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1601 {
1602     STRLEN len, origlen;
1603     char *p = proto ? SvPV(proto, len) : NULL;
1604     bool bad_proto = FALSE;
1605     bool in_brackets = FALSE;
1606     bool after_slash = FALSE;
1607     char greedy_proto = ' ';
1608     bool proto_after_greedy_proto = FALSE;
1609     bool must_be_last = FALSE;
1610     bool underscore = FALSE;
1611     bool seen_underscore = FALSE;
1612
1613     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1614
1615     if (!proto)
1616         return TRUE;
1617
1618     origlen = len;
1619     for (; len--; p++) {
1620         if (!isSPACE(*p)) {
1621             if (must_be_last)
1622                 proto_after_greedy_proto = TRUE;
1623             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1624                 bad_proto = TRUE;
1625             }
1626             else {
1627                 if (underscore) {
1628                     if(!strchr(";@%", *p))
1629                         bad_proto = TRUE;
1630                     underscore = FALSE;
1631                 }
1632
1633                 if (*p == '[')
1634                     in_brackets = TRUE;
1635                 else if (*p == ']')
1636                     in_brackets = FALSE;
1637                 else if ((*p == '@' || *p == '%') &&
1638                     !after_slash &&
1639                     !in_brackets ) {
1640                     must_be_last = TRUE;
1641                     greedy_proto = *p;
1642                 }
1643                 else if (*p == '_')
1644                     underscore = seen_underscore = TRUE;
1645             }
1646             if (*p == '\\')
1647                 after_slash = TRUE;
1648             else
1649                 after_slash = FALSE;
1650         }
1651     }
1652
1653     if (warn) {
1654         p -= origlen;
1655         if (proto_after_greedy_proto)
1656             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1657                         "Prototype after '%c' for %"SVf" : %s",
1658                         greedy_proto, SVfARG(name), p);
1659         if (bad_proto) {
1660             SV *dsv = newSVpvs_flags("", SVs_TEMP);
1661             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1662                         "Illegal character %sin prototype for %"SVf" : %s",
1663                         seen_underscore ? "after '_' " : "",
1664                         SVfARG(PL_subname),
1665                         SvUTF8(PL_lex_stuff)
1666                             ? sv_uni_display(dsv,
1667                                 newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1668                                 origlen,
1669                                 UNI_DISPLAY_ISPRINT)
1670                             : pv_pretty(dsv, p, origlen, 60, NULL, NULL,
1671                                 PERL_PV_ESCAPE_NONASCII));
1672         }
1673     }
1674
1675     return (! (proto_after_greedy_proto || bad_proto) );
1676 }
1677
1678 /*
1679  * S_incline
1680  * This subroutine has nothing to do with tilting, whether at windmills
1681  * or pinball tables.  Its name is short for "increment line".  It
1682  * increments the current line number in CopLINE(PL_curcop) and checks
1683  * to see whether the line starts with a comment of the form
1684  *    # line 500 "foo.pm"
1685  * If so, it sets the current line number and file to the values in the comment.
1686  */
1687
1688 STATIC void
1689 S_incline(pTHX_ const char *s)
1690 {
1691     dVAR;
1692     const char *t;
1693     const char *n;
1694     const char *e;
1695     line_t line_num;
1696
1697     PERL_ARGS_ASSERT_INCLINE;
1698
1699     COPLINE_INC_WITH_HERELINES;
1700     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1701      && s+1 == PL_bufend && *s == ';') {
1702         /* fake newline in string eval */
1703         CopLINE_dec(PL_curcop);
1704         return;
1705     }
1706     if (*s++ != '#')
1707         return;
1708     while (SPACE_OR_TAB(*s))
1709         s++;
1710     if (strnEQ(s, "line", 4))
1711         s += 4;
1712     else
1713         return;
1714     if (SPACE_OR_TAB(*s))
1715         s++;
1716     else
1717         return;
1718     while (SPACE_OR_TAB(*s))
1719         s++;
1720     if (!isDIGIT(*s))
1721         return;
1722
1723     n = s;
1724     while (isDIGIT(*s))
1725         s++;
1726     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1727         return;
1728     while (SPACE_OR_TAB(*s))
1729         s++;
1730     if (*s == '"' && (t = strchr(s+1, '"'))) {
1731         s++;
1732         e = t + 1;
1733     }
1734     else {
1735         t = s;
1736         while (!isSPACE(*t))
1737             t++;
1738         e = t;
1739     }
1740     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1741         e++;
1742     if (*e != '\n' && *e != '\0')
1743         return;         /* false alarm */
1744
1745     line_num = atoi(n)-1;
1746
1747     if (t - s > 0) {
1748         const STRLEN len = t - s;
1749         SV *const temp_sv = CopFILESV(PL_curcop);
1750         const char *cf;
1751         STRLEN tmplen;
1752
1753         if (temp_sv) {
1754             cf = SvPVX(temp_sv);
1755             tmplen = SvCUR(temp_sv);
1756         } else {
1757             cf = NULL;
1758             tmplen = 0;
1759         }
1760
1761         if (!PL_rsfp && !PL_parser->filtered) {
1762             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1763              * to *{"::_<newfilename"} */
1764             /* However, the long form of evals is only turned on by the
1765                debugger - usually they're "(eval %lu)" */
1766             char smallbuf[128];
1767             char *tmpbuf;
1768             GV **gvp;
1769             STRLEN tmplen2 = len;
1770             if (tmplen + 2 <= sizeof smallbuf)
1771                 tmpbuf = smallbuf;
1772             else
1773                 Newx(tmpbuf, tmplen + 2, char);
1774             tmpbuf[0] = '_';
1775             tmpbuf[1] = '<';
1776             memcpy(tmpbuf + 2, cf, tmplen);
1777             tmplen += 2;
1778             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1779             if (gvp) {
1780                 char *tmpbuf2;
1781                 GV *gv2;
1782
1783                 if (tmplen2 + 2 <= sizeof smallbuf)
1784                     tmpbuf2 = smallbuf;
1785                 else
1786                     Newx(tmpbuf2, tmplen2 + 2, char);
1787
1788                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1789                     /* Either they malloc'd it, or we malloc'd it,
1790                        so no prefix is present in ours.  */
1791                     tmpbuf2[0] = '_';
1792                     tmpbuf2[1] = '<';
1793                 }
1794
1795                 memcpy(tmpbuf2 + 2, s, tmplen2);
1796                 tmplen2 += 2;
1797
1798                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1799                 if (!isGV(gv2)) {
1800                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1801                     /* adjust ${"::_<newfilename"} to store the new file name */
1802                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1803                     /* The line number may differ. If that is the case,
1804                        alias the saved lines that are in the array.
1805                        Otherwise alias the whole array. */
1806                     if (CopLINE(PL_curcop) == line_num) {
1807                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1808                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1809                     }
1810                     else if (GvAV(*gvp)) {
1811                         AV * const av = GvAV(*gvp);
1812                         const I32 start = CopLINE(PL_curcop)+1;
1813                         I32 items = AvFILLp(av) - start;
1814                         if (items > 0) {
1815                             AV * const av2 = GvAVn(gv2);
1816                             SV **svp = AvARRAY(av) + start;
1817                             I32 l = (I32)line_num+1;
1818                             while (items--)
1819                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1820                         }
1821                     }
1822                 }
1823
1824                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1825             }
1826             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1827         }
1828         CopFILE_free(PL_curcop);
1829         CopFILE_setn(PL_curcop, s, len);
1830     }
1831     CopLINE_set(PL_curcop, line_num);
1832 }
1833
1834 #ifdef PERL_MAD
1835 /* skip space before PL_thistoken */
1836
1837 STATIC char *
1838 S_skipspace0(pTHX_ char *s)
1839 {
1840     PERL_ARGS_ASSERT_SKIPSPACE0;
1841
1842     s = skipspace(s);
1843     if (!PL_madskills)
1844         return s;
1845     if (PL_skipwhite) {
1846         if (!PL_thiswhite)
1847             PL_thiswhite = newSVpvs("");
1848         sv_catsv(PL_thiswhite, PL_skipwhite);
1849         sv_free(PL_skipwhite);
1850         PL_skipwhite = 0;
1851     }
1852     PL_realtokenstart = s - SvPVX(PL_linestr);
1853     return s;
1854 }
1855
1856 /* skip space after PL_thistoken */
1857
1858 STATIC char *
1859 S_skipspace1(pTHX_ char *s)
1860 {
1861     const char *start = s;
1862     I32 startoff = start - SvPVX(PL_linestr);
1863
1864     PERL_ARGS_ASSERT_SKIPSPACE1;
1865
1866     s = skipspace(s);
1867     if (!PL_madskills)
1868         return s;
1869     start = SvPVX(PL_linestr) + startoff;
1870     if (!PL_thistoken && PL_realtokenstart >= 0) {
1871         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1872         PL_thistoken = newSVpvn(tstart, start - tstart);
1873     }
1874     PL_realtokenstart = -1;
1875     if (PL_skipwhite) {
1876         if (!PL_nextwhite)
1877             PL_nextwhite = newSVpvs("");
1878         sv_catsv(PL_nextwhite, PL_skipwhite);
1879         sv_free(PL_skipwhite);
1880         PL_skipwhite = 0;
1881     }
1882     return s;
1883 }
1884
1885 STATIC char *
1886 S_skipspace2(pTHX_ char *s, SV **svp)
1887 {
1888     char *start;
1889     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1890     const I32 startoff = s - SvPVX(PL_linestr);
1891
1892     PERL_ARGS_ASSERT_SKIPSPACE2;
1893
1894     s = skipspace(s);
1895     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1896     if (!PL_madskills || !svp)
1897         return s;
1898     start = SvPVX(PL_linestr) + startoff;
1899     if (!PL_thistoken && PL_realtokenstart >= 0) {
1900         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1901         PL_thistoken = newSVpvn(tstart, start - tstart);
1902         PL_realtokenstart = -1;
1903     }
1904     if (PL_skipwhite) {
1905         if (!*svp)
1906             *svp = newSVpvs("");
1907         sv_setsv(*svp, PL_skipwhite);
1908         sv_free(PL_skipwhite);
1909         PL_skipwhite = 0;
1910     }
1911
1912     return s;
1913 }
1914 #endif
1915
1916 STATIC void
1917 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1918 {
1919     AV *av = CopFILEAVx(PL_curcop);
1920     if (av) {
1921         SV * const sv = newSV_type(SVt_PVMG);
1922         if (orig_sv)
1923             sv_setsv(sv, orig_sv);
1924         else
1925             sv_setpvn(sv, buf, len);
1926         (void)SvIOK_on(sv);
1927         SvIV_set(sv, 0);
1928         av_store(av, (I32)CopLINE(PL_curcop), sv);
1929     }
1930 }
1931
1932 /*
1933  * S_skipspace
1934  * Called to gobble the appropriate amount and type of whitespace.
1935  * Skips comments as well.
1936  */
1937
1938 STATIC char *
1939 S_skipspace(pTHX_ char *s)
1940 {
1941 #ifdef PERL_MAD
1942     char *start = s;
1943 #endif /* PERL_MAD */
1944     PERL_ARGS_ASSERT_SKIPSPACE;
1945 #ifdef PERL_MAD
1946     if (PL_skipwhite) {
1947         sv_free(PL_skipwhite);
1948         PL_skipwhite = NULL;
1949     }
1950 #endif /* PERL_MAD */
1951     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1952         while (s < PL_bufend && SPACE_OR_TAB(*s))
1953             s++;
1954     } else {
1955         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1956         PL_bufptr = s;
1957         lex_read_space(LEX_KEEP_PREVIOUS |
1958                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1959                     LEX_NO_NEXT_CHUNK : 0));
1960         s = PL_bufptr;
1961         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1962         if (PL_linestart > PL_bufptr)
1963             PL_bufptr = PL_linestart;
1964         return s;
1965     }
1966 #ifdef PERL_MAD
1967     if (PL_madskills)
1968         PL_skipwhite = newSVpvn(start, s-start);
1969 #endif /* PERL_MAD */
1970     return s;
1971 }
1972
1973 /*
1974  * S_check_uni
1975  * Check the unary operators to ensure there's no ambiguity in how they're
1976  * used.  An ambiguous piece of code would be:
1977  *     rand + 5
1978  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1979  * the +5 is its argument.
1980  */
1981
1982 STATIC void
1983 S_check_uni(pTHX)
1984 {
1985     dVAR;
1986     const char *s;
1987     const char *t;
1988
1989     if (PL_oldoldbufptr != PL_last_uni)
1990         return;
1991     while (isSPACE(*PL_last_uni))
1992         PL_last_uni++;
1993     s = PL_last_uni;
1994     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1995         s++;
1996     if ((t = strchr(s, '(')) && t < PL_bufptr)
1997         return;
1998
1999     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2000                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2001                      (int)(s - PL_last_uni), PL_last_uni);
2002 }
2003
2004 /*
2005  * LOP : macro to build a list operator.  Its behaviour has been replaced
2006  * with a subroutine, S_lop() for which LOP is just another name.
2007  */
2008
2009 #define LOP(f,x) return lop(f,x,s)
2010
2011 /*
2012  * S_lop
2013  * Build a list operator (or something that might be one).  The rules:
2014  *  - if we have a next token, then it's a list operator [why?]
2015  *  - if the next thing is an opening paren, then it's a function
2016  *  - else it's a list operator
2017  */
2018
2019 STATIC I32
2020 S_lop(pTHX_ I32 f, int x, char *s)
2021 {
2022     dVAR;
2023
2024     PERL_ARGS_ASSERT_LOP;
2025
2026     pl_yylval.ival = f;
2027     CLINE;
2028     PL_expect = x;
2029     PL_bufptr = s;
2030     PL_last_lop = PL_oldbufptr;
2031     PL_last_lop_op = (OPCODE)f;
2032 #ifdef PERL_MAD
2033     if (PL_lasttoke)
2034         goto lstop;
2035 #else
2036     if (PL_nexttoke)
2037         goto lstop;
2038 #endif
2039     if (*s == '(')
2040         return REPORT(FUNC);
2041     s = PEEKSPACE(s);
2042     if (*s == '(')
2043         return REPORT(FUNC);
2044     else {
2045         lstop:
2046         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2047             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2048         return REPORT(LSTOP);
2049     }
2050 }
2051
2052 #ifdef PERL_MAD
2053  /*
2054  * S_start_force
2055  * Sets up for an eventual force_next().  start_force(0) basically does
2056  * an unshift, while start_force(-1) does a push.  yylex removes items
2057  * on the "pop" end.
2058  */
2059
2060 STATIC void
2061 S_start_force(pTHX_ int where)
2062 {
2063     int i;
2064
2065     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
2066         where = PL_lasttoke;
2067     assert(PL_curforce < 0 || PL_curforce == where);
2068     if (PL_curforce != where) {
2069         for (i = PL_lasttoke; i > where; --i) {
2070             PL_nexttoke[i] = PL_nexttoke[i-1];
2071         }
2072         PL_lasttoke++;
2073     }
2074     if (PL_curforce < 0)        /* in case of duplicate start_force() */
2075         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2076     PL_curforce = where;
2077     if (PL_nextwhite) {
2078         if (PL_madskills)
2079             curmad('^', newSVpvs(""));
2080         CURMAD('_', PL_nextwhite);
2081     }
2082 }
2083
2084 STATIC void
2085 S_curmad(pTHX_ char slot, SV *sv)
2086 {
2087     MADPROP **where;
2088
2089     if (!sv)
2090         return;
2091     if (PL_curforce < 0)
2092         where = &PL_thismad;
2093     else
2094         where = &PL_nexttoke[PL_curforce].next_mad;
2095
2096     if (PL_faketokens)
2097         sv_setpvs(sv, "");
2098     else {
2099         if (!IN_BYTES) {
2100             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2101                 SvUTF8_on(sv);
2102             else if (PL_encoding) {
2103                 sv_recode_to_utf8(sv, PL_encoding);
2104             }
2105         }
2106     }
2107
2108     /* keep a slot open for the head of the list? */
2109     if (slot != '_' && *where && (*where)->mad_key == '^') {
2110         (*where)->mad_key = slot;
2111         sv_free(MUTABLE_SV(((*where)->mad_val)));
2112         (*where)->mad_val = (void*)sv;
2113     }
2114     else
2115         addmad(newMADsv(slot, sv), where, 0);
2116 }
2117 #else
2118 #  define start_force(where)    NOOP
2119 #  define curmad(slot, sv)      NOOP
2120 #endif
2121
2122 /*
2123  * S_force_next
2124  * When the lexer realizes it knows the next token (for instance,
2125  * it is reordering tokens for the parser) then it can call S_force_next
2126  * to know what token to return the next time the lexer is called.  Caller
2127  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2128  * and possibly PL_expect to ensure the lexer handles the token correctly.
2129  */
2130
2131 STATIC void
2132 S_force_next(pTHX_ I32 type)
2133 {
2134     dVAR;
2135 #ifdef DEBUGGING
2136     if (DEBUG_T_TEST) {
2137         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2138         tokereport(type, &NEXTVAL_NEXTTOKE);
2139     }
2140 #endif
2141 #ifdef PERL_MAD
2142     if (PL_curforce < 0)
2143         start_force(PL_lasttoke);
2144     PL_nexttoke[PL_curforce].next_type = type;
2145     if (PL_lex_state != LEX_KNOWNEXT)
2146         PL_lex_defer = PL_lex_state;
2147     PL_lex_state = LEX_KNOWNEXT;
2148     PL_lex_expect = PL_expect;
2149     PL_curforce = -1;
2150 #else
2151     PL_nexttype[PL_nexttoke] = type;
2152     PL_nexttoke++;
2153     if (PL_lex_state != LEX_KNOWNEXT) {
2154         PL_lex_defer = PL_lex_state;
2155         PL_lex_expect = PL_expect;
2156         PL_lex_state = LEX_KNOWNEXT;
2157     }
2158 #endif
2159 }
2160
2161 void
2162 Perl_yyunlex(pTHX)
2163 {
2164     int yyc = PL_parser->yychar;
2165     if (yyc != YYEMPTY) {
2166         if (yyc) {
2167             start_force(-1);
2168             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2169             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2170                 PL_lex_allbrackets--;
2171                 PL_lex_brackets--;
2172                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2173             } else if (yyc == '('/*)*/) {
2174                 PL_lex_allbrackets--;
2175                 yyc |= (2<<24);
2176             }
2177             force_next(yyc);
2178         }
2179         PL_parser->yychar = YYEMPTY;
2180     }
2181 }
2182
2183 STATIC SV *
2184 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2185 {
2186     dVAR;
2187     SV * const sv = newSVpvn_utf8(start, len,
2188                                   !IN_BYTES
2189                                   && UTF
2190                                   && !is_ascii_string((const U8*)start, len)
2191                                   && is_utf8_string((const U8*)start, len));
2192     return sv;
2193 }
2194
2195 /*
2196  * S_force_word
2197  * When the lexer knows the next thing is a word (for instance, it has
2198  * just seen -> and it knows that the next char is a word char, then
2199  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2200  * lookahead.
2201  *
2202  * Arguments:
2203  *   char *start : buffer position (must be within PL_linestr)
2204  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2205  *   int check_keyword : if true, Perl checks to make sure the word isn't
2206  *       a keyword (do this if the word is a label, e.g. goto FOO)
2207  *   int allow_pack : if true, : characters will also be allowed (require,
2208  *       use, etc. do this)
2209  *   int allow_initial_tick : used by the "sub" lexer only.
2210  */
2211
2212 STATIC char *
2213 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2214 {
2215     dVAR;
2216     char *s;
2217     STRLEN len;
2218
2219     PERL_ARGS_ASSERT_FORCE_WORD;
2220
2221     start = SKIPSPACE1(start);
2222     s = start;
2223     if (isIDFIRST_lazy_if(s,UTF) ||
2224         (allow_pack && *s == ':') )
2225     {
2226         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2227         if (check_keyword) {
2228           char *s2 = PL_tokenbuf;
2229           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2230             s2 += 6, len -= 6;
2231           if (keyword(s2, len, 0))
2232             return start;
2233         }
2234         start_force(PL_curforce);
2235         if (PL_madskills)
2236             curmad('X', newSVpvn(start,s-start));
2237         if (token == METHOD) {
2238             s = SKIPSPACE1(s);
2239             if (*s == '(')
2240                 PL_expect = XTERM;
2241             else {
2242                 PL_expect = XOPERATOR;
2243             }
2244         }
2245         if (PL_madskills)
2246             curmad('g', newSVpvs( "forced" ));
2247         NEXTVAL_NEXTTOKE.opval
2248             = (OP*)newSVOP(OP_CONST,0,
2249                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2250         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2251         force_next(token);
2252     }
2253     return s;
2254 }
2255
2256 /*
2257  * S_force_ident
2258  * Called when the lexer wants $foo *foo &foo etc, but the program
2259  * text only contains the "foo" portion.  The first argument is a pointer
2260  * to the "foo", and the second argument is the type symbol to prefix.
2261  * Forces the next token to be a "WORD".
2262  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2263  */
2264
2265 STATIC void
2266 S_force_ident(pTHX_ const char *s, int kind)
2267 {
2268     dVAR;
2269
2270     PERL_ARGS_ASSERT_FORCE_IDENT;
2271
2272     if (s[0]) {
2273         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2274         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2275                                                                 UTF ? SVf_UTF8 : 0));
2276         start_force(PL_curforce);
2277         NEXTVAL_NEXTTOKE.opval = o;
2278         force_next(WORD);
2279         if (kind) {
2280             o->op_private = OPpCONST_ENTERED;
2281             /* XXX see note in pp_entereval() for why we forgo typo
2282                warnings if the symbol must be introduced in an eval.
2283                GSAR 96-10-12 */
2284             gv_fetchpvn_flags(s, len,
2285                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2286                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2287                               kind == '$' ? SVt_PV :
2288                               kind == '@' ? SVt_PVAV :
2289                               kind == '%' ? SVt_PVHV :
2290                               SVt_PVGV
2291                               );
2292         }
2293     }
2294 }
2295
2296 static void
2297 S_force_ident_maybe_lex(pTHX_ char pit)
2298 {
2299     start_force(PL_curforce);
2300     NEXTVAL_NEXTTOKE.ival = pit;
2301     force_next('p');
2302 }
2303
2304 NV
2305 Perl_str_to_version(pTHX_ SV *sv)
2306 {
2307     NV retval = 0.0;
2308     NV nshift = 1.0;
2309     STRLEN len;
2310     const char *start = SvPV_const(sv,len);
2311     const char * const end = start + len;
2312     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2313
2314     PERL_ARGS_ASSERT_STR_TO_VERSION;
2315
2316     while (start < end) {
2317         STRLEN skip;
2318         UV n;
2319         if (utf)
2320             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2321         else {
2322             n = *(U8*)start;
2323             skip = 1;
2324         }
2325         retval += ((NV)n)/nshift;
2326         start += skip;
2327         nshift *= 1000;
2328     }
2329     return retval;
2330 }
2331
2332 /*
2333  * S_force_version
2334  * Forces the next token to be a version number.
2335  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2336  * and if "guessing" is TRUE, then no new token is created (and the caller
2337  * must use an alternative parsing method).
2338  */
2339
2340 STATIC char *
2341 S_force_version(pTHX_ char *s, int guessing)
2342 {
2343     dVAR;
2344     OP *version = NULL;
2345     char *d;
2346 #ifdef PERL_MAD
2347     I32 startoff = s - SvPVX(PL_linestr);
2348 #endif
2349
2350     PERL_ARGS_ASSERT_FORCE_VERSION;
2351
2352     s = SKIPSPACE1(s);
2353
2354     d = s;
2355     if (*d == 'v')
2356         d++;
2357     if (isDIGIT(*d)) {
2358         while (isDIGIT(*d) || *d == '_' || *d == '.')
2359             d++;
2360 #ifdef PERL_MAD
2361         if (PL_madskills) {
2362             start_force(PL_curforce);
2363             curmad('X', newSVpvn(s,d-s));
2364         }
2365 #endif
2366         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2367             SV *ver;
2368 #ifdef USE_LOCALE_NUMERIC
2369             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2370             setlocale(LC_NUMERIC, "C");
2371 #endif
2372             s = scan_num(s, &pl_yylval);
2373 #ifdef USE_LOCALE_NUMERIC
2374             setlocale(LC_NUMERIC, loc);
2375             Safefree(loc);
2376 #endif
2377             version = pl_yylval.opval;
2378             ver = cSVOPx(version)->op_sv;
2379             if (SvPOK(ver) && !SvNIOK(ver)) {
2380                 SvUPGRADE(ver, SVt_PVNV);
2381                 SvNV_set(ver, str_to_version(ver));
2382                 SvNOK_on(ver);          /* hint that it is a version */
2383             }
2384         }
2385         else if (guessing) {
2386 #ifdef PERL_MAD
2387             if (PL_madskills) {
2388                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2389                 PL_nextwhite = 0;
2390                 s = SvPVX(PL_linestr) + startoff;
2391             }
2392 #endif
2393             return s;
2394         }
2395     }
2396
2397 #ifdef PERL_MAD
2398     if (PL_madskills && !version) {
2399         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2400         PL_nextwhite = 0;
2401         s = SvPVX(PL_linestr) + startoff;
2402     }
2403 #endif
2404     /* NOTE: The parser sees the package name and the VERSION swapped */
2405     start_force(PL_curforce);
2406     NEXTVAL_NEXTTOKE.opval = version;
2407     force_next(WORD);
2408
2409     return s;
2410 }
2411
2412 /*
2413  * S_force_strict_version
2414  * Forces the next token to be a version number using strict syntax rules.
2415  */
2416
2417 STATIC char *
2418 S_force_strict_version(pTHX_ char *s)
2419 {
2420     dVAR;
2421     OP *version = NULL;
2422 #ifdef PERL_MAD
2423     I32 startoff = s - SvPVX(PL_linestr);
2424 #endif
2425     const char *errstr = NULL;
2426
2427     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2428
2429     while (isSPACE(*s)) /* leading whitespace */
2430         s++;
2431
2432     if (is_STRICT_VERSION(s,&errstr)) {
2433         SV *ver = newSV(0);
2434         s = (char *)scan_version(s, ver, 0);
2435         version = newSVOP(OP_CONST, 0, ver);
2436     }
2437     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2438             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2439     {
2440         PL_bufptr = s;
2441         if (errstr)
2442             yyerror(errstr); /* version required */
2443         return s;
2444     }
2445
2446 #ifdef PERL_MAD
2447     if (PL_madskills && !version) {
2448         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2449         PL_nextwhite = 0;
2450         s = SvPVX(PL_linestr) + startoff;
2451     }
2452 #endif
2453     /* NOTE: The parser sees the package name and the VERSION swapped */
2454     start_force(PL_curforce);
2455     NEXTVAL_NEXTTOKE.opval = version;
2456     force_next(WORD);
2457
2458     return s;
2459 }
2460
2461 /*
2462  * S_tokeq
2463  * Tokenize a quoted string passed in as an SV.  It finds the next
2464  * chunk, up to end of string or a backslash.  It may make a new
2465  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2466  * turns \\ into \.
2467  */
2468
2469 STATIC SV *
2470 S_tokeq(pTHX_ SV *sv)
2471 {
2472     dVAR;
2473     char *s;
2474     char *send;
2475     char *d;
2476     STRLEN len = 0;
2477     SV *pv = sv;
2478
2479     PERL_ARGS_ASSERT_TOKEQ;
2480
2481     if (!SvLEN(sv))
2482         goto finish;
2483
2484     s = SvPV_force(sv, len);
2485     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2486         goto finish;
2487     send = s + len;
2488     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2489     while (s < send && !(*s == '\\' && s[1] == '\\'))
2490         s++;
2491     if (s == send)
2492         goto finish;
2493     d = s;
2494     if ( PL_hints & HINT_NEW_STRING ) {
2495         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2496     }
2497     while (s < send) {
2498         if (*s == '\\') {
2499             if (s + 1 < send && (s[1] == '\\'))
2500                 s++;            /* all that, just for this */
2501         }
2502         *d++ = *s++;
2503     }
2504     *d = '\0';
2505     SvCUR_set(sv, d - SvPVX_const(sv));
2506   finish:
2507     if ( PL_hints & HINT_NEW_STRING )
2508        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2509     return sv;
2510 }
2511
2512 /*
2513  * Now come three functions related to double-quote context,
2514  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2515  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2516  * interact with PL_lex_state, and create fake ( ... ) argument lists
2517  * to handle functions and concatenation.
2518  * For example,
2519  *   "foo\lbar"
2520  * is tokenised as
2521  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2522  */
2523
2524 /*
2525  * S_sublex_start
2526  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2527  *
2528  * Pattern matching will set PL_lex_op to the pattern-matching op to
2529  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2530  *
2531  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2532  *
2533  * Everything else becomes a FUNC.
2534  *
2535  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2536  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2537  * call to S_sublex_push().
2538  */
2539
2540 STATIC I32
2541 S_sublex_start(pTHX)
2542 {
2543     dVAR;
2544     const I32 op_type = pl_yylval.ival;
2545
2546     if (op_type == OP_NULL) {
2547         pl_yylval.opval = PL_lex_op;
2548         PL_lex_op = NULL;
2549         return THING;
2550     }
2551     if (op_type == OP_CONST || op_type == OP_READLINE) {
2552         SV *sv = tokeq(PL_lex_stuff);
2553
2554         if (SvTYPE(sv) == SVt_PVIV) {
2555             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2556             STRLEN len;
2557             const char * const p = SvPV_const(sv, len);
2558             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2559             SvREFCNT_dec(sv);
2560             sv = nsv;
2561         }
2562         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2563         PL_lex_stuff = NULL;
2564         /* Allow <FH> // "foo" */
2565         if (op_type == OP_READLINE)
2566             PL_expect = XTERMORDORDOR;
2567         return THING;
2568     }
2569     else if (op_type == OP_BACKTICK && PL_lex_op) {
2570         /* readpipe() vas overriden */
2571         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2572         pl_yylval.opval = PL_lex_op;
2573         PL_lex_op = NULL;
2574         PL_lex_stuff = NULL;
2575         return THING;
2576     }
2577
2578     PL_sublex_info.super_state = PL_lex_state;
2579     PL_sublex_info.sub_inwhat = (U16)op_type;
2580     PL_sublex_info.sub_op = PL_lex_op;
2581     PL_lex_state = LEX_INTERPPUSH;
2582
2583     PL_expect = XTERM;
2584     if (PL_lex_op) {
2585         pl_yylval.opval = PL_lex_op;
2586         PL_lex_op = NULL;
2587         return PMFUNC;
2588     }
2589     else
2590         return FUNC;
2591 }
2592
2593 /*
2594  * S_sublex_push
2595  * Create a new scope to save the lexing state.  The scope will be
2596  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2597  * to the uc, lc, etc. found before.
2598  * Sets PL_lex_state to LEX_INTERPCONCAT.
2599  */
2600
2601 STATIC I32
2602 S_sublex_push(pTHX)
2603 {
2604     dVAR;
2605     LEXSHARED *shared;
2606     ENTER;
2607
2608     PL_lex_state = PL_sublex_info.super_state;
2609     SAVEBOOL(PL_lex_dojoin);
2610     SAVEI32(PL_lex_brackets);
2611     SAVEI32(PL_lex_allbrackets);
2612     SAVEI32(PL_lex_formbrack);
2613     SAVEI8(PL_lex_fakeeof);
2614     SAVEI32(PL_lex_casemods);
2615     SAVEI32(PL_lex_starts);
2616     SAVEI8(PL_lex_state);
2617     SAVESPTR(PL_lex_repl);
2618     SAVEVPTR(PL_lex_inpat);
2619     SAVEI16(PL_lex_inwhat);
2620     SAVECOPLINE(PL_curcop);
2621     SAVEPPTR(PL_bufptr);
2622     SAVEPPTR(PL_bufend);
2623     SAVEPPTR(PL_oldbufptr);
2624     SAVEPPTR(PL_oldoldbufptr);
2625     SAVEPPTR(PL_last_lop);
2626     SAVEPPTR(PL_last_uni);
2627     SAVEPPTR(PL_linestart);
2628     SAVESPTR(PL_linestr);
2629     SAVEGENERICPV(PL_lex_brackstack);
2630     SAVEGENERICPV(PL_lex_casestack);
2631     SAVEGENERICPV(PL_parser->lex_shared);
2632     SAVEBOOL(PL_parser->lex_re_reparsing);
2633
2634     /* The here-doc parser needs to be able to peek into outer lexing
2635        scopes to find the body of the here-doc.  So we put PL_linestr and
2636        PL_bufptr into lex_shared, to ‘share’ those values.
2637      */
2638     PL_parser->lex_shared->ls_linestr = PL_linestr;
2639     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2640
2641     PL_linestr = PL_lex_stuff;
2642     PL_lex_repl = PL_sublex_info.repl;
2643     PL_lex_stuff = NULL;
2644     PL_sublex_info.repl = NULL;
2645
2646     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2647         = SvPVX(PL_linestr);
2648     PL_bufend += SvCUR(PL_linestr);
2649     PL_last_lop = PL_last_uni = NULL;
2650     SAVEFREESV(PL_linestr);
2651     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2652
2653     PL_lex_dojoin = FALSE;
2654     PL_lex_brackets = PL_lex_formbrack = 0;
2655     PL_lex_allbrackets = 0;
2656     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2657     Newx(PL_lex_brackstack, 120, char);
2658     Newx(PL_lex_casestack, 12, char);
2659     PL_lex_casemods = 0;
2660     *PL_lex_casestack = '\0';
2661     PL_lex_starts = 0;
2662     PL_lex_state = LEX_INTERPCONCAT;
2663     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2664
2665     Newxz(shared, 1, LEXSHARED);
2666     shared->ls_prev = PL_parser->lex_shared;
2667     PL_parser->lex_shared = shared;
2668
2669     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2670     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2671     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2672         PL_lex_inpat = PL_sublex_info.sub_op;
2673     else
2674         PL_lex_inpat = NULL;
2675
2676     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2677     PL_in_eval &= ~EVAL_RE_REPARSING;
2678
2679     return '(';
2680 }
2681
2682 /*
2683  * S_sublex_done
2684  * Restores lexer state after a S_sublex_push.
2685  */
2686
2687 STATIC I32
2688 S_sublex_done(pTHX)
2689 {
2690     dVAR;
2691     if (!PL_lex_starts++) {
2692         SV * const sv = newSVpvs("");
2693         if (SvUTF8(PL_linestr))
2694             SvUTF8_on(sv);
2695         PL_expect = XOPERATOR;
2696         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2697         return THING;
2698     }
2699
2700     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2701         PL_lex_state = LEX_INTERPCASEMOD;
2702         return yylex();
2703     }
2704
2705     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2706     assert(PL_lex_inwhat != OP_TRANSR);
2707     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2708         PL_linestr = PL_lex_repl;
2709         PL_lex_inpat = 0;
2710         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2711         PL_bufend += SvCUR(PL_linestr);
2712         PL_last_lop = PL_last_uni = NULL;
2713         PL_lex_dojoin = FALSE;
2714         PL_lex_brackets = 0;
2715         PL_lex_allbrackets = 0;
2716         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2717         PL_lex_casemods = 0;
2718         *PL_lex_casestack = '\0';
2719         PL_lex_starts = 0;
2720         if (SvEVALED(PL_lex_repl)) {
2721             PL_lex_state = LEX_INTERPNORMAL;
2722             PL_lex_starts++;
2723             /*  we don't clear PL_lex_repl here, so that we can check later
2724                 whether this is an evalled subst; that means we rely on the
2725                 logic to ensure sublex_done() is called again only via the
2726                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2727         }
2728         else {
2729             PL_lex_state = LEX_INTERPCONCAT;
2730             PL_lex_repl = NULL;
2731         }
2732         return ',';
2733     }
2734     else {
2735 #ifdef PERL_MAD
2736         if (PL_madskills) {
2737             if (PL_thiswhite) {
2738                 if (!PL_endwhite)
2739                     PL_endwhite = newSVpvs("");
2740                 sv_catsv(PL_endwhite, PL_thiswhite);
2741                 PL_thiswhite = 0;
2742             }
2743             if (PL_thistoken)
2744                 sv_setpvs(PL_thistoken,"");
2745             else
2746                 PL_realtokenstart = -1;
2747         }
2748 #endif
2749         LEAVE;
2750         PL_bufend = SvPVX(PL_linestr);
2751         PL_bufend += SvCUR(PL_linestr);
2752         PL_expect = XOPERATOR;
2753         PL_sublex_info.sub_inwhat = 0;
2754         return ')';
2755     }
2756 }
2757
2758 PERL_STATIC_INLINE SV*
2759 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2760 {
2761     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2762      * interior, hence to the "}".  Finds what the name resolves to, returning
2763      * an SV* containing it; NULL if no valid one found */
2764
2765     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2766
2767     HV * table;
2768     SV **cvp;
2769     SV *cv;
2770     SV *rv;
2771     HV *stash;
2772     const U8* first_bad_char_loc;
2773     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2774
2775     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2776
2777     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2778                                      e - backslash_ptr,
2779                                      &first_bad_char_loc))
2780     {
2781         /* If warnings are on, this will print a more detailed analysis of what
2782          * is wrong than the error message below */
2783         utf8n_to_uvuni(first_bad_char_loc,
2784                        e - ((char *) first_bad_char_loc),
2785                        NULL, 0);
2786
2787         /* We deliberately don't try to print the malformed character, which
2788          * might not print very well; it also may be just the first of many
2789          * malformations, so don't print what comes after it */
2790         yyerror(Perl_form(aTHX_
2791             "Malformed UTF-8 character immediately after '%.*s'",
2792             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2793         return NULL;
2794     }
2795
2796     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2797                         /* include the <}> */
2798                         e - backslash_ptr + 1);
2799     if (! SvPOK(res)) {
2800         SvREFCNT_dec_NN(res);
2801         return NULL;
2802     }
2803
2804     /* See if the charnames handler is the Perl core's, and if so, we can skip
2805      * the validation needed for a user-supplied one, as Perl's does its own
2806      * validation. */
2807     table = GvHV(PL_hintgv);             /* ^H */
2808     cvp = hv_fetchs(table, "charnames", FALSE);
2809     if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2810         && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2811     {
2812         const char * const name = HvNAME(stash);
2813         if strEQ(name, "_charnames") {
2814            return res;
2815        }
2816     }
2817
2818     /* Here, it isn't Perl's charname handler.  We can't rely on a
2819      * user-supplied handler to validate the input name.  For non-ut8 input,
2820      * look to see that the first character is legal.  Then loop through the
2821      * rest checking that each is a continuation */
2822
2823     /* This code needs to be sync'ed with a regex in _charnames.pm which does
2824      * the same thing */
2825
2826     if (! UTF) {
2827         if (! isALPHAU(*s)) {
2828             goto bad_charname;
2829         }
2830         s++;
2831         while (s < e) {
2832             if (! isCHARNAME_CONT(*s)) {
2833                 goto bad_charname;
2834             }
2835             if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2836                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2837                            "A sequence of multiple spaces in a charnames "
2838                            "alias definition is deprecated");
2839             }
2840             s++;
2841         }
2842         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2843             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2844                         "Trailing white-space in a charnames alias "
2845                         "definition is deprecated");
2846         }
2847     }
2848     else {
2849         /* Similarly for utf8.  For invariants can check directly; for other
2850          * Latin1, can calculate their code point and check; otherwise  use a
2851          * swash */
2852         if (UTF8_IS_INVARIANT(*s)) {
2853             if (! isALPHAU(*s)) {
2854                 goto bad_charname;
2855             }
2856             s++;
2857         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2858             if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
2859                 goto bad_charname;
2860             }
2861             s += 2;
2862         }
2863         else {
2864             if (! PL_utf8_charname_begin) {
2865                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2866                 PL_utf8_charname_begin = _core_swash_init("utf8",
2867                                                         "_Perl_Charname_Begin",
2868                                                         &PL_sv_undef,
2869                                                         1, 0, NULL, &flags);
2870             }
2871             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2872                 goto bad_charname;
2873             }
2874             s += UTF8SKIP(s);
2875         }
2876
2877         while (s < e) {
2878             if (UTF8_IS_INVARIANT(*s)) {
2879                 if (! isCHARNAME_CONT(*s)) {
2880                     goto bad_charname;
2881                 }
2882                 if (*s == ' ' && *(s-1) == ' '
2883                  && ckWARN_d(WARN_DEPRECATED)) {
2884                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2885                                "A sequence of multiple spaces in a charnam"
2886                                "es alias definition is deprecated");
2887                 }
2888                 s++;
2889             }
2890             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2891                 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2892                                                                     *(s+1)))))
2893                 {
2894                     goto bad_charname;
2895                 }
2896                 s += 2;
2897             }
2898             else {
2899                 if (! PL_utf8_charname_continue) {
2900                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2901                     PL_utf8_charname_continue = _core_swash_init("utf8",
2902                                                 "_Perl_Charname_Continue",
2903                                                 &PL_sv_undef,
2904                                                 1, 0, NULL, &flags);
2905                 }
2906                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2907                     goto bad_charname;
2908                 }
2909                 s += UTF8SKIP(s);
2910             }
2911         }
2912         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2913             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2914                        "Trailing white-space in a charnames alias "
2915                        "definition is deprecated");
2916         }
2917     }
2918
2919     if (SvUTF8(res)) { /* Don't accept malformed input */
2920         const U8* first_bad_char_loc;
2921         STRLEN len;
2922         const char* const str = SvPV_const(res, len);
2923         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2924             /* If warnings are on, this will print a more detailed analysis of
2925              * what is wrong than the error message below */
2926             utf8n_to_uvuni(first_bad_char_loc,
2927                            (char *) first_bad_char_loc - str,
2928                            NULL, 0);
2929
2930             /* We deliberately don't try to print the malformed character,
2931              * which might not print very well; it also may be just the first
2932              * of many malformations, so don't print what comes after it */
2933             yyerror_pv(
2934               Perl_form(aTHX_
2935                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2936                  (int) (e - backslash_ptr + 1), backslash_ptr,
2937                  (int) ((char *) first_bad_char_loc - str), str
2938               ),
2939               SVf_UTF8);
2940             return NULL;
2941         }
2942     }
2943
2944     return res;
2945
2946   bad_charname: {
2947         int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2948
2949         /* The final %.*s makes sure that should the trailing NUL be missing
2950          * that this print won't run off the end of the string */
2951         yyerror_pv(
2952           Perl_form(aTHX_
2953             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2954             (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2955             (int)(e - s + bad_char_size), s + bad_char_size
2956           ),
2957           UTF ? SVf_UTF8 : 0);
2958         return NULL;
2959     }
2960 }
2961
2962 /*
2963   scan_const
2964
2965   Extracts the next constant part of a pattern, double-quoted string,
2966   or transliteration.  This is terrifying code.
2967
2968   For example, in parsing the double-quoted string "ab\x63$d", it would
2969   stop at the '$' and return an OP_CONST containing 'abc'.
2970
2971   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2972   processing a pattern (PL_lex_inpat is true), a transliteration
2973   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2974
2975   Returns a pointer to the character scanned up to. If this is
2976   advanced from the start pointer supplied (i.e. if anything was
2977   successfully parsed), will leave an OP_CONST for the substring scanned
2978   in pl_yylval. Caller must intuit reason for not parsing further
2979   by looking at the next characters herself.
2980
2981   In patterns:
2982     expand:
2983       \N{FOO}  => \N{U+hex_for_character_FOO}
2984       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2985
2986     pass through:
2987         all other \-char, including \N and \N{ apart from \N{ABC}
2988
2989     stops on:
2990         @ and $ where it appears to be a var, but not for $ as tail anchor
2991         \l \L \u \U \Q \E
2992         (?{  or  (??{
2993
2994
2995   In transliterations:
2996     characters are VERY literal, except for - not at the start or end
2997     of the string, which indicates a range. If the range is in bytes,
2998     scan_const expands the range to the full set of intermediate
2999     characters. If the range is in utf8, the hyphen is replaced with
3000     a certain range mark which will be handled by pmtrans() in op.c.
3001
3002   In double-quoted strings:
3003     backslashes:
3004       double-quoted style: \r and \n
3005       constants: \x31, etc.
3006       deprecated backrefs: \1 (in substitution replacements)
3007       case and quoting: \U \Q \E
3008     stops on @ and $
3009
3010   scan_const does *not* construct ops to handle interpolated strings.
3011   It stops processing as soon as it finds an embedded $ or @ variable
3012   and leaves it to the caller to work out what's going on.
3013
3014   embedded arrays (whether in pattern or not) could be:
3015       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3016
3017   $ in double-quoted strings must be the symbol of an embedded scalar.
3018
3019   $ in pattern could be $foo or could be tail anchor.  Assumption:
3020   it's a tail anchor if $ is the last thing in the string, or if it's
3021   followed by one of "()| \r\n\t"
3022
3023   \1 (backreferences) are turned into $1 in substitutions
3024
3025   The structure of the code is
3026       while (there's a character to process) {
3027           handle transliteration ranges
3028           skip regexp comments /(?#comment)/ and codes /(?{code})/
3029           skip #-initiated comments in //x patterns
3030           check for embedded arrays
3031           check for embedded scalars
3032           if (backslash) {
3033               deprecate \1 in substitution replacements
3034               handle string-changing backslashes \l \U \Q \E, etc.
3035               switch (what was escaped) {
3036                   handle \- in a transliteration (becomes a literal -)
3037                   if a pattern and not \N{, go treat as regular character
3038                   handle \132 (octal characters)
3039                   handle \x15 and \x{1234} (hex characters)
3040                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3041                   handle \cV (control characters)
3042                   handle printf-style backslashes (\f, \r, \n, etc)
3043               } (end switch)
3044               continue
3045           } (end if backslash)
3046           handle regular character
3047     } (end while character to read)
3048
3049 */
3050
3051 STATIC char *
3052 S_scan_const(pTHX_ char *start)
3053 {
3054     dVAR;
3055     char *send = PL_bufend;             /* end of the constant */
3056     SV *sv = newSV(send - start);               /* sv for the constant.  See
3057                                                    note below on sizing. */
3058     char *s = start;                    /* start of the constant */
3059     char *d = SvPVX(sv);                /* destination for copies */
3060     bool dorange = FALSE;                       /* are we in a translit range? */
3061     bool didrange = FALSE;                      /* did we just finish a range? */
3062     bool in_charclass = FALSE;                  /* within /[...]/ */
3063     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
3064     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
3065                                                    to be UTF8?  But, this can
3066                                                    show as true when the source
3067                                                    isn't utf8, as for example
3068                                                    when it is entirely composed
3069                                                    of hex constants */
3070     SV *res;                            /* result from charnames */
3071
3072     /* Note on sizing:  The scanned constant is placed into sv, which is
3073      * initialized by newSV() assuming one byte of output for every byte of
3074      * input.  This routine expects newSV() to allocate an extra byte for a
3075      * trailing NUL, which this routine will append if it gets to the end of
3076      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3077      * CAPITAL LETTER A}), or more output than input if the constant ends up
3078      * recoded to utf8, but each time a construct is found that might increase
3079      * the needed size, SvGROW() is called.  Its size parameter each time is
3080      * based on the best guess estimate at the time, namely the length used so
3081      * far, plus the length the current construct will occupy, plus room for
3082      * the trailing NUL, plus one byte for every input byte still unscanned */
3083
3084     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3085                        before set */
3086 #ifdef EBCDIC
3087     UV literal_endpoint = 0;
3088     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3089 #endif
3090
3091     PERL_ARGS_ASSERT_SCAN_CONST;
3092
3093     assert(PL_lex_inwhat != OP_TRANSR);
3094     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3095         /* If we are doing a trans and we know we want UTF8 set expectation */
3096         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3097         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3098     }
3099
3100     /* Protect sv from errors and fatal warnings. */
3101     ENTER_with_name("scan_const");
3102     SAVEFREESV(sv);
3103
3104     while (s < send || dorange) {
3105
3106         /* get transliterations out of the way (they're most literal) */
3107         if (PL_lex_inwhat == OP_TRANS) {
3108             /* expand a range A-Z to the full set of characters.  AIE! */
3109             if (dorange) {
3110                 I32 i;                          /* current expanded character */
3111                 I32 min;                        /* first character in range */
3112                 I32 max;                        /* last character in range */
3113
3114 #ifdef EBCDIC
3115                 UV uvmax = 0;
3116 #endif
3117
3118                 if (has_utf8
3119 #ifdef EBCDIC
3120                     && !native_range
3121 #endif
3122                 ) {
3123                     char * const c = (char*)utf8_hop((U8*)d, -1);
3124                     char *e = d++;
3125                     while (e-- > c)
3126                         *(e + 1) = *e;
3127                     *c = (char)UTF_TO_NATIVE(0xff);
3128                     /* mark the range as done, and continue */
3129                     dorange = FALSE;
3130                     didrange = TRUE;
3131                     continue;
3132                 }
3133
3134                 i = d - SvPVX_const(sv);                /* remember current offset */
3135 #ifdef EBCDIC
3136                 SvGROW(sv,
3137                        SvLEN(sv) + (has_utf8 ?
3138                                     (512 - UTF_CONTINUATION_MARK +
3139                                      UNISKIP(0x100))
3140                                     : 256));
3141                 /* How many two-byte within 0..255: 128 in UTF-8,
3142                  * 96 in UTF-8-mod. */
3143 #else
3144                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
3145 #endif
3146                 d = SvPVX(sv) + i;              /* refresh d after realloc */
3147 #ifdef EBCDIC
3148                 if (has_utf8) {
3149                     int j;
3150                     for (j = 0; j <= 1; j++) {
3151                         char * const c = (char*)utf8_hop((U8*)d, -1);
3152                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3153                         if (j)
3154                             min = (U8)uv;
3155                         else if (uv < 256)
3156                             max = (U8)uv;
3157                         else {
3158                             max = (U8)0xff; /* only to \xff */
3159                             uvmax = uv; /* \x{100} to uvmax */
3160                         }
3161                         d = c; /* eat endpoint chars */
3162                      }
3163                 }
3164                else {
3165 #endif
3166                    d -= 2;              /* eat the first char and the - */
3167                    min = (U8)*d;        /* first char in range */
3168                    max = (U8)d[1];      /* last char in range  */
3169 #ifdef EBCDIC
3170                }
3171 #endif
3172
3173                 if (min > max) {
3174                     Perl_croak(aTHX_
3175                                "Invalid range \"%c-%c\" in transliteration operator",
3176                                (char)min, (char)max);
3177                 }
3178
3179 #ifdef EBCDIC
3180                 if (literal_endpoint == 2 &&
3181                     ((isLOWER(min) && isLOWER(max)) ||
3182                      (isUPPER(min) && isUPPER(max)))) {
3183                     if (isLOWER(min)) {
3184                         for (i = min; i <= max; i++)
3185                             if (isLOWER(i))
3186                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
3187                     } else {
3188                         for (i = min; i <= max; i++)
3189                             if (isUPPER(i))
3190                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
3191                     }
3192                 }
3193                 else
3194 #endif
3195                     for (i = min; i <= max; i++)
3196 #ifdef EBCDIC
3197                         if (has_utf8) {
3198                             const U8 ch = (U8)NATIVE_TO_UTF(i);
3199                             if (UNI_IS_INVARIANT(ch))
3200                                 *d++ = (U8)i;
3201                             else {
3202                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3203                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3204                             }
3205                         }
3206                         else
3207 #endif
3208                             *d++ = (char)i;
3209
3210 #ifdef EBCDIC
3211                 if (uvmax) {
3212                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3213                     if (uvmax > 0x101)
3214                         *d++ = (char)UTF_TO_NATIVE(0xff);
3215                     if (uvmax > 0x100)
3216                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3217                 }
3218 #endif
3219
3220                 /* mark the range as done, and continue */
3221                 dorange = FALSE;
3222                 didrange = TRUE;
3223 #ifdef EBCDIC
3224                 literal_endpoint = 0;
3225 #endif
3226                 continue;
3227             }
3228
3229             /* range begins (ignore - as first or last char) */
3230             else if (*s == '-' && s+1 < send  && s != start) {
3231                 if (didrange) {
3232                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3233                 }
3234                 if (has_utf8
3235 #ifdef EBCDIC
3236                     && !native_range
3237 #endif
3238                     ) {
3239                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
3240                     s++;
3241                     continue;
3242                 }
3243                 dorange = TRUE;
3244                 s++;
3245             }
3246             else {
3247                 didrange = FALSE;
3248 #ifdef EBCDIC
3249                 literal_endpoint = 0;
3250                 native_range = TRUE;
3251 #endif
3252             }
3253         }
3254
3255         /* if we get here, we're not doing a transliteration */
3256
3257         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3258             char *s1 = s-1;
3259             int esc = 0;
3260             while (s1 >= start && *s1-- == '\\')
3261                 esc = !esc;
3262             if (!esc)
3263                 in_charclass = TRUE;
3264         }
3265
3266         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3267             char *s1 = s-1;
3268             int esc = 0;
3269             while (s1 >= start && *s1-- == '\\')
3270                 esc = !esc;
3271             if (!esc)
3272                 in_charclass = FALSE;
3273         }
3274
3275         /* skip for regexp comments /(?#comment)/, except for the last
3276          * char, which will be done separately.
3277          * Stop on (?{..}) and friends */
3278
3279         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
3280             if (s[2] == '#') {
3281                 while (s+1 < send && *s != ')')
3282                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3283             }
3284             else if (!PL_lex_casemods && !in_charclass &&
3285                      (    s[2] == '{' /* This should match regcomp.c */
3286                       || (s[2] == '?' && s[3] == '{')))
3287             {
3288                 break;
3289             }
3290         }
3291
3292         /* likewise skip #-initiated comments in //x patterns */
3293         else if (*s == '#' && PL_lex_inpat &&
3294           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3295             while (s+1 < send && *s != '\n')
3296                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3297         }
3298
3299         /* no further processing of single-quoted regex */
3300         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3301             goto default_action;
3302
3303         /* check for embedded arrays
3304            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3305            */
3306         else if (*s == '@' && s[1]) {
3307             if (isWORDCHAR_lazy_if(s+1,UTF))
3308                 break;
3309             if (strchr(":'{$", s[1]))
3310                 break;
3311             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3312                 break; /* in regexp, neither @+ nor @- are interpolated */
3313         }
3314
3315         /* check for embedded scalars.  only stop if we're sure it's a
3316            variable.
3317         */
3318         else if (*s == '$') {
3319             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3320                 break;
3321             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3322                 if (s[1] == '\\') {
3323                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3324                                    "Possible unintended interpolation of $\\ in regex");
3325                 }
3326                 break;          /* in regexp, $ might be tail anchor */
3327             }
3328         }
3329
3330         /* End of else if chain - OP_TRANS rejoin rest */
3331
3332         /* backslashes */
3333         if (*s == '\\' && s+1 < send) {
3334             char* e;    /* Can be used for ending '}', etc. */
3335
3336             s++;
3337
3338             /* warn on \1 - \9 in substitution replacements, but note that \11
3339              * is an octal; and \19 is \1 followed by '9' */
3340             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3341                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3342             {
3343                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3344                 *--s = '$';
3345                 break;
3346             }
3347
3348             /* string-change backslash escapes */
3349             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3350                 --s;
3351                 break;
3352             }
3353             /* In a pattern, process \N, but skip any other backslash escapes.
3354              * This is because we don't want to translate an escape sequence
3355              * into a meta symbol and have the regex compiler use the meta
3356              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3357              * in spite of this, we do have to process \N here while the proper
3358              * charnames handler is in scope.  See bugs #56444 and #62056.
3359              * There is a complication because \N in a pattern may also stand
3360              * for 'match a non-nl', and not mean a charname, in which case its
3361              * processing should be deferred to the regex compiler.  To be a
3362              * charname it must be followed immediately by a '{', and not look
3363              * like \N followed by a curly quantifier, i.e., not something like
3364              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3365              * quantifier */
3366             else if (PL_lex_inpat
3367                     && (*s != 'N'
3368                         || s[1] != '{'
3369                         || regcurly(s + 1, FALSE)))
3370             {
3371                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3372                 goto default_action;
3373             }
3374
3375             switch (*s) {
3376
3377             /* quoted - in transliterations */
3378             case '-':
3379                 if (PL_lex_inwhat == OP_TRANS) {
3380                     *d++ = *s++;
3381                     continue;
3382                 }
3383                 /* FALL THROUGH */
3384             default:
3385                 {
3386                     if ((isALPHANUMERIC(*s)))
3387                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3388                                        "Unrecognized escape \\%c passed through",
3389                                        *s);
3390                     /* default action is to copy the quoted character */
3391                     goto default_action;
3392                 }
3393
3394             /* eg. \132 indicates the octal constant 0132 */
3395             case '0': case '1': case '2': case '3':
3396             case '4': case '5': case '6': case '7':
3397                 {
3398                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3399                     STRLEN len = 3;
3400                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3401                     s += len;
3402                     if (len < 3 && s < send && isDIGIT(*s)
3403                         && ckWARN(WARN_MISC))
3404                     {
3405                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3406                                     "%s", form_short_octal_warning(s, len));
3407                     }
3408                 }
3409                 goto NUM_ESCAPE_INSERT;
3410
3411             /* eg. \o{24} indicates the octal constant \024 */
3412             case 'o':
3413                 {
3414                     const char* error;
3415
3416                     bool valid = grok_bslash_o(&s, &uv, &error,
3417                                                TRUE, /* Output warning */
3418                                                FALSE, /* Not strict */
3419                                                TRUE, /* Output warnings for
3420                                                          non-portables */
3421                                                UTF);
3422                     if (! valid) {
3423                         yyerror(error);
3424                         continue;
3425                     }
3426                     goto NUM_ESCAPE_INSERT;
3427                 }
3428
3429             /* eg. \x24 indicates the hex constant 0x24 */
3430             case 'x':
3431                 {
3432                     const char* error;
3433
3434                     bool valid = grok_bslash_x(&s, &uv, &error,
3435                                                TRUE, /* Output warning */
3436                                                FALSE, /* Not strict */
3437                                                TRUE,  /* Output warnings for
3438                                                          non-portables */
3439                                                UTF);
3440                     if (! valid) {
3441                         yyerror(error);
3442                         continue;
3443                     }
3444                 }
3445
3446               NUM_ESCAPE_INSERT:
3447                 /* Insert oct or hex escaped character.  There will always be
3448                  * enough room in sv since such escapes will be longer than any
3449                  * UTF-8 sequence they can end up as, except if they force us
3450                  * to recode the rest of the string into utf8 */
3451
3452                 /* Here uv is the ordinal of the next character being added in
3453                  * unicode (converted from native). */
3454                 if (!UNI_IS_INVARIANT(uv)) {
3455                     if (!has_utf8 && uv > 255) {
3456                         /* Might need to recode whatever we have accumulated so
3457                          * far if it contains any chars variant in utf8 or
3458                          * utf-ebcdic. */
3459
3460                         SvCUR_set(sv, d - SvPVX_const(sv));
3461                         SvPOK_on(sv);
3462                         *d = '\0';
3463                         /* See Note on sizing above.  */
3464                         sv_utf8_upgrade_flags_grow(sv,
3465                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3466                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3467                         d = SvPVX(sv) + SvCUR(sv);
3468                         has_utf8 = TRUE;
3469                     }
3470
3471                     if (has_utf8) {
3472                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3473                         if (PL_lex_inwhat == OP_TRANS &&
3474                             PL_sublex_info.sub_op) {
3475                             PL_sublex_info.sub_op->op_private |=
3476                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3477                                              : OPpTRANS_TO_UTF);
3478                         }
3479 #ifdef EBCDIC
3480                         if (uv > 255 && !dorange)
3481                             native_range = FALSE;
3482 #endif
3483                     }
3484                     else {
3485                         *d++ = (char)uv;
3486                     }
3487                 }
3488                 else {
3489                     *d++ = (char) uv;
3490                 }
3491                 continue;
3492
3493             case 'N':
3494                 /* In a non-pattern \N must be a named character, like \N{LATIN
3495                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3496                  * mean to match a non-newline.  For non-patterns, named
3497                  * characters are converted to their string equivalents. In
3498                  * patterns, named characters are not converted to their
3499                  * ultimate forms for the same reasons that other escapes
3500                  * aren't.  Instead, they are converted to the \N{U+...} form
3501                  * to get the value from the charnames that is in effect right
3502                  * now, while preserving the fact that it was a named character
3503                  * so that the regex compiler knows this */
3504
3505                 /* This section of code doesn't generally use the
3506                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3507                  * a close examination of this macro and determined it is a
3508                  * no-op except on utfebcdic variant characters.  Every
3509                  * character generated by this that would normally need to be
3510                  * enclosed by this macro is invariant, so the macro is not
3511                  * needed, and would complicate use of copy().  XXX There are
3512                  * other parts of this file where the macro is used
3513                  * inconsistently, but are saved by it being a no-op */
3514
3515                 /* The structure of this section of code (besides checking for
3516                  * errors and upgrading to utf8) is:
3517                  *  Further disambiguate between the two meanings of \N, and if
3518                  *      not a charname, go process it elsewhere
3519                  *  If of form \N{U+...}, pass it through if a pattern;
3520                  *      otherwise convert to utf8
3521                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3522                  *  pattern; otherwise convert to utf8 */
3523
3524                 /* Here, s points to the 'N'; the test below is guaranteed to
3525                  * succeed if we are being called on a pattern as we already
3526                  * know from a test above that the next character is a '{'.
3527                  * On a non-pattern \N must mean 'named sequence, which
3528                  * requires braces */
3529                 s++;
3530                 if (*s != '{') {
3531                     yyerror("Missing braces on \\N{}");
3532                     continue;
3533                 }
3534                 s++;
3535
3536                 /* If there is no matching '}', it is an error. */
3537                 if (! (e = strchr(s, '}'))) {
3538                     if (! PL_lex_inpat) {
3539                         yyerror("Missing right brace on \\N{}");
3540                     } else {
3541                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3542                     }
3543                     continue;
3544                 }
3545
3546                 /* Here it looks like a named character */
3547
3548                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3549                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3550                                 | PERL_SCAN_DISALLOW_PREFIX;
3551                     STRLEN len;
3552
3553                     /* For \N{U+...}, the '...' is a unicode value even on
3554                      * EBCDIC machines */
3555                     s += 2;         /* Skip to next char after the 'U+' */
3556                     len = e - s;
3557                     uv = grok_hex(s, &len, &flags, NULL);
3558                     if (len == 0 || len != (STRLEN)(e - s)) {
3559                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3560                         s = e + 1;
3561                         continue;
3562                     }
3563
3564                     if (PL_lex_inpat) {
3565
3566                         /* On non-EBCDIC platforms, pass through to the regex
3567                          * compiler unchanged.  The reason we evaluated the
3568                          * number above is to make sure there wasn't a syntax
3569                          * error.  But on EBCDIC we convert to native so
3570                          * downstream code can continue to assume it's native
3571                          */
3572                         s -= 5;     /* Include the '\N{U+' */
3573 #ifdef EBCDIC
3574                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3575                                                                and the \0 */
3576                                     "\\N{U+%X}",
3577                                     (unsigned int) UNI_TO_NATIVE(uv));
3578 #else
3579                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3580                         d += e - s + 1;
3581 #endif
3582                     }
3583                     else {  /* Not a pattern: convert the hex to string */
3584
3585                          /* If destination is not in utf8, unconditionally
3586                           * recode it to be so.  This is because \N{} implies
3587                           * Unicode semantics, and scalars have to be in utf8
3588                           * to guarantee those semantics */
3589                         if (! has_utf8) {
3590                             SvCUR_set(sv, d - SvPVX_const(sv));
3591                             SvPOK_on(sv);
3592                             *d = '\0';
3593                             /* See Note on sizing above.  */
3594                             sv_utf8_upgrade_flags_grow(
3595                                         sv,
3596                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3597                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3598                             d = SvPVX(sv) + SvCUR(sv);
3599                             has_utf8 = TRUE;
3600                         }
3601
3602                         /* Add the string to the output */
3603                         if (UNI_IS_INVARIANT(uv)) {
3604                             *d++ = (char) uv;
3605                         }
3606                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3607                     }
3608                 }
3609                 else /* Here is \N{NAME} but not \N{U+...}. */
3610                      if ((res = get_and_check_backslash_N_name(s, e)))
3611                 {
3612                     STRLEN len;
3613                     const char *str = SvPV_const(res, len);
3614                     if (PL_lex_inpat) {
3615
3616                         if (! len) { /* The name resolved to an empty string */
3617                             Copy("\\N{}", d, 4, char);
3618                             d += 4;
3619                         }
3620                         else {
3621                             /* In order to not lose information for the regex
3622                             * compiler, pass the result in the specially made
3623                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3624                             * the code points in hex of each character
3625                             * returned by charnames */
3626
3627                             const char *str_end = str + len;
3628                             const STRLEN off = d - SvPVX_const(sv);
3629
3630                             if (! SvUTF8(res)) {
3631                                 /* For the non-UTF-8 case, we can determine the
3632                                  * exact length needed without having to parse
3633                                  * through the string.  Each character takes up
3634                                  * 2 hex digits plus either a trailing dot or
3635                                  * the "}" */
3636                                 d = off + SvGROW(sv, off
3637                                                     + 3 * len
3638                                                     + 6 /* For the "\N{U+", and
3639                                                            trailing NUL */
3640                                                     + (STRLEN)(send - e));
3641                                 Copy("\\N{U+", d, 5, char);
3642                                 d += 5;
3643                                 while (str < str_end) {
3644                                     char hex_string[4];
3645                                     my_snprintf(hex_string, sizeof(hex_string),
3646                                                 "%02X.", (U8) *str);
3647                                     Copy(hex_string, d, 3, char);
3648                                     d += 3;
3649                                     str++;
3650                                 }
3651                                 d--;    /* We will overwrite below the final
3652                                            dot with a right brace */
3653                             }
3654                             else {
3655                                 STRLEN char_length; /* cur char's byte length */
3656
3657                                 /* and the number of bytes after this is
3658                                  * translated into hex digits */
3659                                 STRLEN output_length;
3660
3661                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3662                                  * for max('U+', '.'); and 1 for NUL */
3663                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3664
3665                                 /* Get the first character of the result. */
3666                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3667                                                         len,
3668                                                         &char_length,
3669                                                         UTF8_ALLOW_ANYUV);
3670                                 /* Convert first code point to hex, including
3671                                  * the boiler plate before it.  For all these,
3672                                  * we convert to native format so that
3673                                  * downstream code can continue to assume the
3674                                  * input is native */
3675                                 output_length =
3676                                     my_snprintf(hex_string, sizeof(hex_string),
3677                                             "\\N{U+%X",
3678                                             (unsigned int) UNI_TO_NATIVE(uv));
3679
3680                                 /* Make sure there is enough space to hold it */
3681                                 d = off + SvGROW(sv, off
3682                                                     + output_length
3683                                                     + (STRLEN)(send - e)
3684                                                     + 2);       /* '}' + NUL */
3685                                 /* And output it */
3686                                 Copy(hex_string, d, output_length, char);
3687                                 d += output_length;
3688
3689                                 /* For each subsequent character, append dot and
3690                                 * its ordinal in hex */
3691                                 while ((str += char_length) < str_end) {
3692                                     const STRLEN off = d - SvPVX_const(sv);
3693                                     U32 uv = utf8n_to_uvuni((U8 *) str,
3694                                                             str_end - str,
3695                                                             &char_length,
3696                                                             UTF8_ALLOW_ANYUV);
3697                                     output_length =
3698                                         my_snprintf(hex_string,
3699                                             sizeof(hex_string),
3700                                             ".%X",
3701                                             (unsigned int) UNI_TO_NATIVE(uv));
3702
3703                                     d = off + SvGROW(sv, off
3704                                                         + output_length
3705                                                         + (STRLEN)(send - e)
3706                                                         + 2);   /* '}' +  NUL */
3707                                     Copy(hex_string, d, output_length, char);
3708                                     d += output_length;
3709                                 }
3710                             }
3711
3712                             *d++ = '}'; /* Done.  Add the trailing brace */
3713                         }
3714                     }
3715                     else { /* Here, not in a pattern.  Convert the name to a
3716                             * string. */
3717
3718                          /* If destination is not in utf8, unconditionally
3719                           * recode it to be so.  This is because \N{} implies
3720                           * Unicode semantics, and scalars have to be in utf8
3721                           * to guarantee those semantics */
3722                         if (! has_utf8) {
3723                             SvCUR_set(sv, d - SvPVX_const(sv));
3724                             SvPOK_on(sv);
3725                             *d = '\0';
3726                             /* See Note on sizing above.  */
3727                             sv_utf8_upgrade_flags_grow(sv,
3728                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3729                                                 len + (STRLEN)(send - s) + 1);
3730                             d = SvPVX(sv) + SvCUR(sv);
3731                             has_utf8 = TRUE;
3732                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3733
3734                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3735                              * set correctly here). */
3736                             const STRLEN off = d - SvPVX_const(sv);
3737                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3738                         }
3739                         Copy(str, d, len, char);
3740                         d += len;
3741                     }
3742
3743                     SvREFCNT_dec(res);
3744
3745                 } /* End \N{NAME} */
3746 #ifdef EBCDIC
3747                 if (!dorange)
3748                     native_range = FALSE; /* \N{} is defined to be Unicode */
3749 #endif
3750                 s = e + 1;  /* Point to just after the '}' */
3751                 continue;
3752
3753             /* \c is a control character */
3754             case 'c':
3755                 s++;
3756                 if (s < send) {
3757                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3758                 }
3759                 else {
3760                     yyerror("Missing control char name in \\c");
3761                 }
3762                 continue;
3763
3764             /* printf-style backslashes, formfeeds, newlines, etc */
3765             case 'b':
3766                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3767                 break;
3768             case 'n':
3769                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3770                 break;
3771             case 'r':
3772                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3773                 break;
3774             case 'f':
3775                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3776                 break;
3777             case 't':
3778                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3779                 break;
3780             case 'e':
3781                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3782                 break;
3783             case 'a':
3784                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3785                 break;
3786             } /* end switch */
3787
3788             s++;
3789             continue;
3790         } /* end if (backslash) */
3791 #ifdef EBCDIC
3792         else
3793             literal_endpoint++;
3794 #endif
3795
3796     default_action:
3797         /* If we started with encoded form, or already know we want it,
3798            then encode the next character */
3799         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3800             STRLEN len  = 1;
3801
3802
3803             /* One might think that it is wasted effort in the case of the
3804              * source being utf8 (this_utf8 == TRUE) to take the next character
3805              * in the source, convert it to an unsigned value, and then convert
3806              * it back again.  But the source has not been validated here.  The
3807              * routine that does the conversion checks for errors like
3808              * malformed utf8 */
3809
3810             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3811             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3812             if (!has_utf8) {
3813                 SvCUR_set(sv, d - SvPVX_const(sv));
3814                 SvPOK_on(sv);
3815                 *d = '\0';
3816                 /* See Note on sizing above.  */
3817                 sv_utf8_upgrade_flags_grow(sv,
3818                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3819                                         need + (STRLEN)(send - s) + 1);
3820                 d = SvPVX(sv) + SvCUR(sv);
3821                 has_utf8 = TRUE;
3822             } else if (need > len) {
3823                 /* encoded value larger than old, may need extra space (NOTE:
3824                  * SvCUR() is not set correctly here).   See Note on sizing
3825                  * above.  */
3826                 const STRLEN off = d - SvPVX_const(sv);
3827                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3828             }
3829             s += len;
3830
3831             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3832 #ifdef EBCDIC
3833             if (uv > 255 && !dorange)
3834                 native_range = FALSE;
3835 #endif
3836         }
3837         else {
3838             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3839         }
3840     } /* while loop to process each character */
3841
3842     /* terminate the string and set up the sv */
3843     *d = '\0';
3844     SvCUR_set(sv, d - SvPVX_const(sv));
3845     if (SvCUR(sv) >= SvLEN(sv))
3846         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3847                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3848
3849     SvPOK_on(sv);
3850     if (PL_encoding && !has_utf8) {
3851         sv_recode_to_utf8(sv, PL_encoding);
3852         if (SvUTF8(sv))
3853             has_utf8 = TRUE;
3854     }
3855     if (has_utf8) {
3856         SvUTF8_on(sv);
3857         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3858             PL_sublex_info.sub_op->op_private |=
3859                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3860         }
3861     }
3862
3863     /* shrink the sv if we allocated more than we used */
3864     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3865         SvPV_shrink_to_cur(sv);
3866     }
3867
3868     /* return the substring (via pl_yylval) only if we parsed anything */
3869     if (s > PL_bufptr) {
3870         SvREFCNT_inc_simple_void_NN(sv);
3871         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3872             && ! PL_parser->lex_re_reparsing)
3873         {
3874             const char *const key = PL_lex_inpat ? "qr" : "q";
3875             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3876             const char *type;
3877             STRLEN typelen;
3878
3879             if (PL_lex_inwhat == OP_TRANS) {
3880                 type = "tr";
3881                 typelen = 2;
3882             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3883                 type = "s";
3884                 typelen = 1;
3885             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3886                 type = "q";
3887                 typelen = 1;
3888             } else  {
3889                 type = "qq";
3890                 typelen = 2;
3891             }
3892
3893             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3894                                 type, typelen);
3895         }
3896         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3897     }
3898     LEAVE_with_name("scan_const");
3899     return s;
3900 }
3901
3902 /* S_intuit_more
3903  * Returns TRUE if there's more to the expression (e.g., a subscript),
3904  * FALSE otherwise.
3905  *
3906  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3907  *
3908  * ->[ and ->{ return TRUE
3909  * { and [ outside a pattern are always subscripts, so return TRUE
3910  * if we're outside a pattern and it's not { or [, then return FALSE
3911  * if we're in a pattern and the first char is a {
3912  *   {4,5} (any digits around the comma) returns FALSE
3913  * if we're in a pattern and the first char is a [
3914  *   [] returns FALSE
3915  *   [SOMETHING] has a funky algorithm to decide whether it's a
3916  *      character class or not.  It has to deal with things like
3917  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3918  * anything else returns TRUE
3919  */
3920
3921 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3922
3923 STATIC int
3924 S_intuit_more(pTHX_ char *s)
3925 {
3926     dVAR;
3927
3928     PERL_ARGS_ASSERT_INTUIT_MORE;
3929
3930     if (PL_lex_brackets)
3931         return TRUE;
3932     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3933         return TRUE;
3934     if (*s != '{' && *s != '[')
3935         return FALSE;
3936     if (!PL_lex_inpat)
3937         return TRUE;
3938
3939     /* In a pattern, so maybe we have {n,m}. */
3940     if (*s == '{') {
3941         if (regcurly(s, FALSE)) {
3942             return FALSE;
3943         }
3944         return TRUE;
3945     }
3946
3947     /* On the other hand, maybe we have a character class */
3948
3949     s++;
3950     if (*s == ']' || *s == '^')
3951         return FALSE;
3952     else {
3953         /* this is terrifying, and it works */
3954         int weight;
3955         char seen[256];
3956         const char * const send = strchr(s,']');
3957         unsigned char un_char, last_un_char;
3958         char tmpbuf[sizeof PL_tokenbuf * 4];
3959
3960         if (!send)              /* has to be an expression */
3961             return TRUE;
3962         weight = 2;             /* let's weigh the evidence */
3963
3964         if (*s == '$')
3965             weight -= 3;
3966         else if (isDIGIT(*s)) {
3967             if (s[1] != ']') {
3968                 if (isDIGIT(s[1]) && s[2] == ']')
3969                     weight -= 10;
3970             }
3971             else
3972                 weight -= 100;
3973         }
3974         Zero(seen,256,char);
3975         un_char = 255;
3976         for (; s < send; s++) {
3977             last_un_char = un_char;
3978             un_char = (unsigned char)*s;
3979             switch (*s) {
3980             case '@':
3981             case '&':
3982             case '$':
3983                 weight -= seen[un_char] * 10;
3984                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3985                     int len;
3986                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3987                     len = (int)strlen(tmpbuf);
3988                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3989                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3990                         weight -= 100;
3991                     else
3992                         weight -= 10;
3993                 }
3994                 else if (*s == '$' && s[1] &&
3995                   strchr("[#!%*<>()-=",s[1])) {
3996                     if (/*{*/ strchr("])} =",s[2]))
3997                         weight -= 10;
3998                     else
3999                         weight -= 1;
4000                 }
4001                 break;
4002             case '\\':
4003                 un_char = 254;
4004                 if (s[1]) {
4005                     if (strchr("wds]",s[1]))
4006                         weight += 100;
4007                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4008                         weight += 1;
4009                     else if (strchr("rnftbxcav",s[1]))
4010                         weight += 40;
4011                     else if (isDIGIT(s[1])) {
4012                         weight += 40;
4013                         while (s[1] && isDIGIT(s[1]))
4014                             s++;
4015                     }
4016                 }
4017                 else
4018                     weight += 100;
4019                 break;
4020             case '-':
4021                 if (s[1] == '\\')
4022                     weight += 50;
4023                 if (strchr("aA01! ",last_un_char))
4024                     weight += 30;
4025                 if (strchr("zZ79~",s[1]))
4026                     weight += 30;
4027                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4028                     weight -= 5;        /* cope with negative subscript */
4029                 break;
4030             default:
4031                 if (!isWORDCHAR(last_un_char)
4032                     && !(last_un_char == '$' || last_un_char == '@'
4033                          || last_un_char == '&')
4034                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4035                     char *d = tmpbuf;
4036                     while (isALPHA(*s))
4037                         *d++ = *s++;
4038                     *d = '\0';
4039                     if (keyword(tmpbuf, d - tmpbuf, 0))
4040                         weight -= 150;
4041                 }
4042                 if (un_char == last_un_char + 1)
4043                     weight += 5;
4044                 weight -= seen[un_char];
4045                 break;
4046             }
4047             seen[un_char]++;
4048         }
4049         if (weight >= 0)        /* probably a character class */
4050             return FALSE;
4051     }
4052
4053     return TRUE;
4054 }
4055
4056 /*
4057  * S_intuit_method
4058  *
4059  * Does all the checking to disambiguate
4060  *   foo bar
4061  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4062  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4063  *
4064  * First argument is the stuff after the first token, e.g. "bar".
4065  *
4066  * Not a method if foo is a filehandle.
4067  * Not a method if foo is a subroutine prototyped to take a filehandle.
4068  * Not a method if it's really "Foo $bar"
4069  * Method if it's "foo $bar"
4070  * Not a method if it's really "print foo $bar"
4071  * Method if it's really "foo package::" (interpreted as package->foo)
4072  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4073  * Not a method if bar is a filehandle or package, but is quoted with
4074  *   =>
4075  */
4076
4077 STATIC int
4078 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4079 {
4080     dVAR;
4081     char *s = start + (*start == '$');
4082     char tmpbuf[sizeof PL_tokenbuf];
4083     STRLEN len;
4084     GV* indirgv;
4085 #ifdef PERL_MAD
4086     int soff;
4087 #endif
4088
4089     PERL_ARGS_ASSERT_INTUIT_METHOD;
4090
4091     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4092             return 0;
4093     if (cv && SvPOK(cv)) {
4094         const char *proto = CvPROTO(cv);
4095         if (proto) {
4096             while (*proto && (isSPACE(*proto) || *proto == ';'))
4097                 proto++;
4098             if (*proto == '*')
4099                 return 0;
4100         }
4101     }
4102
4103     if (*start == '$') {
4104         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4105                 isUPPER(*PL_tokenbuf))
4106             return 0;
4107 #ifdef PERL_MAD
4108         len = start - SvPVX(PL_linestr);
4109 #endif
4110         s = PEEKSPACE(s);
4111 #ifdef PERL_MAD
4112         start = SvPVX(PL_linestr) + len;
4113 #endif
4114         PL_bufptr = start;
4115         PL_expect = XREF;
4116         return *s == '(' ? FUNCMETH : METHOD;
4117     }
4118
4119     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4120     /* start is the beginning of the possible filehandle/object,
4121      * and s is the end of it
4122      * tmpbuf is a copy of it (but with single quotes as double colons)
4123      */
4124
4125     if (!keyword(tmpbuf, len, 0)) {
4126         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4127             len -= 2;
4128             tmpbuf[len] = '\0';
4129 #ifdef PERL_MAD
4130             soff = s - SvPVX(PL_linestr);
4131 #endif
4132             goto bare_package;
4133         }
4134         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4135         if (indirgv && GvCVu(indirgv))
4136             return 0;
4137         /* filehandle or package name makes it a method */
4138         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4139 #ifdef PERL_MAD
4140             soff = s - SvPVX(PL_linestr);
4141 #endif
4142             s = PEEKSPACE(s);
4143             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4144                 return 0;       /* no assumptions -- "=>" quotes bareword */
4145       bare_package:
4146             start_force(PL_curforce);
4147             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4148                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4149             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4150             if (PL_madskills)
4151                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4152                                                             ( UTF ? SVf_UTF8 : 0 )));
4153             PL_expect = XTERM;
4154             force_next(WORD);
4155             PL_bufptr = s;
4156 #ifdef PERL_MAD
4157             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4158 #endif
4159             return *s == '(' ? FUNCMETH : METHOD;
4160         }
4161     }
4162     return 0;
4163 }
4164
4165 /* Encoded script support. filter_add() effectively inserts a
4166  * 'pre-processing' function into the current source input stream.
4167  * Note that the filter function only applies to the current source file
4168  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4169  *
4170  * The datasv parameter (which may be NULL) can be used to pass
4171  * private data to this instance of the filter. The filter function
4172  * can recover the SV using the FILTER_DATA macro and use it to
4173  * store private buffers and state information.
4174  *
4175  * The supplied datasv parameter is upgraded to a PVIO type
4176  * and the IoDIRP/IoANY field is used to store the function pointer,
4177  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4178  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4179  * private use must be set using malloc'd pointers.
4180  */
4181
4182 SV *
4183 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4184 {
4185     dVAR;
4186     if (!funcp)
4187         return NULL;
4188
4189     if (!PL_parser)
4190         return NULL;
4191
4192     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4193         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4194
4195     if (!PL_rsfp_filters)
4196         PL_rsfp_filters = newAV();
4197     if (!datasv)
4198         datasv = newSV(0);
4199     SvUPGRADE(datasv, SVt_PVIO);
4200     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4201     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4202     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4203                           FPTR2DPTR(void *, IoANY(datasv)),
4204                           SvPV_nolen(datasv)));
4205     av_unshift(PL_rsfp_filters, 1);
4206     av_store(PL_rsfp_filters, 0, datasv) ;
4207     if (
4208         !PL_parser->filtered
4209      && PL_parser->lex_flags & LEX_EVALBYTES
4210      && PL_bufptr < PL_bufend
4211     ) {
4212         const char *s = PL_bufptr;
4213         while (s < PL_bufend) {
4214             if (*s == '\n') {
4215                 SV *linestr = PL_parser->linestr;
4216                 char *buf = SvPVX(linestr);
4217                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4218                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4219                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4220                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4221                 STRLEN const last_uni_pos =
4222                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4223                 STRLEN const last_lop_pos =
4224                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4225                 av_push(PL_rsfp_filters, linestr);
4226                 PL_parser->linestr =
4227                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4228                 buf = SvPVX(PL_parser->linestr);
4229                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4230                 PL_parser->bufptr = buf + bufptr_pos;
4231                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4232                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4233                 PL_parser->linestart = buf + linestart_pos;
4234                 if (PL_parser->last_uni)
4235                     PL_parser->last_uni = buf + last_uni_pos;
4236                 if (PL_parser->last_lop)
4237                     PL_parser->last_lop = buf + last_lop_pos;
4238                 SvLEN(linestr) = SvCUR(linestr);
4239                 SvCUR(linestr) = s-SvPVX(linestr);
4240                 PL_parser->filtered = 1;
4241                 break;
4242             }
4243             s++;
4244         }
4245     }
4246     return(datasv);
4247 }
4248
4249
4250 /* Delete most recently added instance of this filter function. */
4251 void
4252 Perl_filter_del(pTHX_ filter_t funcp)
4253 {
4254     dVAR;
4255     SV *datasv;
4256
4257     PERL_ARGS_ASSERT_FILTER_DEL;
4258
4259 #ifdef DEBUGGING
4260     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4261                           FPTR2DPTR(void*, funcp)));
4262 #endif
4263     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4264         return;
4265     /* if filter is on top of stack (usual case) just pop it off */
4266     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4267     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4268         sv_free(av_pop(PL_rsfp_filters));
4269
4270         return;
4271     }
4272     /* we need to search for the correct entry and clear it     */
4273     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4274 }
4275
4276
4277 /* Invoke the idxth filter function for the current rsfp.        */
4278 /* maxlen 0 = read one text line */
4279 I32
4280 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4281 {
4282     dVAR;
4283     filter_t funcp;
4284     SV *datasv = NULL;
4285     /* This API is bad. It should have been using unsigned int for maxlen.
4286        Not sure if we want to change the API, but if not we should sanity
4287        check the value here.  */
4288     unsigned int correct_length
4289         = maxlen < 0 ?
4290 #ifdef PERL_MICRO
4291         0x7FFFFFFF
4292 #else
4293         INT_MAX
4294 #endif
4295         : maxlen;
4296
4297     PERL_ARGS_ASSERT_FILTER_READ;
4298
4299     if (!PL_parser || !PL_rsfp_filters)
4300         return -1;
4301     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4302         /* Provide a default input filter to make life easy.    */
4303         /* Note that we append to the line. This is handy.      */
4304         DEBUG_P(PerlIO_printf(Perl_debug_log,
4305                               "filter_read %d: from rsfp\n", idx));
4306         if (correct_length) {
4307             /* Want a block */
4308             int len ;
4309             const int old_len = SvCUR(buf_sv);
4310
4311             /* ensure buf_sv is large enough */
4312             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4313             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4314                                    correct_length)) <= 0) {
4315                 if (PerlIO_error(PL_rsfp))
4316                     return -1;          /* error */
4317                 else
4318                     return 0 ;          /* end of file */
4319             }
4320             SvCUR_set(buf_sv, old_len + len) ;
4321             SvPVX(buf_sv)[old_len + len] = '\0';
4322         } else {
4323             /* Want a line */
4324             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4325                 if (PerlIO_error(PL_rsfp))
4326                     return -1;          /* error */
4327                 else
4328                     return 0 ;          /* end of file */
4329             }
4330         }
4331         return SvCUR(buf_sv);
4332     }
4333     /* Skip this filter slot if filter has been deleted */
4334     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4335         DEBUG_P(PerlIO_printf(Perl_debug_log,
4336                               "filter_read %d: skipped (filter deleted)\n",
4337                               idx));
4338         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4339     }
4340     if (SvTYPE(datasv) != SVt_PVIO) {
4341         if (correct_length) {
4342             /* Want a block */
4343             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4344             if (!remainder) return 0; /* eof */
4345             if (correct_length > remainder) correct_length = remainder;
4346             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4347             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4348         } else {
4349             /* Want a line */
4350             const char *s = SvEND(datasv);
4351             const char *send = SvPVX(datasv) + SvLEN(datasv);
4352             while (s < send) {
4353                 if (*s == '\n') {
4354                     s++;
4355                     break;
4356                 }
4357                 s++;
4358             }
4359             if (s == send) return 0; /* eof */
4360             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4361             SvCUR_set(datasv, s-SvPVX(datasv));
4362         }
4363         return SvCUR(buf_sv);
4364     }
4365     /* Get function pointer hidden within datasv        */
4366     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4367     DEBUG_P(PerlIO_printf(Perl_debug_log,
4368                           "filter_read %d: via function %p (%s)\n",
4369                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4370     /* Call function. The function is expected to       */
4371     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4372     /* Return: <0:error, =0:eof, >0:not eof             */
4373     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4374 }
4375
4376 STATIC char *
4377 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4378 {
4379     dVAR;
4380
4381     PERL_ARGS_ASSERT_FILTER_GETS;
4382
4383 #ifdef PERL_CR_FILTER
4384     if (!PL_rsfp_filters) {
4385         filter_add(S_cr_textfilter,NULL);
4386     }
4387 #endif
4388     if (PL_rsfp_filters) {
4389         if (!append)
4390             SvCUR_set(sv, 0);   /* start with empty line        */
4391         if (FILTER_READ(0, sv, 0) > 0)
4392             return ( SvPVX(sv) ) ;
4393         else
4394             return NULL ;
4395     }
4396     else
4397         return (sv_gets(sv, PL_rsfp, append));
4398 }
4399
4400 STATIC HV *
4401 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4402 {
4403     dVAR;
4404     GV *gv;
4405
4406     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4407
4408     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4409         return PL_curstash;
4410
4411     if (len > 2 &&
4412         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4413         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4414     {
4415         return GvHV(gv);                        /* Foo:: */
4416     }
4417
4418     /* use constant CLASS => 'MyClass' */
4419     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4420     if (gv && GvCV(gv)) {
4421         SV * const sv = cv_const_sv(GvCV(gv));
4422         if (sv)
4423             pkgname = SvPV_const(sv, len);
4424     }
4425
4426     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4427 }
4428
4429 /*
4430  * S_readpipe_override
4431  * Check whether readpipe() is overridden, and generates the appropriate
4432  * optree, provided sublex_start() is called afterwards.
4433  */
4434 STATIC void
4435 S_readpipe_override(pTHX)
4436 {
4437     GV **gvp;
4438     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4439     pl_yylval.ival = OP_BACKTICK;
4440     if ((gv_readpipe
4441                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4442             ||
4443             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4444              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4445              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4446     {
4447         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4448             op_append_elem(OP_LIST,
4449                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4450                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4451     }
4452 }
4453
4454 #ifdef PERL_MAD
4455  /*
4456  * Perl_madlex
4457  * The intent of this yylex wrapper is to minimize the changes to the
4458  * tokener when we aren't interested in collecting madprops.  It remains
4459  * to be seen how successful this strategy will be...
4460  */
4461
4462 int
4463 Perl_madlex(pTHX)
4464 {
4465     int optype;
4466     char *s = PL_bufptr;
4467
4468     /* make sure PL_thiswhite is initialized */
4469     PL_thiswhite = 0;
4470     PL_thismad = 0;
4471
4472     /* previous token ate up our whitespace? */
4473     if (!PL_lasttoke && PL_nextwhite) {
4474         PL_thiswhite = PL_nextwhite;
4475         PL_nextwhite = 0;
4476     }
4477
4478     /* isolate the token, and figure out where it is without whitespace */
4479     PL_realtokenstart = -1;
4480     PL_thistoken = 0;
4481     optype = yylex();
4482     s = PL_bufptr;
4483     assert(PL_curforce < 0);
4484
4485     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4486         if (!PL_thistoken) {
4487             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4488                 PL_thistoken = newSVpvs("");
4489             else {
4490                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4491                 PL_thistoken = newSVpvn(tstart, s - tstart);
4492             }
4493         }
4494         if (PL_thismad) /* install head */
4495             CURMAD('X', PL_thistoken);
4496     }
4497
4498     /* last whitespace of a sublex? */
4499     if (optype == ')' && PL_endwhite) {
4500         CURMAD('X', PL_endwhite);
4501     }
4502
4503     if (!PL_thismad) {
4504
4505         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4506         if (!PL_thiswhite && !PL_endwhite && !optype) {
4507             sv_free(PL_thistoken);
4508             PL_thistoken = 0;
4509             return 0;
4510         }
4511
4512         /* put off final whitespace till peg */
4513         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4514             PL_nextwhite = PL_thiswhite;
4515             PL_thiswhite = 0;
4516         }
4517         else if (PL_thisopen) {
4518             CURMAD('q', PL_thisopen);
4519             if (PL_thistoken)
4520                 sv_free(PL_thistoken);
4521             PL_thistoken = 0;
4522         }
4523         else {
4524             /* Store actual token text as madprop X */
4525             CURMAD('X', PL_thistoken);
4526         }
4527
4528         if (PL_thiswhite) {
4529             /* add preceding whitespace as madprop _ */
4530             CURMAD('_', PL_thiswhite);
4531         }
4532
4533         if (PL_thisstuff) {
4534             /* add quoted material as madprop = */
4535             CURMAD('=', PL_thisstuff);
4536         }
4537
4538         if (PL_thisclose) {
4539             /* add terminating quote as madprop Q */
4540             CURMAD('Q', PL_thisclose);
4541         }
4542     }
4543
4544     /* special processing based on optype */
4545
4546     switch (optype) {
4547
4548     /* opval doesn't need a TOKEN since it can already store mp */
4549     case WORD:
4550     case METHOD:
4551     case FUNCMETH:
4552     case THING:
4553     case PMFUNC:
4554     case PRIVATEREF:
4555     case FUNC0SUB:
4556     case UNIOPSUB:
4557     case LSTOPSUB:
4558         if (pl_yylval.opval)
4559             append_madprops(PL_thismad, pl_yylval.opval, 0);
4560         PL_thismad = 0;
4561         return optype;
4562
4563     /* fake EOF */
4564     case 0:
4565         optype = PEG;
4566         if (PL_endwhite) {
4567             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4568             PL_endwhite = 0;
4569         }
4570         break;
4571
4572     /* pval */
4573     case LABEL:
4574         break;
4575
4576     case ']':
4577     case '}':
4578         if (PL_faketokens)
4579             break;
4580         /* remember any fake bracket that lexer is about to discard */
4581         if (PL_lex_brackets == 1 &&
4582             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4583         {
4584             s = PL_bufptr;
4585             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4586                 s++;
4587             if (*s == '}') {
4588                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4589                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4590                 PL_thiswhite = 0;
4591                 PL_bufptr = s - 1;
4592                 break;  /* don't bother looking for trailing comment */
4593             }
4594             else
4595                 s = PL_bufptr;
4596         }
4597         if (optype == ']')
4598             break;
4599         /* FALLTHROUGH */
4600
4601     /* attach a trailing comment to its statement instead of next token */
4602     case ';':
4603         if (PL_faketokens)
4604             break;
4605         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4606             s = PL_bufptr;
4607             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4608                 s++;
4609             if (*s == '\n' || *s == '#') {
4610                 while (s < PL_bufend && *s != '\n')
4611                     s++;
4612                 if (s < PL_bufend)
4613                     s++;
4614                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4615                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4616                 PL_thiswhite = 0;
4617                 PL_bufptr = s;
4618             }
4619         }
4620         break;
4621
4622     /* ival */
4623     default:
4624         break;
4625
4626     }
4627
4628     /* Create new token struct.  Note: opvals return early above. */
4629     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4630     PL_thismad = 0;
4631     return optype;
4632 }
4633 #endif
4634
4635 STATIC char *
4636 S_tokenize_use(pTHX_ int is_use, char *s) {
4637     dVAR;
4638
4639     PERL_ARGS_ASSERT_TOKENIZE_USE;
4640
4641     if (PL_expect != XSTATE)
4642         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4643                     is_use ? "use" : "no"));
4644     PL_expect = XTERM;
4645     s = SKIPSPACE1(s);
4646     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4647         s = force_version(s, TRUE);
4648         if (*s == ';' || *s == '}'
4649                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4650             start_force(PL_curforce);
4651             NEXTVAL_NEXTTOKE.opval = NULL;
4652             force_next(WORD);
4653         }
4654         else if (*s == 'v') {
4655             s = force_word(s,WORD,FALSE,TRUE);
4656             s = force_version(s, FALSE);
4657         }
4658     }
4659     else {
4660         s = force_word(s,WORD,FALSE,TRUE);
4661         s = force_version(s, FALSE);
4662     }
4663     pl_yylval.ival = is_use;
4664     return s;
4665 }
4666 #ifdef DEBUGGING
4667     static const char* const exp_name[] =
4668         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4669           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4670         };
4671 #endif
4672
4673 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4674 STATIC bool
4675 S_word_takes_any_delimeter(char *p, STRLEN len)
4676 {
4677     return (len == 1 && strchr("msyq", p[0])) ||
4678            (len == 2 && (
4679             (p[0] == 't' && p[1] == 'r') ||
4680             (p[0] == 'q' && strchr("qwxr", p[1]))));
4681 }
4682
4683 /*
4684   yylex
4685
4686   Works out what to call the token just pulled out of the input
4687   stream.  The yacc parser takes care of taking the ops we return and
4688   stitching them into a tree.
4689
4690   Returns:
4691     The type of the next token
4692
4693   Structure:
4694       Switch based on the current state:
4695           - if we already built the token before, use it
4696           - if we have a case modifier in a string, deal with that
4697           - handle other cases of interpolation inside a string
4698           - scan the next line if we are inside a format
4699       In the normal state switch on the next character:
4700           - default:
4701             if alphabetic, go to key lookup
4702             unrecoginized character - croak
4703           - 0/4/26: handle end-of-line or EOF
4704           - cases for whitespace
4705           - \n and #: handle comments and line numbers
4706           - various operators, brackets and sigils
4707           - numbers
4708           - quotes
4709           - 'v': vstrings (or go to key lookup)
4710           - 'x' repetition operator (or go to key lookup)
4711           - other ASCII alphanumerics (key lookup begins here):
4712               word before => ?
4713               keyword plugin
4714               scan built-in keyword (but do nothing with it yet)
4715               check for statement label
4716               check for lexical subs
4717                   goto just_a_word if there is one
4718               see whether built-in keyword is overridden
4719               switch on keyword number:
4720                   - default: just_a_word:
4721                       not a built-in keyword; handle bareword lookup
4722                       disambiguate between method and sub call
4723                       fall back to bareword
4724                   - cases for built-in keywords
4725 */
4726
4727
4728 #ifdef __SC__
4729 #pragma segment Perl_yylex
4730 #endif
4731 int
4732 Perl_yylex(pTHX)
4733 {
4734     dVAR;
4735     char *s = PL_bufptr;
4736     char *d;
4737     STRLEN len;
4738     bool bof = FALSE;
4739     const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
4740     U8 formbrack = 0;
4741     U32 fake_eof = 0;
4742
4743     /* orig_keyword, gvp, and gv are initialized here because
4744      * jump to the label just_a_word_zero can bypass their
4745      * initialization later. */
4746     I32 orig_keyword = 0;
4747     GV *gv = NULL;
4748     GV **gvp = NULL;
4749
4750     DEBUG_T( {
4751         SV* tmp = newSVpvs("");
4752         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4753             (IV)CopLINE(PL_curcop),
4754             lex_state_names[PL_lex_state],
4755             exp_name[PL_expect],
4756             pv_display(tmp, s, strlen(s), 0, 60));
4757         SvREFCNT_dec(tmp);
4758     } );
4759
4760     switch (PL_lex_state) {
4761 #ifdef COMMENTARY
4762     case LEX_NORMAL:            /* Some compilers will produce faster */
4763     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4764         break;
4765 #endif
4766
4767     /* when we've already built the next token, just pull it out of the queue */
4768     case LEX_KNOWNEXT:
4769 #ifdef PERL_MAD
4770         PL_lasttoke--;
4771         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4772         if (PL_madskills) {
4773             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4774             PL_nexttoke[PL_lasttoke].next_mad = 0;
4775             if (PL_thismad && PL_thismad->mad_key == '_') {
4776                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4777                 PL_thismad->mad_val = 0;
4778                 mad_free(PL_thismad);
4779                 PL_thismad = 0;
4780             }
4781         }
4782         if (!PL_lasttoke) {
4783             PL_lex_state = PL_lex_defer;
4784             PL_expect = PL_lex_expect;
4785             PL_lex_defer = LEX_NORMAL;
4786             if (!PL_nexttoke[PL_lasttoke].next_type)
4787                 return yylex();
4788         }
4789 #else
4790         PL_nexttoke--;
4791         pl_yylval = PL_nextval[PL_nexttoke];
4792         if (!PL_nexttoke) {
4793             PL_lex_state = PL_lex_defer;
4794             PL_expect = PL_lex_expect;
4795             PL_lex_defer = LEX_NORMAL;
4796         }
4797 #endif
4798         {
4799             I32 next_type;
4800 #ifdef PERL_MAD
4801             next_type = PL_nexttoke[PL_lasttoke].next_type;
4802 #else
4803             next_type = PL_nexttype[PL_nexttoke];
4804 #endif
4805             if (next_type & (7<<24)) {
4806                 if (next_type & (1<<24)) {
4807                     if (PL_lex_brackets > 100)
4808                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4809                     PL_lex_brackstack[PL_lex_brackets++] =
4810                         (char) ((next_type >> 16) & 0xff);
4811                 }
4812                 if (next_type & (2<<24))
4813                     PL_lex_allbrackets++;
4814                 if (next_type & (4<<24))
4815                     PL_lex_allbrackets--;
4816                 next_type &= 0xffff;
4817             }
4818             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4819         }
4820
4821     /* interpolated case modifiers like \L \U, including \Q and \E.
4822        when we get here, PL_bufptr is at the \
4823     */
4824     case LEX_INTERPCASEMOD:
4825 #ifdef DEBUGGING
4826         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4827             Perl_croak(aTHX_
4828                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4829                        PL_bufptr, PL_bufend, *PL_bufptr);
4830 #endif
4831         /* handle \E or end of string */
4832         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4833             /* if at a \E */
4834             if (PL_lex_casemods) {
4835                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4836                 PL_lex_casestack[PL_lex_casemods] = '\0';
4837
4838                 if (PL_bufptr != PL_bufend
4839                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4840                         || oldmod == 'F')) {
4841                     PL_bufptr += 2;
4842                     PL_lex_state = LEX_INTERPCONCAT;
4843 #ifdef PERL_MAD
4844                     if (PL_madskills)
4845                         PL_thistoken = newSVpvs("\\E");
4846 #endif
4847                 }
4848                 PL_lex_allbrackets--;
4849                 return REPORT(')');
4850             }
4851             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4852                /* Got an unpaired \E */
4853                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4854                         "Useless use of \\E");
4855             }
4856 #ifdef PERL_MAD
4857             while (PL_bufptr != PL_bufend &&
4858               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4859                 if (PL_madskills) {
4860                   if (!PL_thiswhite)
4861                     PL_thiswhite = newSVpvs("");
4862                   sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4863                 }
4864                 PL_bufptr += 2;
4865             }
4866 #else
4867             if (PL_bufptr != PL_bufend)
4868                 PL_bufptr += 2;
4869 #endif
4870             PL_lex_state = LEX_INTERPCONCAT;
4871             return yylex();
4872         }
4873         else {
4874             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4875               "### Saw case modifier\n"); });
4876             s = PL_bufptr + 1;
4877             if (s[1] == '\\' && s[2] == 'E') {
4878 #ifdef PERL_MAD
4879                 if (PL_madskills) {
4880                   if (!PL_thiswhite)
4881                     PL_thiswhite = newSVpvs("");
4882                   sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4883                 }
4884 #endif
4885                 PL_bufptr = s + 3;
4886                 PL_lex_state = LEX_INTERPCONCAT;
4887                 return yylex();
4888             }
4889             else {
4890                 I32 tmp;
4891                 if (!PL_madskills) /* when just compiling don't need correct */
4892                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4893                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4894                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4895                     (strchr(PL_lex_casestack, 'L')
4896                         || strchr(PL_lex_casestack, 'U')
4897                         || strchr(PL_lex_casestack, 'F'))) {
4898                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4899                     PL_lex_allbrackets--;
4900                     return REPORT(')');
4901                 }
4902                 if (PL_lex_casemods > 10)
4903                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4904                 PL_lex_casestack[PL_lex_casemods++] = *s;
4905                 PL_lex_casestack[PL_lex_casemods] = '\0';
4906                 PL_lex_state = LEX_INTERPCONCAT;
4907                 start_force(PL_curforce);
4908                 NEXTVAL_NEXTTOKE.ival = 0;
4909                 force_next((2<<24)|'(');
4910                 start_force(PL_curforce);
4911                 if (*s == 'l')
4912                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4913                 else if (*s == 'u')
4914                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4915                 else if (*s == 'L')
4916                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4917                 else if (*s == 'U')
4918                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4919                 else if (*s == 'Q')
4920                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4921                 else if (*s == 'F')
4922                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4923                 else
4924                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4925                 if (PL_madskills) {
4926                     SV* const tmpsv = newSVpvs("\\ ");
4927                     /* replace the space with the character we want to escape
4928                      */
4929                     SvPVX(tmpsv)[1] = *s;
4930                     curmad('_', tmpsv);
4931                 }
4932                 PL_bufptr = s + 1;
4933             }
4934             force_next(FUNC);
4935             if (PL_lex_starts) {
4936                 s = PL_bufptr;
4937                 PL_lex_starts = 0;
4938 #ifdef PERL_MAD
4939                 if (PL_madskills) {
4940                     if (PL_thistoken)
4941                         sv_free(PL_thistoken);
4942                     PL_thistoken = newSVpvs("");
4943                 }
4944 #endif
4945                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4946                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4947                     OPERATOR(',');
4948                 else
4949                     Aop(OP_CONCAT);
4950             }
4951             else
4952                 return yylex();
4953         }
4954
4955     case LEX_INTERPPUSH:
4956         return REPORT(sublex_push());
4957
4958     case LEX_INTERPSTART:
4959         if (PL_bufptr == PL_bufend)
4960             return REPORT(sublex_done());
4961         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4962               "### Interpolated variable\n"); });
4963         PL_expect = XTERM;
4964         /* for /@a/, we leave the joining for the regex engine to do
4965          * (unless we're within \Q etc) */
4966         PL_lex_dojoin = (*PL_bufptr == '@'
4967                             && (!PL_lex_inpat || PL_lex_casemods));
4968         PL_lex_state = LEX_INTERPNORMAL;
4969         if (PL_lex_dojoin) {
4970             start_force(PL_curforce);
4971             NEXTVAL_NEXTTOKE.ival = 0;
4972             force_next(',');
4973             start_force(PL_curforce);
4974             force_ident("\"", '$');
4975             start_force(PL_curforce);
4976             NEXTVAL_NEXTTOKE.ival = 0;
4977             force_next('$');
4978             start_force(PL_curforce);
4979             NEXTVAL_NEXTTOKE.ival = 0;
4980             force_next((2<<24)|'(');
4981             start_force(PL_curforce);
4982             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4983             force_next(FUNC);
4984         }
4985         /* Convert (?{...}) and friends to 'do {...}' */
4986         if (PL_lex_inpat && *PL_bufptr == '(') {
4987             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4988             PL_bufptr += 2;
4989             if (*PL_bufptr != '{')
4990                 PL_bufptr++;
4991             start_force(PL_curforce);
4992             /* XXX probably need a CURMAD(something) here */
4993             PL_expect = XTERMBLOCK;
4994             force_next(DO);
4995         }
4996
4997         if (PL_lex_starts++) {
4998             s = PL_bufptr;
4999 #ifdef PERL_MAD
5000             if (PL_madskills) {
5001                 if (PL_thistoken)
5002                     sv_free(PL_thistoken);
5003                 PL_thistoken = newSVpvs("");
5004             }
5005 #endif
5006             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5007             if (!PL_lex_casemods && PL_lex_inpat)
5008                 OPERATOR(',');
5009             else
5010                 Aop(OP_CONCAT);
5011         }
5012         return yylex();
5013
5014     case LEX_INTERPENDMAYBE:
5015         if (intuit_more(PL_bufptr)) {
5016             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
5017             break;
5018         }
5019         /* FALL THROUGH */
5020
5021     case LEX_INTERPEND:
5022         if (PL_lex_dojoin) {
5023             PL_lex_dojoin = FALSE;
5024             PL_lex_state = LEX_INTERPCONCAT;
5025 #ifdef PERL_MAD
5026             if (PL_madskills) {
5027                 if (PL_thistoken)
5028                     sv_free(PL_thistoken);
5029                 PL_thistoken = newSVpvs("");
5030             }
5031 #endif
5032             PL_lex_allbrackets--;
5033             return REPORT(')');
5034         }
5035         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5036             && SvEVALED(PL_lex_repl))
5037         {
5038             if (PL_bufptr != PL_bufend)
5039                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5040             PL_lex_repl = NULL;
5041         }
5042         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5043            re_eval_str.  If the here-doc body’s length equals the previous
5044            value of re_eval_start, re_eval_start will now be null.  So
5045            check re_eval_str as well. */
5046         if (PL_parser->lex_shared->re_eval_start
5047          || PL_parser->lex_shared->re_eval_str) {
5048             SV *sv;
5049             if (*PL_bufptr != ')')
5050                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5051             PL_bufptr++;
5052             /* having compiled a (?{..}) expression, return the original
5053              * text too, as a const */
5054             if (PL_parser->lex_shared->re_eval_str) {
5055                 sv = PL_parser->lex_shared->re_eval_str;
5056                 PL_parser->lex_shared->re_eval_str = NULL;
5057                 SvCUR_set(sv,
5058                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5059                 SvPV_shrink_to_cur(sv);
5060             }
5061             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5062                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5063             start_force(PL_curforce);
5064             /* XXX probably need a CURMAD(something) here */
5065             NEXTVAL_NEXTTOKE.opval =
5066                     (OP*)newSVOP(OP_CONST, 0,
5067                                  sv);
5068             force_next(THING);
5069             PL_parser->lex_shared->re_eval_start = NULL;
5070             PL_expect = XTERM;
5071             return REPORT(',');
5072         }
5073
5074         /* FALLTHROUGH */
5075     case LEX_INTERPCONCAT:
5076 #ifdef DEBUGGING
5077         if (PL_lex_brackets)
5078             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5079                        (long) PL_lex_brackets);
5080 #endif
5081         if (PL_bufptr == PL_bufend)
5082             return REPORT(sublex_done());
5083
5084         /* m'foo' still needs to be parsed for possible (?{...}) */
5085         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5086             SV *sv = newSVsv(PL_linestr);
5087             sv = tokeq(sv);
5088             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5089             s = PL_bufend;
5090         }
5091         else {
5092             s = scan_const(PL_bufptr);
5093             if (*s == '\\')
5094                 PL_lex_state = LEX_INTERPCASEMOD;
5095             else
5096                 PL_lex_state = LEX_INTERPSTART;
5097         }
5098
5099         if (s != PL_bufptr) {
5100             start_force(PL_curforce);
5101             if (PL_madskills) {
5102                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5103             }
5104             NEXTVAL_NEXTTOKE = pl_yylval;
5105             PL_expect = XTERM;
5106             force_next(THING);
5107             if (PL_lex_starts++) {
5108 #ifdef PERL_MAD
5109                 if (PL_madskills) {
5110                     if (PL_thistoken)
5111                         sv_free(PL_thistoken);
5112                     PL_thistoken = newSVpvs("");
5113                 }
5114 #endif
5115                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5116                 if (!PL_lex_casemods && PL_lex_inpat)
5117                     OPERATOR(',');
5118                 else
5119                     Aop(OP_CONCAT);
5120             }
5121             else {
5122                 PL_bufptr = s;
5123                 return yylex();
5124             }
5125         }
5126
5127         return yylex();
5128     case LEX_FORMLINE:
5129         s = scan_formline(PL_bufptr);
5130         if (!PL_lex_formbrack)
5131         {
5132             formbrack = 1;
5133             goto rightbracket;
5134         }
5135         PL_bufptr = s;
5136         return yylex();
5137     }
5138
5139     s = PL_bufptr;
5140     PL_oldoldbufptr = PL_oldbufptr;
5141     PL_oldbufptr = s;
5142     PL_parser->saw_infix_sigil = 0;
5143
5144   retry:
5145 #ifdef PERL_MAD
5146     if (PL_thistoken) {
5147         sv_free(PL_thistoken);
5148         PL_thistoken = 0;
5149     }
5150     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
5151 #endif
5152     switch (*s) {
5153     default:
5154         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5155             goto keylookup;
5156         {
5157         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5158         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
5159                                                     UTF8SKIP(s),
5160                                                     SVs_TEMP | SVf_UTF8),
5161                                             10, UNI_DISPLAY_ISPRINT))
5162                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5163         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5164         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5165             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5166         } else {
5167             d = PL_linestart;
5168         }
5169         *s = '\0';
5170         sv_setpv(dsv, d);
5171         if (UTF)
5172             SvUTF8_on(dsv);
5173         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
5174     }
5175     case 4:
5176     case 26:
5177         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5178     case 0:
5179 #ifdef PERL_MAD
5180         if (PL_madskills)
5181             PL_faketokens = 0;
5182 #endif
5183         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5184             PL_last_uni = 0;
5185             PL_last_lop = 0;
5186             if (PL_lex_brackets &&
5187                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5188                 yyerror((const char *)
5189                         (PL_lex_formbrack
5190                          ? "Format not terminated"
5191                          : "Missing right curly or square bracket"));
5192             }
5193             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5194                         "### Tokener got EOF\n");
5195             } );
5196             TOKEN(0);
5197         }
5198         if (s++ < PL_bufend)
5199             goto retry;                 /* ignore stray nulls */
5200         PL_last_uni = 0;
5201         PL_last_lop = 0;
5202         if (!PL_in_eval && !PL_preambled) {
5203             PL_preambled = TRUE;
5204 #ifdef PERL_MAD
5205             if (PL_madskills)
5206                 PL_faketokens = 1;
5207 #endif
5208             if (PL_perldb) {
5209                 /* Generate a string of Perl code to load the debugger.
5210                  * If PERL5DB is set, it will return the contents of that,
5211                  * otherwise a compile-time require of perl5db.pl.  */
5212
5213                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5214
5215                 if (pdb) {
5216                     sv_setpv(PL_linestr, pdb);
5217                     sv_catpvs(PL_linestr,";");
5218                 } else {
5219                     SETERRNO(0,SS_NORMAL);
5220                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5221                 }
5222             } else
5223                 sv_setpvs(PL_linestr,"");
5224             if (PL_preambleav) {
5225                 SV **svp = AvARRAY(PL_preambleav);
5226                 SV **const end = svp + AvFILLp(PL_preambleav);
5227                 while(svp <= end) {
5228                     sv_catsv(PL_linestr, *svp);
5229                     ++svp;
5230                     sv_catpvs(PL_linestr, ";");
5231                 }
5232                 sv_free(MUTABLE_SV(PL_preambleav));
5233                 PL_preambleav = NULL;
5234             }
5235             if (PL_minus_E)
5236                 sv_catpvs(PL_linestr,
5237                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5238             if (PL_minus_n || PL_minus_p) {
5239                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5240                 if (PL_minus_l)
5241                     sv_catpvs(PL_linestr,"chomp;");
5242                 if (PL_minus_a) {
5243                     if (PL_minus_F) {
5244                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5245                              || *PL_splitstr == '"')
5246                               && strchr(PL_splitstr + 1, *PL_splitstr))
5247                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5248                         else {
5249                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5250                                bytes can be used as quoting characters.  :-) */
5251                             const char *splits = PL_splitstr;
5252                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5253                             do {
5254                                 /* Need to \ \s  */
5255                                 if (*splits == '\\')
5256                                     sv_catpvn(PL_linestr, splits, 1);
5257                                 sv_catpvn(PL_linestr, splits, 1);
5258                             } while (*splits++);
5259                             /* This loop will embed the trailing NUL of
5260                                PL_linestr as the last thing it does before
5261                                terminating.  */
5262                             sv_catpvs(PL_linestr, ");");
5263                         }
5264                     }
5265                     else
5266                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5267                 }
5268             }
5269             sv_catpvs(PL_linestr, "\n");
5270             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5271             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5272             PL_last_lop = PL_last_uni = NULL;
5273             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5274                 update_debugger_info(PL_linestr, NULL, 0);
5275             goto retry;
5276         }
5277         do {
5278             fake_eof = 0;
5279             bof = PL_rsfp ? TRUE : FALSE;
5280             if (0) {
5281               fake_eof:
5282                 fake_eof = LEX_FAKE_EOF;
5283             }
5284             PL_bufptr = PL_bufend;
5285             COPLINE_INC_WITH_HERELINES;
5286             if (!lex_next_chunk(fake_eof)) {
5287                 CopLINE_dec(PL_curcop);
5288                 s = PL_bufptr;
5289                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5290             }
5291             CopLINE_dec(PL_curcop);
5292 #ifdef PERL_MAD
5293             if (!PL_rsfp)
5294                 PL_realtokenstart = -1;
5295 #endif
5296             s = PL_bufptr;
5297             /* If it looks like the start of a BOM or raw UTF-16,
5298              * check if it in fact is. */
5299             if (bof && PL_rsfp &&
5300                      (*s == 0 ||
5301                       *(U8*)s == 0xEF ||
5302                       *(U8*)s >= 0xFE ||
5303                       s[1] == 0)) {
5304                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5305                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5306 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5307                 /* offset may include swallowed CR */
5308                 if (!bof)
5309                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5310 #endif
5311                 if (bof) {
5312                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5313                     s = swallow_bom((U8*)s);
5314                 }
5315             }
5316             if (PL_parser->in_pod) {
5317                 /* Incest with pod. */
5318 #ifdef PERL_MAD
5319                 if (PL_madskills)
5320                     sv_catsv(PL_thiswhite, PL_linestr);
5321 #endif
5322                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5323                     sv_setpvs(PL_linestr, "");
5324                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5325                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5326                     PL_last_lop = PL_last_uni = NULL;
5327                     PL_parser->in_pod = 0;
5328                 }
5329             }
5330             if (PL_rsfp || PL_parser->filtered)
5331                 incline(s);
5332         } while (PL_parser->in_pod);
5333         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5334         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5335         PL_last_lop = PL_last_uni = NULL;
5336         if (CopLINE(PL_curcop) == 1) {
5337             while (s < PL_bufend && isSPACE(*s))
5338                 s++;
5339             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5340                 s++;
5341 #ifdef PERL_MAD
5342             if (PL_madskills)
5343                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5344 #endif
5345             d = NULL;
5346             if (!PL_in_eval) {
5347                 if (*s == '#' && *(s+1) == '!')
5348                     d = s + 2;
5349 #ifdef ALTERNATE_SHEBANG
5350                 else {
5351                     static char const as[] = ALTERNATE_SHEBANG;
5352                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5353                         d = s + (sizeof(as) - 1);
5354                 }
5355 #endif /* ALTERNATE_SHEBANG */
5356             }
5357             if (d) {
5358                 char *ipath;
5359                 char *ipathend;
5360
5361                 while (isSPACE(*d))
5362                     d++;
5363                 ipath = d;
5364                 while (*d && !isSPACE(*d))
5365                     d++;
5366                 ipathend = d;
5367
5368 #ifdef ARG_ZERO_IS_SCRIPT
5369                 if (ipathend > ipath) {
5370                     /*
5371                      * HP-UX (at least) sets argv[0] to the script name,
5372                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5373                      * at least, set argv[0] to the basename of the Perl
5374                      * interpreter. So, having found "#!", we'll set it right.
5375                      */
5376                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5377                                                     SVt_PV)); /* $^X */
5378                     assert(SvPOK(x) || SvGMAGICAL(x));
5379                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5380                         sv_setpvn(x, ipath, ipathend - ipath);
5381                         SvSETMAGIC(x);
5382                     }
5383                     else {
5384                         STRLEN blen;
5385                         STRLEN llen;
5386                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5387                         const char * const lstart = SvPV_const(x,llen);
5388                         if (llen < blen) {
5389                             bstart += blen - llen;
5390                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5391                                 sv_setpvn(x, ipath, ipathend - ipath);
5392                                 SvSETMAGIC(x);
5393                             }
5394                         }
5395                     }
5396                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5397                 }
5398 #endif /* ARG_ZERO_IS_SCRIPT */
5399
5400                 /*
5401                  * Look for options.
5402                  */
5403                 d = instr(s,"perl -");
5404                 if (!d) {
5405                     d = instr(s,"perl");
5406 #if defined(DOSISH)
5407                     /* avoid getting into infinite loops when shebang
5408                      * line contains "Perl" rather than "perl" */
5409                     if (!d) {
5410                         for (d = ipathend-4; d >= ipath; --d) {
5411                             if ((*d == 'p' || *d == 'P')
5412                                 && !ibcmp(d, "perl", 4))
5413                             {
5414                                 break;
5415                             }
5416                         }
5417                         if (d < ipath)
5418                             d = NULL;
5419                     }
5420 #endif
5421                 }
5422 #ifdef ALTERNATE_SHEBANG
5423                 /*
5424                  * If the ALTERNATE_SHEBANG on this system starts with a
5425                  * character that can be part of a Perl expression, then if
5426                  * we see it but not "perl", we're probably looking at the
5427                  * start of Perl code, not a request to hand off to some
5428                  * other interpreter.  Similarly, if "perl" is there, but
5429                  * not in the first 'word' of the line, we assume the line
5430                  * contains the start of the Perl program.
5431                  */
5432                 if (d && *s != '#') {
5433                     const char *c = ipath;
5434                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5435                         c++;
5436                     if (c < d)
5437                         d = NULL;       /* "perl" not in first word; ignore */
5438                     else
5439                         *s = '#';       /* Don't try to parse shebang line */
5440                 }
5441 #endif /* ALTERNATE_SHEBANG */
5442                 if (!d &&
5443                     *s == '#' &&
5444                     ipathend > ipath &&
5445                     !PL_minus_c &&
5446                     !instr(s,"indir") &&
5447                     instr(PL_origargv[0],"perl"))
5448                 {
5449                     dVAR;
5450                     char **newargv;
5451
5452                     *ipathend = '\0';
5453                     s = ipathend + 1;
5454                     while (s < PL_bufend && isSPACE(*s))
5455                         s++;
5456                     if (s < PL_bufend) {
5457                         Newx(newargv,PL_origargc+3,char*);
5458                         newargv[1] = s;
5459                         while (s < PL_bufend && !isSPACE(*s))
5460                             s++;
5461                         *s = '\0';
5462                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5463                     }
5464                     else
5465                         newargv = PL_origargv;
5466                     newargv[0] = ipath;
5467                     PERL_FPU_PRE_EXEC
5468                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5469                     PERL_FPU_POST_EXEC
5470                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5471                 }
5472                 if (d) {
5473                     while (*d && !isSPACE(*d))
5474                         d++;
5475                     while (SPACE_OR_TAB(*d))
5476                         d++;
5477
5478                     if (*d++ == '-') {
5479                         const bool switches_done = PL_doswitches;
5480                         const U32 oldpdb = PL_perldb;
5481                         const bool oldn = PL_minus_n;
5482                         const bool oldp = PL_minus_p;
5483                         const char *d1 = d;
5484
5485                         do {
5486                             bool baduni = FALSE;
5487                             if (*d1 == 'C') {
5488                                 const char *d2 = d1 + 1;
5489                                 if (parse_unicode_opts((const char **)&d2)
5490                                     != PL_unicode)
5491                                     baduni = TRUE;
5492                             }
5493                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5494                                 const char * const m = d1;
5495                                 while (*d1 && !isSPACE(*d1))
5496                                     d1++;
5497                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5498                                       (int)(d1 - m), m);
5499                             }
5500                             d1 = moreswitches(d1);
5501                         } while (d1);
5502                         if (PL_doswitches && !switches_done) {
5503                             int argc = PL_origargc;
5504                             char **argv = PL_origargv;
5505                             do {
5506                                 argc--,argv++;
5507                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5508                             init_argv_symbols(argc,argv);
5509                         }
5510                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5511                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5512                               /* if we have already added "LINE: while (<>) {",
5513                                  we must not do it again */
5514                         {
5515                             sv_setpvs(PL_linestr, "");
5516                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5517                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5518                             PL_last_lop = PL_last_uni = NULL;
5519                             PL_preambled = FALSE;
5520                             if (PERLDB_LINE || PERLDB_SAVESRC)
5521                                 (void)gv_fetchfile(PL_origfilename);
5522                             goto retry;
5523                         }
5524                     }
5525                 }
5526             }
5527         }
5528         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5529             PL_lex_state = LEX_FORMLINE;
5530             start_force(PL_curforce);
5531             NEXTVAL_NEXTTOKE.ival = 0;
5532             force_next(FORMRBRACK);
5533             TOKEN(';');
5534         }
5535         goto retry;
5536     case '\r':
5537 #ifdef PERL_STRICT_CR
5538         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5539         Perl_croak(aTHX_
5540       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5541 #endif
5542     case ' ': case '\t': case '\f': case 013:
5543 #ifdef PERL_MAD
5544         PL_realtokenstart = -1;
5545         if (PL_madskills) {
5546           if (!PL_thiswhite)
5547             PL_thiswhite = newSVpvs("");
5548           sv_catpvn(PL_thiswhite, s, 1);
5549         }
5550 #endif
5551         s++;
5552         goto retry;
5553     case '#':
5554     case '\n':
5555 #ifdef PERL_MAD
5556         PL_realtokenstart = -1;
5557         if (PL_madskills)
5558             PL_faketokens = 0;
5559 #endif
5560         if (PL_lex_state != LEX_NORMAL ||
5561              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5562             if (*s == '#' && s == PL_linestart && PL_in_eval
5563              && !PL_rsfp && !PL_parser->filtered) {
5564                 /* handle eval qq[#line 1 "foo"\n ...] */
5565                 CopLINE_dec(PL_curcop);
5566                 incline(s);
5567             }
5568             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5569                 s = SKIPSPACE0(s);
5570                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5571                     incline(s);
5572             }
5573             else {
5574                 const bool in_comment = *s == '#';
5575                 d = s;
5576                 while (d < PL_bufend && *d != '\n')
5577                     d++;
5578                 if (d < PL_bufend)
5579                     d++;
5580                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5581                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5582                                d, PL_bufend);
5583 #ifdef PERL_MAD
5584                 if (PL_madskills)
5585                     PL_thiswhite = newSVpvn(s, d - s);
5586 #endif
5587                 s = d;
5588                 if (in_comment && d == PL_bufend
5589                  && PL_lex_state == LEX_INTERPNORMAL
5590                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5591                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5592                 else incline(s);
5593             }
5594             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5595                 PL_lex_state = LEX_FORMLINE;
5596                 start_force(PL_curforce);
5597                 NEXTVAL_NEXTTOKE.ival = 0;
5598                 force_next(FORMRBRACK);
5599                 TOKEN(';');
5600             }
5601         }
5602         else {
5603 #ifdef PERL_MAD
5604             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5605                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5606                     PL_faketokens = 0;
5607                     s = SKIPSPACE0(s);
5608                     TOKEN(PEG); /* make sure any #! line is accessible */
5609                 }
5610                 s = SKIPSPACE0(s);
5611             }
5612             else {
5613 /*              if (PL_madskills && PL_lex_formbrack) { */
5614                     d = s;
5615                     while (d < PL_bufend && *d != '\n')
5616                         d++;
5617                     if (d < PL_bufend)
5618                         d++;
5619                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5620                       Perl_croak(aTHX_ "panic: input overflow");
5621                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5622                         if (!PL_thiswhite)
5623                             PL_thiswhite = newSVpvs("");
5624                         if (CopLINE(PL_curcop) == 1) {
5625                             sv_setpvs(PL_thiswhite, "");
5626                             PL_faketokens = 0;
5627                         }
5628                         sv_catpvn(PL_thiswhite, s, d - s);
5629                     }
5630                     s = d;
5631 /*              }
5632                 *s = '\0';
5633                 PL_bufend = s; */
5634             }
5635 #else
5636             *s = '\0';
5637             PL_bufend = s;
5638 #endif
5639         }
5640         goto retry;
5641     case '-':
5642         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5643             I32 ftst = 0;
5644             char tmp;
5645
5646             s++;
5647             PL_bufptr = s;
5648             tmp = *s++;
5649
5650             while (s < PL_bufend && SPACE_OR_TAB(*s))
5651                 s++;
5652
5653             if (strnEQ(s,"=>",2)) {
5654                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5655                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5656                 OPERATOR('-');          /* unary minus */
5657             }
5658             PL_last_uni = PL_oldbufptr;
5659             switch (tmp) {
5660             case 'r': ftst = OP_FTEREAD;        break;
5661             case 'w': ftst = OP_FTEWRITE;       break;
5662             case 'x': ftst = OP_FTEEXEC;        break;
5663             case 'o': ftst = OP_FTEOWNED;       break;
5664             case 'R': ftst = OP_FTRREAD;        break;
5665             case 'W': ftst = OP_FTRWRITE;       break;
5666             case 'X': ftst = OP_FTREXEC;        break;
5667             case 'O': ftst = OP_FTROWNED;       break;
5668             case 'e': ftst = OP_FTIS;           break;
5669             case 'z': ftst = OP_FTZERO;         break;
5670             case 's': ftst = OP_FTSIZE;         break;
5671             case 'f': ftst = OP_FTFILE;         break;
5672             case 'd': ftst = OP_FTDIR;          break;
5673             case 'l': ftst = OP_FTLINK;         break;
5674             case 'p': ftst = OP_FTPIPE;         break;
5675             case 'S': ftst = OP_FTSOCK;         break;
5676             case 'u': ftst = OP_FTSUID;         break;
5677             case 'g': ftst = OP_FTSGID;         break;
5678             case 'k': ftst = OP_FTSVTX;         break;
5679             case 'b': ftst = OP_FTBLK;          break;
5680             case 'c': ftst = OP_FTCHR;          break;
5681             case 't': ftst = OP_FTTTY;          break;
5682             case 'T': ftst = OP_FTTEXT;         break;
5683             case 'B': ftst = OP_FTBINARY;       break;
5684             case 'M': case 'A': case 'C':
5685                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5686                 switch (tmp) {
5687                 case 'M': ftst = OP_FTMTIME;    break;
5688                 case 'A': ftst = OP_FTATIME;    break;
5689                 case 'C': ftst = OP_FTCTIME;    break;
5690                 default:                        break;
5691                 }
5692                 break;
5693             default:
5694                 break;
5695             }
5696             if (ftst) {
5697                 PL_last_lop_op = (OPCODE)ftst;
5698                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5699                         "### Saw file test %c\n", (int)tmp);
5700                 } );
5701                 FTST(ftst);
5702             }
5703             else {
5704                 /* Assume it was a minus followed by a one-letter named
5705                  * subroutine call (or a -bareword), then. */
5706                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5707                         "### '-%c' looked like a file test but was not\n",
5708                         (int) tmp);
5709                 } );
5710                 s = --PL_bufptr;
5711             }
5712         }
5713         {
5714             const char tmp = *s++;
5715             if (*s == tmp) {
5716                 s++;
5717                 if (PL_expect == XOPERATOR)
5718                     TERM(POSTDEC);
5719                 else
5720                     OPERATOR(PREDEC);
5721             }
5722             else if (*s == '>') {
5723                 s++;
5724                 s = SKIPSPACE1(s);
5725                 if (isIDFIRST_lazy_if(s,UTF)) {
5726                     s = force_word(s,METHOD,FALSE,TRUE);
5727                     TOKEN(ARROW);
5728                 }
5729                 else if (*s == '$')
5730                     OPERATOR(ARROW);
5731                 else
5732                     TERM(ARROW);
5733             }
5734             if (PL_expect == XOPERATOR) {
5735                 if (*s == '=' && !PL_lex_allbrackets &&
5736                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5737                     s--;
5738                     TOKEN(0);
5739                 }
5740                 Aop(OP_SUBTRACT);
5741             }
5742             else {
5743                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5744                     check_uni();
5745                 OPERATOR('-');          /* unary minus */
5746             }
5747         }
5748
5749     case '+':
5750         {
5751             const char tmp = *s++;
5752             if (*s == tmp) {
5753                 s++;
5754                 if (PL_expect == XOPERATOR)
5755                     TERM(POSTINC);
5756                 else
5757                     OPERATOR(PREINC);
5758             }
5759             if (PL_expect == XOPERATOR) {
5760                 if (*s == '=' && !PL_lex_allbrackets &&
5761                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5762                     s--;
5763                     TOKEN(0);
5764                 }
5765                 Aop(OP_ADD);
5766             }
5767             else {
5768                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5769                     check_uni();
5770                 OPERATOR('+');
5771             }
5772         }
5773
5774     case '*':
5775         if (PL_expect != XOPERATOR) {
5776             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5777             PL_expect = XOPERATOR;
5778             force_ident(PL_tokenbuf, '*');
5779             if (!*PL_tokenbuf)
5780                 PREREF('*');
5781             TERM('*');
5782         }
5783         s++;
5784         if (*s == '*') {
5785             s++;
5786             if (*s == '=' && !PL_lex_allbrackets &&
5787                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5788                 s -= 2;
5789                 TOKEN(0);
5790             }
5791             PWop(OP_POW);
5792         }
5793         if (*s == '=' && !PL_lex_allbrackets &&
5794                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5795             s--;
5796             TOKEN(0);
5797         }
5798         PL_parser->saw_infix_sigil = 1;
5799         Mop(OP_MULTIPLY);
5800
5801     case '%':
5802         if (PL_expect == XOPERATOR) {
5803             if (s[1] == '=' && !PL_lex_allbrackets &&
5804                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5805                 TOKEN(0);
5806             ++s;
5807             PL_parser->saw_infix_sigil = 1;
5808             Mop(OP_MODULO);
5809         }
5810         PL_tokenbuf[0] = '%';
5811         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5812                 sizeof PL_tokenbuf - 1, FALSE);
5813         if (!PL_tokenbuf[1]) {
5814             PREREF('%');
5815         }
5816         PL_expect = XOPERATOR;
5817         force_ident_maybe_lex('%');
5818         TERM('%');
5819
5820     case '^':
5821         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5822                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5823             TOKEN(0);
5824         s++;
5825         BOop(OP_BIT_XOR);
5826     case '[':
5827         if (PL_lex_brackets > 100)
5828             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5829         PL_lex_brackstack[PL_lex_brackets++] = 0;
5830         PL_lex_allbrackets++;
5831         {
5832             const char tmp = *s++;
5833             OPERATOR(tmp);
5834         }
5835     case '~':
5836         if (s[1] == '~'
5837             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5838         {
5839             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5840                 TOKEN(0);
5841             s += 2;
5842             Perl_ck_warner_d(aTHX_
5843                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5844                 "Smartmatch is experimental");
5845             Eop(OP_SMARTMATCH);
5846         }
5847         s++;
5848         OPERATOR('~');
5849     case ',':
5850         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5851             TOKEN(0);
5852         s++;
5853         OPERATOR(',');
5854     case ':':
5855         if (s[1] == ':') {
5856             len = 0;
5857             goto just_a_word_zero_gv;
5858         }
5859         s++;
5860         switch (PL_expect) {
5861             OP *attrs;
5862 #ifdef PERL_MAD
5863             I32 stuffstart;
5864 #endif
5865         case XOPERATOR:
5866             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5867                 break;
5868             PL_bufptr = s;      /* update in case we back off */
5869             if (*s == '=') {
5870                 Perl_croak(aTHX_
5871                            "Use of := for an empty attribute list is not allowed");
5872             }
5873             goto grabattrs;
5874         case XATTRBLOCK:
5875             PL_expect = XBLOCK;
5876             goto grabattrs;
5877         case XATTRTERM:
5878             PL_expect = XTERMBLOCK;
5879          grabattrs:
5880 #ifdef PERL_MAD
5881             stuffstart = s - SvPVX(PL_linestr) - 1;
5882 #endif
5883             s = PEEKSPACE(s);
5884             attrs = NULL;
5885             while (isIDFIRST_lazy_if(s,UTF)) {
5886                 I32 tmp;
5887                 SV *sv;
5888                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5889                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5890                     if (tmp < 0) tmp = -tmp;
5891                     switch (tmp) {
5892                     case KEY_or:
5893                     case KEY_and:
5894                     case KEY_for:
5895                     case KEY_foreach:
5896                     case KEY_unless:
5897                     case KEY_if:
5898                     case KEY_while:
5899                     case KEY_until:
5900                         goto got_attrs;
5901                     default:
5902                         break;
5903                     }
5904                 }
5905                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5906                 if (*d == '(') {
5907                     d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5908                     if (!d) {
5909                         /* MUST advance bufptr here to avoid bogus
5910                            "at end of line" context messages from yyerror().
5911                          */
5912                         PL_bufptr = s + len;
5913                         yyerror("Unterminated attribute parameter in attribute list");
5914                         if (attrs)
5915                             op_free(attrs);
5916                         sv_free(sv);
5917                         return REPORT(0);       /* EOF indicator */
5918                     }
5919                 }
5920                 if (PL_lex_stuff) {
5921                     sv_catsv(sv, PL_lex_stuff);
5922                     attrs = op_append_elem(OP_LIST, attrs,
5923                                         newSVOP(OP_CONST, 0, sv));
5924                     SvREFCNT_dec(PL_lex_stuff);
5925                     PL_lex_stuff = NULL;
5926                 }
5927                 else {
5928                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5929                         sv_free(sv);
5930                         if (PL_in_my == KEY_our) {
5931                             deprecate(":unique");
5932                         }
5933                         else
5934                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5935                     }
5936
5937                     /* NOTE: any CV attrs applied here need to be part of
5938                        the CVf_BUILTIN_ATTRS define in cv.h! */
5939                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5940                         sv_free(sv);
5941                         CvLVALUE_on(PL_compcv);
5942                     }
5943                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5944                         sv_free(sv);
5945                         deprecate(":locked");
5946                     }
5947                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5948                         sv_free(sv);
5949                         CvMETHOD_on(PL_compcv);
5950                     }
5951                     /* After we've set the flags, it could be argued that
5952                        we don't need to do the attributes.pm-based setting
5953                        process, and shouldn't bother appending recognized
5954                        flags.  To experiment with that, uncomment the
5955                        following "else".  (Note that's already been
5956                        uncommented.  That keeps the above-applied built-in
5957                        attributes from being intercepted (and possibly
5958                        rejected) by a package's attribute routines, but is
5959                        justified by the performance win for the common case
5960                        of applying only built-in attributes.) */
5961                     else
5962                         attrs = op_append_elem(OP_LIST, attrs,
5963                                             newSVOP(OP_CONST, 0,
5964                                                     sv));
5965                 }
5966                 s = PEEKSPACE(d);
5967                 if (*s == ':' && s[1] != ':')
5968                     s = PEEKSPACE(s+1);
5969                 else if (s == d)
5970                     break;      /* require real whitespace or :'s */
5971                 /* XXX losing whitespace on sequential attributes here */
5972             }
5973             {
5974                 const char tmp
5975                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5976                 if (*s != ';' && *s != '}' && *s != tmp
5977                     && (tmp != '=' || *s != ')')) {
5978                     const char q = ((*s == '\'') ? '"' : '\'');
5979                     /* If here for an expression, and parsed no attrs, back
5980                        off. */
5981                     if (tmp == '=' && !attrs) {
5982                         s = PL_bufptr;
5983                         break;
5984                     }
5985                     /* MUST advance bufptr here to avoid bogus "at end of line"
5986                        context messages from yyerror().
5987                     */
5988                     PL_bufptr = s;
5989                     yyerror( (const char *)
5990                              (*s
5991                               ? Perl_form(aTHX_ "Invalid separator character "
5992                                           "%c%c%c in attribute list", q, *s, q)
5993                               : "Unterminated attribute list" ) );
5994                     if (attrs)
5995                         op_free(attrs);
5996                     OPERATOR(':');
5997                 }
5998             }
5999         got_attrs:
6000             if (attrs) {
6001                 start_force(PL_curforce);
6002                 NEXTVAL_NEXTTOKE.opval = attrs;
6003                 CURMAD('_', PL_nextwhite);
6004                 force_next(THING);
6005             }
6006 #ifdef PERL_MAD
6007             if (PL_madskills) {
6008                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6009                                      (s - SvPVX(PL_linestr)) - stuffstart);
6010             }
6011 #endif
6012             TOKEN(COLONATTR);
6013         }
6014         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6015             s--;
6016             TOKEN(0);
6017         }
6018         PL_lex_allbrackets--;
6019         OPERATOR(':');
6020     case '(':
6021         s++;
6022         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6023             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6024         else
6025             PL_expect = XTERM;
6026         s = SKIPSPACE1(s);
6027         PL_lex_allbrackets++;
6028         TOKEN('(');
6029     case ';':
6030         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6031             TOKEN(0);
6032         CLINE;
6033         s++;
6034         OPERATOR(';');
6035     case ')':
6036         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6037             TOKEN(0);
6038         s++;
6039         PL_lex_allbrackets--;
6040         s = SKIPSPACE1(s);
6041         if (*s == '{')
6042             PREBLOCK(')');
6043         TERM(')');
6044     case ']':
6045         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6046             TOKEN(0);
6047         s++;
6048         if (PL_lex_brackets <= 0)
6049             yyerror("Unmatched right square bracket");
6050         else
6051             --PL_lex_brackets;
6052         PL_lex_allbrackets--;
6053         if (PL_lex_state == LEX_INTERPNORMAL) {
6054             if (PL_lex_brackets == 0) {
6055                 if (*s == '-' && s[1] == '>')
6056                     PL_lex_state = LEX_INTERPENDMAYBE;
6057                 else if (*s != '[' && *s != '{')
6058                     PL_lex_state = LEX_INTERPEND;
6059             }
6060         }
6061         TERM(']');
6062     case '{':
6063         s++;
6064       leftbracket:
6065         if (PL_lex_brackets > 100) {
6066             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6067         }
6068         switch (PL_expect) {
6069         case XTERM:
6070             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6071             PL_lex_allbrackets++;
6072             OPERATOR(HASHBRACK);
6073         case XOPERATOR:
6074             while (s < PL_bufend && SPACE_OR_TAB(*s))
6075                 s++;
6076             d = s;
6077             PL_tokenbuf[0] = '\0';
6078             if (d < PL_bufend && *d == '-') {
6079                 PL_tokenbuf[0] = '-';
6080                 d++;
6081                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6082                     d++;
6083             }
6084             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6085                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6086                               FALSE, &len);
6087                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6088                     d++;
6089                 if (*d == '}') {
6090                     const char minus = (PL_tokenbuf[0] == '-');
6091                     s = force_word(s + minus, WORD, FALSE, TRUE);
6092                     if (minus)
6093                         force_next('-');
6094                 }
6095             }
6096             /* FALL THROUGH */
6097         case XATTRBLOCK:
6098         case XBLOCK:
6099             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6100             PL_lex_allbrackets++;
6101             PL_expect = XSTATE;
6102             break;
6103         case XATTRTERM:
6104         case XTERMBLOCK:
6105             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6106             PL_lex_allbrackets++;
6107             PL_expect = XSTATE;
6108             break;
6109         default: {
6110                 const char *t;
6111                 if (PL_oldoldbufptr == PL_last_lop)
6112                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6113                 else
6114                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6115                 PL_lex_allbrackets++;
6116                 s = SKIPSPACE1(s);
6117                 if (*s == '}') {
6118                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6119                         PL_expect = XTERM;
6120                         /* This hack is to get the ${} in the message. */
6121                         PL_bufptr = s+1;
6122                         yyerror("syntax error");
6123                         break;
6124                     }
6125                     OPERATOR(HASHBRACK);
6126                 }
6127                 /* This hack serves to disambiguate a pair of curlies
6128                  * as being a block or an anon hash.  Normally, expectation
6129                  * determines that, but in cases where we're not in a
6130                  * position to expect anything in particular (like inside
6131                  * eval"") we have to resolve the ambiguity.  This code
6132                  * covers the case where the first term in the curlies is a
6133                  * quoted string.  Most other cases need to be explicitly
6134                  * disambiguated by prepending a "+" before the opening
6135                  * curly in order to force resolution as an anon hash.
6136                  *
6137                  * XXX should probably propagate the outer expectation
6138                  * into eval"" to rely less on this hack, but that could
6139                  * potentially break current behavior of eval"".
6140                  * GSAR 97-07-21
6141                  */
6142                 t = s;
6143                 if (*s == '\'' || *s == '"' || *s == '`') {
6144                     /* common case: get past first string, handling escapes */
6145                     for (t++; t < PL_bufend && *t != *s;)
6146                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
6147                             t++;
6148                     t++;
6149                 }
6150                 else if (*s == 'q') {
6151                     if (++t < PL_bufend
6152                         && (!isWORDCHAR(*t)
6153                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6154                                 && !isWORDCHAR(*t))))
6155                     {
6156                         /* skip q//-like construct */
6157                         const char *tmps;
6158                         char open, close, term;
6159                         I32 brackets = 1;
6160
6161                         while (t < PL_bufend && isSPACE(*t))
6162                             t++;
6163                         /* check for q => */
6164                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6165                             OPERATOR(HASHBRACK);
6166                         }
6167                         term = *t;
6168                         open = term;
6169                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6170                             term = tmps[5];
6171                         close = term;
6172                         if (open == close)
6173                             for (t++; t < PL_bufend; t++) {
6174                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6175                                     t++;
6176                                 else if (*t == open)
6177                                     break;
6178                             }
6179                         else {
6180                             for (t++; t < PL_bufend; t++) {
6181                                 if (*t == '\\' && t+1 < PL_bufend)
6182                                     t++;
6183                                 else if (*t == close && --brackets <= 0)
6184                                     break;
6185                                 else if (*t == open)
6186                                     brackets++;
6187                             }
6188                         }
6189                         t++;
6190                     }
6191                     else
6192                         /* skip plain q word */
6193                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6194                              t += UTF8SKIP(t);
6195                 }
6196                 else if (isWORDCHAR_lazy_if(t,UTF)) {
6197                     t += UTF8SKIP(t);
6198                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6199                          t += UTF8SKIP(t);
6200                 }
6201                 while (t < PL_bufend && isSPACE(*t))
6202                     t++;
6203                 /* if comma follows first term, call it an anon hash */
6204                 /* XXX it could be a comma expression with loop modifiers */
6205                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6206                                    || (*t == '=' && t[1] == '>')))
6207                     OPERATOR(HASHBRACK);
6208                 if (PL_expect == XREF)
6209                     PL_expect = XTERM;
6210                 else {
6211                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6212                     PL_expect = XSTATE;
6213                 }
6214             }
6215             break;
6216         }
6217         pl_yylval.ival = CopLINE(PL_curcop);
6218         if (isSPACE(*s) || *s == '#')
6219             PL_copline = NOLINE;   /* invalidate current command line number */
6220         TOKEN(formbrack ? '=' : '{');
6221     case '}':
6222         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6223             TOKEN(0);
6224       rightbracket:
6225         s++;
6226         if (PL_lex_brackets <= 0)
6227             yyerror("Unmatched right curly bracket");
6228         else
6229             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6230         PL_lex_allbrackets--;
6231         if (PL_lex_state == LEX_INTERPNORMAL) {
6232             if (PL_lex_brackets == 0) {
6233                 if (PL_expect & XFAKEBRACK) {
6234                     PL_expect &= XENUMMASK;
6235                     PL_lex_state = LEX_INTERPEND;
6236                     PL_bufptr = s;
6237 #if 0
6238                     if (PL_madskills) {
6239                         if (!PL_thiswhite)
6240                             PL_thiswhite = newSVpvs("");
6241                         sv_catpvs(PL_thiswhite,"}");
6242                     }
6243 #endif
6244                     return yylex();     /* ignore fake brackets */
6245                 }
6246                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6247                  && SvEVALED(PL_lex_repl))
6248                     PL_lex_state = LEX_INTERPEND;
6249                 else if (*s == '-' && s[1] == '>')
6250                     PL_lex_state = LEX_INTERPENDMAYBE;
6251                 else if (*s != '[' && *s != '{')
6252                     PL_lex_state = LEX_INTERPEND;
6253             }
6254         }
6255         if (PL_expect & XFAKEBRACK) {
6256             PL_expect &= XENUMMASK;
6257             PL_bufptr = s;
6258             return yylex();             /* ignore fake brackets */
6259         }
6260         start_force(PL_curforce);
6261         if (PL_madskills) {
6262             curmad('X', newSVpvn(s-1,1));
6263             CURMAD('_', PL_thiswhite);
6264         }
6265         force_next(formbrack ? '.' : '}');
6266         if (formbrack) LEAVE;
6267 #ifdef PERL_MAD
6268         if (PL_madskills && !PL_thistoken)
6269             PL_thistoken = newSVpvs("");
6270 #endif
6271         if (formbrack == 2) { /* means . where arguments were expected */
6272             start_force(PL_curforce);
6273             force_next(';');
6274             TOKEN(FORMRBRACK);
6275         }
6276         TOKEN(';');
6277     case '&':
6278         s++;
6279         if (*s++ == '&') {
6280             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6281                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6282                 s -= 2;
6283                 TOKEN(0);
6284             }
6285             AOPERATOR(ANDAND);
6286         }
6287         s--;
6288         if (PL_expect == XOPERATOR) {
6289             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6290                 && isIDFIRST_lazy_if(s,UTF))
6291             {
6292                 CopLINE_dec(PL_curcop);
6293                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6294                 CopLINE_inc(PL_curcop);
6295             }
6296             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6297                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6298                 s--;
6299                 TOKEN(0);
6300             }
6301             PL_parser->saw_infix_sigil = 1;
6302             BAop(OP_BIT_AND);
6303         }
6304
6305         PL_tokenbuf[0] = '&';
6306         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6307                        sizeof PL_tokenbuf - 1, TRUE);
6308         if (PL_tokenbuf[1]) {
6309             PL_expect = XOPERATOR;
6310             force_ident_maybe_lex('&');
6311         }
6312         else
6313             PREREF('&');
6314         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6315         TERM('&');
6316
6317     case '|':
6318         s++;
6319         if (*s++ == '|') {
6320             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6321                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6322                 s -= 2;
6323                 TOKEN(0);
6324             }
6325             AOPERATOR(OROR);
6326         }
6327         s--;
6328         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6329                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6330             s--;
6331             TOKEN(0);
6332         }
6333         BOop(OP_BIT_OR);
6334     case '=':
6335         s++;
6336         {
6337             const char tmp = *s++;
6338             if (tmp == '=') {
6339                 if (!PL_lex_allbrackets &&
6340                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6341                     s -= 2;
6342                     TOKEN(0);
6343                 }
6344                 Eop(OP_EQ);
6345             }
6346             if (tmp == '>') {
6347                 if (!PL_lex_allbrackets &&
6348                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6349                     s -= 2;
6350                     TOKEN(0);
6351                 }
6352                 OPERATOR(',');
6353             }
6354             if (tmp == '~')
6355                 PMop(OP_MATCH);
6356             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6357                 && strchr("+-*/%.^&|<",tmp))
6358                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6359                             "Reversed %c= operator",(int)tmp);
6360             s--;
6361             if (PL_expect == XSTATE && isALPHA(tmp) &&
6362                 (s == PL_linestart+1 || s[-2] == '\n') )
6363                 {
6364                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6365                         || PL_lex_state != LEX_NORMAL) {
6366                         d = PL_bufend;
6367                         while (s < d) {
6368                             if (*s++ == '\n') {
6369                                 incline(s);
6370                                 if (strnEQ(s,"=cut",4)) {
6371                                     s = strchr(s,'\n');
6372                                     if (s)
6373                                         s++;
6374                                     else
6375                                         s = d;
6376                                     incline(s);
6377                                     goto retry;
6378                                 }
6379                             }
6380                         }
6381                         goto retry;
6382                     }
6383 #ifdef PERL_MAD
6384                     if (PL_madskills) {
6385                         if (!PL_thiswhite)
6386                             PL_thiswhite = newSVpvs("");
6387                         sv_catpvn(PL_thiswhite, PL_linestart,
6388                                   PL_bufend - PL_linestart);
6389                     }
6390 #endif
6391                     s = PL_bufend;
6392                     PL_parser->in_pod = 1;
6393                     goto retry;
6394                 }
6395         }
6396         if (PL_expect == XBLOCK) {
6397             const char *t = s;
6398 #ifdef PERL_STRICT_CR
6399             while (SPACE_OR_TAB(*t))
6400 #else
6401             while (SPACE_OR_TAB(*t) || *t == '\r')
6402 #endif
6403                 t++;
6404             if (*t == '\n' || *t == '#') {
6405                 formbrack = 1;
6406                 ENTER;
6407                 SAVEI8(PL_parser->form_lex_state);
6408                 SAVEI32(PL_lex_formbrack);
6409                 PL_parser->form_lex_state = PL_lex_state;
6410                 PL_lex_formbrack = PL_lex_brackets + 1;
6411                 goto leftbracket;
6412             }
6413         }
6414         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6415             s--;
6416             TOKEN(0);
6417         }
6418         pl_yylval.ival = 0;
6419         OPERATOR(ASSIGNOP);
6420     case '!':
6421         s++;
6422         {
6423             const char tmp = *s++;
6424             if (tmp == '=') {
6425                 /* was this !=~ where !~ was meant?
6426                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6427
6428                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6429                     const char *t = s+1;
6430
6431                     while (t < PL_bufend && isSPACE(*t))
6432                         ++t;
6433
6434                     if (*t == '/' || *t == '?' ||
6435                         ((*t == 'm' || *t == 's' || *t == 'y')
6436                          && !isWORDCHAR(t[1])) ||
6437                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6438                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6439                                     "!=~ should be !~");
6440                 }
6441                 if (!PL_lex_allbrackets &&
6442                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6443                     s -= 2;
6444                     TOKEN(0);
6445                 }
6446                 Eop(OP_NE);
6447             }
6448             if (tmp == '~')
6449                 PMop(OP_NOT);
6450         }
6451         s--;
6452         OPERATOR('!');
6453     case '<':
6454         if (PL_expect != XOPERATOR) {
6455             if (s[1] != '<' && !strchr(s,'>'))
6456                 check_uni();
6457             if (s[1] == '<')
6458                 s = scan_heredoc(s);
6459             else
6460                 s = scan_inputsymbol(s);
6461             PL_expect = XOPERATOR;
6462             TOKEN(sublex_start());
6463         }
6464         s++;
6465         {
6466             char tmp = *s++;
6467             if (tmp == '<') {
6468                 if (*s == '=' && !PL_lex_allbrackets &&
6469                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6470                     s -= 2;
6471                     TOKEN(0);
6472                 }
6473                 SHop(OP_LEFT_SHIFT);
6474             }
6475             if (tmp == '=') {
6476                 tmp = *s++;
6477                 if (tmp == '>') {
6478                     if (!PL_lex_allbrackets &&
6479                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6480                         s -= 3;
6481                         TOKEN(0);
6482                     }
6483                     Eop(OP_NCMP);
6484                 }
6485                 s--;
6486                 if (!PL_lex_allbrackets &&
6487                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6488                     s -= 2;
6489                     TOKEN(0);
6490                 }
6491                 Rop(OP_LE);
6492             }
6493         }
6494         s--;
6495         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6496             s--;
6497             TOKEN(0);
6498         }
6499         Rop(OP_LT);
6500     case '>':
6501         s++;
6502         {
6503             const char tmp = *s++;
6504             if (tmp == '>') {
6505                 if (*s == '=' && !PL_lex_allbrackets &&
6506                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6507                     s -= 2;
6508                     TOKEN(0);
6509                 }
6510                 SHop(OP_RIGHT_SHIFT);
6511             }
6512             else if (tmp == '=') {
6513                 if (!PL_lex_allbrackets &&
6514                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6515                     s -= 2;
6516                     TOKEN(0);
6517                 }
6518                 Rop(OP_GE);
6519             }
6520         }
6521         s--;
6522         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6523             s--;
6524             TOKEN(0);
6525         }
6526         Rop(OP_GT);
6527
6528     case '$':
6529         CLINE;
6530
6531         if (PL_expect == XOPERATOR) {
6532             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6533                 return deprecate_commaless_var_list();
6534             }
6535         }
6536
6537         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6538             PL_tokenbuf[0] = '@';
6539             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6540                            sizeof PL_tokenbuf - 1, FALSE);
6541             if (PL_expect == XOPERATOR)
6542                 no_op("Array length", s);
6543             if (!PL_tokenbuf[1])
6544                 PREREF(DOLSHARP);
6545             PL_expect = XOPERATOR;
6546             force_ident_maybe_lex('#');
6547             TOKEN(DOLSHARP);
6548         }
6549
6550         PL_tokenbuf[0] = '$';
6551         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6552                        sizeof PL_tokenbuf - 1, FALSE);
6553         if (PL_expect == XOPERATOR)
6554             no_op("Scalar", s);
6555         if (!PL_tokenbuf[1]) {
6556             if (s == PL_bufend)
6557                 yyerror("Final $ should be \\$ or $name");
6558             PREREF('$');
6559         }
6560
6561         d = s;
6562         {
6563             const char tmp = *s;
6564             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6565                 s = SKIPSPACE1(s);
6566
6567             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6568                 && intuit_more(s)) {
6569                 if (*s == '[') {
6570                     PL_tokenbuf[0] = '@';
6571                     if (ckWARN(WARN_SYNTAX)) {
6572                         char *t = s+1;
6573
6574                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6575                             t++;
6576                         if (*t++ == ',') {
6577                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6578                             while (t < PL_bufend && *t != ']')
6579                                 t++;
6580                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6581                                         "Multidimensional syntax %.*s not supported",
6582                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6583                         }
6584                     }
6585                 }
6586                 else if (*s == '{') {
6587                     char *t;
6588                     PL_tokenbuf[0] = '%';
6589                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6590                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6591                         {
6592                             char tmpbuf[sizeof PL_tokenbuf];
6593                             do {
6594                                 t++;
6595                             } while (isSPACE(*t));
6596                             if (isIDFIRST_lazy_if(t,UTF)) {
6597                                 STRLEN len;
6598                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6599                                               &len);
6600                                 while (isSPACE(*t))
6601                                     t++;
6602                                 if (*t == ';'
6603                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6604                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6605                                         "You need to quote \"%"UTF8f"\"",
6606                                          UTF8fARG(UTF, len, tmpbuf));
6607                             }
6608                         }
6609                 }
6610             }
6611
6612             PL_expect = XOPERATOR;
6613             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6614                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6615                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6616                     PL_expect = XOPERATOR;
6617                 else if (strchr("$@\"'`q", *s))
6618                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6619                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6620                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6621                 else if (isIDFIRST_lazy_if(s,UTF)) {
6622                     char tmpbuf[sizeof PL_tokenbuf];
6623                     int t2;
6624                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6625                     if ((t2 = keyword(tmpbuf, len, 0))) {
6626                         /* binary operators exclude handle interpretations */
6627                         switch (t2) {
6628                         case -KEY_x:
6629                         case -KEY_eq:
6630                         case -KEY_ne:
6631                         case -KEY_gt:
6632                         case -KEY_lt:
6633                         case -KEY_ge:
6634                         case -KEY_le:
6635                         case -KEY_cmp:
6636                             break;
6637                         default:
6638                             PL_expect = XTERM;  /* e.g. print $fh length() */
6639                             break;
6640                         }
6641                     }
6642                     else {
6643                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6644                     }
6645                 }
6646                 else if (isDIGIT(*s))
6647                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6648                 else if (*s == '.' && isDIGIT(s[1]))
6649                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6650                 else if ((*s == '?' || *s == '-' || *s == '+')
6651                          && !isSPACE(s[1]) && s[1] != '=')
6652                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6653                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6654                          && s[1] != '/')
6655                     PL_expect = XTERM;          /* e.g. print $fh /.../
6656                                                    XXX except DORDOR operator
6657                                                 */
6658                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6659                          && s[2] != '=')
6660                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6661             }
6662         }
6663         force_ident_maybe_lex('$');
6664         TOKEN('$');
6665
6666     case '@':
6667         if (PL_expect == XOPERATOR)
6668             no_op("Array", s);
6669         PL_tokenbuf[0] = '@';
6670         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6671         if (!PL_tokenbuf[1]) {
6672             PREREF('@');
6673         }
6674         if (PL_lex_state == LEX_NORMAL)
6675             s = SKIPSPACE1(s);
6676         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6677             if (*s == '{')
6678                 PL_tokenbuf[0] = '%';
6679
6680             /* Warn about @ where they meant $. */
6681             if (*s == '[' || *s == '{') {
6682                 if (ckWARN(WARN_SYNTAX)) {
6683                     const char *t = s + 1;
6684                     while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6685                         t += UTF ? UTF8SKIP(t) : 1;
6686                     if (*t == '}' || *t == ']') {
6687                         t++;
6688                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6689        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6690                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6691                          "Scalar value %"UTF8f" better written as $%"UTF8f,
6692                           UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
6693                           UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
6694                     }
6695                 }
6696             }
6697         }
6698         PL_expect = XOPERATOR;
6699         force_ident_maybe_lex('@');
6700         TERM('@');
6701
6702      case '/':                  /* may be division, defined-or, or pattern */
6703         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6704             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6705                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6706                 TOKEN(0);
6707             s += 2;
6708             AOPERATOR(DORDOR);
6709         }
6710      case '?':                  /* may either be conditional or pattern */
6711         if (PL_expect == XOPERATOR) {
6712              char tmp = *s++;
6713              if(tmp == '?') {
6714                 if (!PL_lex_allbrackets &&
6715                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6716                     s--;
6717                     TOKEN(0);
6718                 }
6719                 PL_lex_allbrackets++;
6720                 OPERATOR('?');
6721              }
6722              else {
6723                  tmp = *s++;
6724                  if(tmp == '/') {
6725                      /* A // operator. */
6726                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6727                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6728                                             LEX_FAKEEOF_LOGIC)) {
6729                         s -= 2;
6730                         TOKEN(0);
6731                     }
6732                     AOPERATOR(DORDOR);
6733                  }
6734                  else {
6735                      s--;
6736                      if (*s == '=' && !PL_lex_allbrackets &&
6737                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6738                          s--;
6739                          TOKEN(0);
6740                      }
6741                      Mop(OP_DIVIDE);
6742                  }
6743              }
6744          }
6745          else {
6746              /* Disable warning on "study /blah/" */
6747              if (PL_oldoldbufptr == PL_last_uni
6748               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6749                   || memNE(PL_last_uni, "study", 5)
6750                   || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6751               ))
6752                  check_uni();
6753              if (*s == '?')
6754                  deprecate("?PATTERN? without explicit operator");
6755              s = scan_pat(s,OP_MATCH);
6756              TERM(sublex_start());
6757          }
6758
6759     case '.':
6760         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6761 #ifdef PERL_STRICT_CR
6762             && s[1] == '\n'
6763 #else
6764             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6765 #endif
6766             && (s == PL_linestart || s[-1] == '\n') )
6767         {
6768             PL_expect = XSTATE;
6769             formbrack = 2; /* dot seen where arguments expected */
6770             goto rightbracket;
6771         }
6772         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6773             s += 3;
6774             OPERATOR(YADAYADA);
6775         }
6776         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6777             char tmp = *s++;
6778             if (*s == tmp) {
6779                 if (!PL_lex_allbrackets &&
6780                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6781                     s--;
6782                     TOKEN(0);
6783                 }
6784                 s++;
6785                 if (*s == tmp) {
6786                     s++;
6787                     pl_yylval.ival = OPf_SPECIAL;
6788                 }
6789                 else
6790                     pl_yylval.ival = 0;
6791                 OPERATOR(DOTDOT);
6792             }
6793             if (*s == '=' && !PL_lex_allbrackets &&
6794                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6795                 s--;
6796                 TOKEN(0);
6797             }
6798             Aop(OP_CONCAT);
6799         }
6800         /* FALL THROUGH */
6801     case '0': case '1': case '2': case '3': case '4':
6802     case '5': case '6': case '7': case '8': case '9':
6803         s = scan_num(s, &pl_yylval);
6804         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6805         if (PL_expect == XOPERATOR)
6806             no_op("Number",s);
6807         TERM(THING);
6808
6809     case '\'':
6810         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6811         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6812         if (PL_expect == XOPERATOR) {
6813             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6814                 return deprecate_commaless_var_list();
6815             }
6816             else
6817                 no_op("String",s);
6818         }
6819         if (!s)
6820             missingterm(NULL);
6821         pl_yylval.ival = OP_CONST;
6822         TERM(sublex_start());
6823
6824     case '"':
6825         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6826         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6827         if (PL_expect == XOPERATOR) {
6828             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6829                 return deprecate_commaless_var_list();
6830             }
6831             else
6832                 no_op("String",s);
6833         }
6834         if (!s)
6835             missingterm(NULL);
6836         pl_yylval.ival = OP_CONST;
6837         /* FIXME. I think that this can be const if char *d is replaced by
6838            more localised variables.  */
6839         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6840             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6841                 pl_yylval.ival = OP_STRINGIFY;
6842                 break;
6843             }
6844         }
6845         TERM(sublex_start());
6846
6847     case '`':
6848         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6849         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6850         if (PL_expect == XOPERATOR)
6851             no_op("Backticks",s);
6852         if (!s)
6853             missingterm(NULL);
6854         readpipe_override();
6855         TERM(sublex_start());
6856
6857     case '\\':
6858         s++;
6859         if (PL_lex_inwhat && isDIGIT(*s))
6860             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6861                            *s, *s);
6862         if (PL_expect == XOPERATOR)
6863             no_op("Backslash",s);
6864         OPERATOR(REFGEN);
6865
6866     case 'v':
6867         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6868             char *start = s + 2;
6869             while (isDIGIT(*start) || *start == '_')
6870                 start++;
6871             if (*start == '.' && isDIGIT(start[1])) {
6872                 s = scan_num(s, &pl_yylval);
6873                 TERM(THING);
6874             }
6875             else if ((*start == ':' && start[1] == ':')
6876                   || (PL_expect == XSTATE && *start == ':'))
6877                 goto keylookup;
6878             else if (PL_expect == XSTATE) {
6879                 d = start;
6880                 while (d < PL_bufend && isSPACE(*d)) d++;
6881                 if (*d == ':') goto keylookup;
6882             }
6883             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6884             if (!isALPHA(*start) && (PL_expect == XTERM
6885                         || PL_expect == XREF || PL_expect == XSTATE
6886                         || PL_expect == XTERMORDORDOR)) {
6887                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6888                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6889                 if (!gv) {
6890                     s = scan_num(s, &pl_yylval);
6891                     TERM(THING);
6892                 }
6893             }
6894         }
6895         goto keylookup;
6896     case 'x':
6897         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6898             s++;
6899             Mop(OP_REPEAT);
6900         }
6901         goto keylookup;
6902
6903     case '_':
6904     case 'a': case 'A':
6905     case 'b': case 'B':
6906     case 'c': case 'C':
6907     case 'd': case 'D':
6908     case 'e': case 'E':
6909     case 'f': case 'F':
6910     case 'g': case 'G':
6911     case 'h': case 'H':
6912     case 'i': case 'I':
6913     case 'j': case 'J':
6914     case 'k': case 'K':
6915     case 'l': case 'L':
6916     case 'm': case 'M':
6917     case 'n': case 'N':
6918     case 'o': case 'O':
6919     case 'p': case 'P':
6920     case 'q': case 'Q':
6921     case 'r': case 'R':
6922     case 's': case 'S':
6923     case 't': case 'T':
6924     case 'u': case 'U':
6925               case 'V':
6926     case 'w': case 'W':
6927               case 'X':
6928     case 'y': case 'Y':
6929     case 'z': case 'Z':
6930
6931       keylookup: {
6932         bool anydelim;
6933         bool lex;
6934         I32 tmp;
6935         SV *sv;
6936         CV *cv;
6937         PADOFFSET off;
6938         OP *rv2cv_op;
6939
6940         lex = FALSE;
6941         orig_keyword = 0;
6942         off = 0;
6943         sv = NULL;
6944         cv = NULL;
6945         gv = NULL;
6946         gvp = NULL;
6947         rv2cv_op = NULL;
6948
6949         PL_bufptr = s;
6950         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6951
6952         /* Some keywords can be followed by any delimiter, including ':' */
6953         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6954
6955         /* x::* is just a word, unless x is "CORE" */
6956         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6957             goto just_a_word;
6958
6959         d = s;
6960         while (d < PL_bufend && isSPACE(*d))
6961                 d++;    /* no comments skipped here, or s### is misparsed */
6962
6963         /* Is this a word before a => operator? */
6964         if (*d == '=' && d[1] == '>') {
6965             CLINE;
6966             pl_yylval.opval
6967                 = (OP*)newSVOP(OP_CONST, 0,
6968                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6969             pl_yylval.opval->op_private = OPpCONST_BARE;
6970             TERM(WORD);
6971         }
6972
6973         /* Check for plugged-in keyword */
6974         {
6975             OP *o;
6976             int result;
6977             char *saved_bufptr = PL_bufptr;
6978             PL_bufptr = s;
6979             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6980             s = PL_bufptr;
6981             if (result == KEYWORD_PLUGIN_DECLINE) {
6982                 /* not a plugged-in keyword */
6983                 PL_bufptr = saved_bufptr;
6984             } else if (result == KEYWORD_PLUGIN_STMT) {
6985                 pl_yylval.opval = o;
6986                 CLINE;
6987                 PL_expect = XSTATE;
6988                 return REPORT(PLUGSTMT);
6989             } else if (result == KEYWORD_PLUGIN_EXPR) {
6990                 pl_yylval.opval = o;
6991                 CLINE;
6992                 PL_expect = XOPERATOR;
6993                 return REPORT(PLUGEXPR);
6994             } else {
6995                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6996                                         PL_tokenbuf);
6997             }
6998         }
6999
7000         /* Check for built-in keyword */
7001         tmp = keyword(PL_tokenbuf, len, 0);
7002
7003         /* Is this a label? */
7004         if (!anydelim && PL_expect == XSTATE
7005               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7006             s = d + 1;
7007             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7008             pl_yylval.pval[len] = '\0';
7009             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7010             CLINE;
7011             TOKEN(LABEL);
7012         }
7013
7014         /* Check for lexical sub */
7015         if (PL_expect != XOPERATOR) {
7016             char tmpbuf[sizeof PL_tokenbuf + 1];
7017             *tmpbuf = '&';
7018             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7019             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7020             if (off != NOT_IN_PAD) {
7021                 assert(off); /* we assume this is boolean-true below */
7022                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7023                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7024                     HEK * const stashname = HvNAME_HEK(stash);
7025                     sv = newSVhek(stashname);
7026                     sv_catpvs(sv, "::");
7027                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7028                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7029                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7030                                     SVt_PVCV);
7031                     off = 0;
7032                     if (!gv) {
7033                         sv_free(sv);
7034                         sv = NULL;
7035                         goto just_a_word;
7036                     }
7037                 }
7038                 else {
7039                     rv2cv_op = newOP(OP_PADANY, 0);
7040                     rv2cv_op->op_targ = off;
7041                     cv = find_lexical_cv(off);
7042                 }
7043                 lex = TRUE;
7044                 goto just_a_word;
7045             }
7046             off = 0;
7047         }
7048
7049         if (tmp < 0) {                  /* second-class keyword? */
7050             GV *ogv = NULL;     /* override (winner) */
7051             GV *hgv = NULL;     /* hidden (loser) */
7052             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7053                 CV *cv;
7054                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7055                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
7056                     (cv = GvCVu(gv)))
7057                 {
7058                     if (GvIMPORTED_CV(gv))
7059                         ogv = gv;
7060                     else if (! CvMETHOD(cv))
7061                         hgv = gv;
7062                 }
7063                 if (!ogv &&
7064                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7065                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
7066                     (gv = *gvp) && isGV_with_GP(gv) &&
7067                     GvCVu(gv) && GvIMPORTED_CV(gv))
7068                 {
7069                     ogv = gv;
7070                 }
7071             }
7072             if (ogv) {
7073                 orig_keyword = tmp;
7074                 tmp = 0;                /* overridden by import or by GLOBAL */
7075             }
7076             else if (gv && !gvp
7077                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7078                      && GvCVu(gv))
7079             {
7080                 tmp = 0;                /* any sub overrides "weak" keyword */
7081             }
7082             else {                      /* no override */
7083                 tmp = -tmp;
7084                 if (tmp == KEY_dump) {
7085                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7086                                    "dump() better written as CORE::dump()");
7087                 }
7088                 gv = NULL;
7089                 gvp = 0;
7090                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
7091                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7092                                    "Ambiguous call resolved as CORE::%s(), "
7093                                    "qualify as such or use &",
7094                                    GvENAME(hgv));
7095             }
7096         }
7097
7098       reserved_word:
7099         switch (tmp) {
7100
7101         default:                        /* not a keyword */
7102             /* Trade off - by using this evil construction we can pull the
7103                variable gv into the block labelled keylookup. If not, then
7104                we have to give it function scope so that the goto from the
7105                earlier ':' case doesn't bypass the initialisation.  */
7106             if (0) {
7107             just_a_word_zero_gv:
7108                 sv = NULL;
7109                 cv = NULL;
7110                 gv = NULL;
7111                 gvp = NULL;
7112                 rv2cv_op = NULL;
7113                 orig_keyword = 0;
7114                 lex = 0;
7115                 off = 0;
7116             }
7117           just_a_word: {
7118                 int pkgname = 0;
7119                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7120                 const char penultchar =
7121                     lastchar && PL_bufptr - 2 >= PL_linestart
7122                          ? PL_bufptr[-2]
7123                          : 0;
7124 #ifdef PERL_MAD
7125                 SV *nextPL_nextwhite = 0;
7126 #endif
7127
7128
7129                 /* Get the rest if it looks like a package qualifier */
7130
7131                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7132                     STRLEN morelen;
7133                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7134                                   TRUE, &morelen);
7135                     if (!morelen)
7136                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7137                                 UTF8fARG(UTF, len, PL_tokenbuf),
7138                                 *s == '\'' ? "'" : "::");
7139                     len += morelen;
7140                     pkgname = 1;
7141                 }
7142
7143                 if (PL_expect == XOPERATOR) {
7144                     if (PL_bufptr == PL_linestart) {
7145                         CopLINE_dec(PL_curcop);
7146                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7147                         CopLINE_inc(PL_curcop);
7148                     }
7149                     else
7150                         no_op("Bareword",s);
7151                 }
7152
7153                 /* Look for a subroutine with this name in current package,
7154                    unless this is a lexical sub, or name is "Foo::",
7155                    in which case Foo is a bareword
7156                    (and a package name). */
7157
7158                 if (len > 2 && !PL_madskills &&
7159                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7160                 {
7161                     if (ckWARN(WARN_BAREWORD)
7162                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7163                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7164                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
7165                            UTF8fARG(UTF, len, PL_tokenbuf));
7166                     len -= 2;
7167                     PL_tokenbuf[len] = '\0';
7168                     gv = NULL;
7169                     gvp = 0;
7170                 }
7171                 else {
7172                     if (!lex && !gv) {
7173                         /* Mustn't actually add anything to a symbol table.
7174                            But also don't want to "initialise" any placeholder
7175                            constants that might already be there into full
7176                            blown PVGVs with attached PVCV.  */
7177                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7178                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7179                                                SVt_PVCV);
7180                     }
7181                     len = 0;
7182                 }
7183
7184                 /* if we saw a global override before, get the right name */
7185
7186                 if (!sv)
7187                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7188                     len ? len : strlen(PL_tokenbuf));
7189                 if (gvp) {
7190                     SV * const tmp_sv = sv;
7191                     sv = newSVpvs("CORE::GLOBAL::");
7192                     sv_catsv(sv, tmp_sv);
7193                     SvREFCNT_dec(tmp_sv);
7194                 }
7195
7196 #ifdef PERL_MAD
7197                 if (PL_madskills && !PL_thistoken) {
7198                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7199                     PL_thistoken = newSVpvn(start,s - start);
7200                     PL_realtokenstart = s - SvPVX(PL_linestr);
7201                 }
7202 #endif
7203
7204                 /* Presume this is going to be a bareword of some sort. */
7205                 CLINE;
7206                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7207                 pl_yylval.opval->op_private = OPpCONST_BARE;
7208
7209                 /* And if "Foo::", then that's what it certainly is. */
7210                 if (len)
7211                     goto safe_bareword;
7212
7213                 if (!off)
7214                 {
7215                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7216                     const_op->op_private = OPpCONST_BARE;
7217                     rv2cv_op = newCVREF(0, const_op);
7218                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7219                 }
7220
7221                 /* See if it's the indirect object for a list operator. */
7222
7223                 if (PL_oldoldbufptr &&
7224                     PL_oldoldbufptr < PL_bufptr &&
7225                     (PL_oldoldbufptr == PL_last_lop
7226                      || PL_oldoldbufptr == PL_last_uni) &&
7227                     /* NO SKIPSPACE BEFORE HERE! */
7228                     (PL_expect == XREF ||
7229                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7230                 {
7231                     bool immediate_paren = *s == '(';
7232
7233                     /* (Now we can afford to cross potential line boundary.) */
7234                     s = SKIPSPACE2(s,nextPL_nextwhite);
7235 #ifdef PERL_MAD
7236                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
7237 #endif
7238
7239                     /* Two barewords in a row may indicate method call. */
7240
7241                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7242                         (tmp = intuit_method(s, gv, cv))) {
7243                         op_free(rv2cv_op);
7244                         if (tmp == METHOD && !PL_lex_allbrackets &&
7245                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7246                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7247                         return REPORT(tmp);
7248                     }
7249
7250                     /* If not a declared subroutine, it's an indirect object. */
7251                     /* (But it's an indir obj regardless for sort.) */
7252                     /* Also, if "_" follows a filetest operator, it's a bareword */
7253
7254                     if (
7255                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7256                          (!cv &&
7257                         (PL_last_lop_op != OP_MAPSTART &&
7258                          PL_last_lop_op != OP_GREPSTART))))
7259                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7260                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7261                        )
7262                     {
7263                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7264                         goto bareword;
7265                     }
7266                 }
7267
7268                 PL_expect = XOPERATOR;
7269 #ifdef PERL_MAD
7270                 if (isSPACE(*s))
7271                     s = SKIPSPACE2(s,nextPL_nextwhite);
7272                 PL_nextwhite = nextPL_nextwhite;
7273 #else
7274                 s = skipspace(s);
7275 #endif
7276
7277                 /* Is this a word before a => operator? */
7278                 if (*s == '=' && s[1] == '>' && !pkgname) {
7279                     op_free(rv2cv_op);
7280                     CLINE;
7281                     /* This is our own scalar, created a few lines above,
7282                        so this is safe. */
7283                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7284                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7285                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7286                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7287                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7288                     TERM(WORD);
7289                 }
7290
7291                 /* If followed by a paren, it's certainly a subroutine. */
7292                 if (*s == '(') {
7293                     CLINE;
7294                     if (cv) {
7295                         d = s + 1;
7296                         while (SPACE_OR_TAB(*d))
7297                             d++;
7298                         if (*d == ')' && (sv = cv_const_sv(cv))) {
7299                             s = d + 1;
7300                             goto its_constant;
7301                         }
7302                     }
7303 #ifdef PERL_MAD
7304                     if (PL_madskills) {
7305                         PL_nextwhite = PL_thiswhite;
7306                         PL_thiswhite = 0;
7307                     }
7308                     start_force(PL_curforce);
7309 #endif
7310                     NEXTVAL_NEXTTOKE.opval =
7311                         off ? rv2cv_op : pl_yylval.opval;
7312                     PL_expect = XOPERATOR;
7313 #ifdef PERL_MAD
7314                     if (PL_madskills) {
7315                         PL_nextwhite = nextPL_nextwhite;
7316                         curmad('X', PL_thistoken);
7317                         PL_thistoken = newSVpvs("");
7318                     }
7319 #endif
7320                     if (off)
7321                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7322                     else op_free(rv2cv_op),        force_next(WORD);
7323                     pl_yylval.ival = 0;
7324                     TOKEN('&');
7325                 }
7326
7327                 /* If followed by var or block, call it a method (unless sub) */
7328
7329                 if ((*s == '$' || *s == '{') && !cv) {
7330                     op_free(rv2cv_op);
7331                     PL_last_lop = PL_oldbufptr;
7332                     PL_last_lop_op = OP_METHOD;
7333                     if (!PL_lex_allbrackets &&
7334                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7335                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7336                     PREBLOCK(METHOD);
7337                 }
7338
7339                 /* If followed by a bareword, see if it looks like indir obj. */
7340
7341                 if (!orig_keyword
7342                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7343                         && (tmp = intuit_method(s, gv, cv))) {
7344                     op_free(rv2cv_op);
7345                     if (tmp == METHOD && !PL_lex_allbrackets &&
7346                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7347                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7348                     return REPORT(tmp);
7349                 }
7350
7351                 /* Not a method, so call it a subroutine (if defined) */
7352
7353                 if (cv) {
7354                     if (lastchar == '-' && penultchar != '-') {
7355                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
7356                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7357                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7358                              UTF8fARG(UTF, l, PL_tokenbuf),
7359                              UTF8fARG(UTF, l, PL_tokenbuf));
7360                     }
7361                     /* Check for a constant sub */
7362                     if ((sv = cv_const_sv(cv))) {
7363                   its_constant:
7364                         op_free(rv2cv_op);
7365                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7366                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7367                         pl_yylval.opval->op_private = OPpCONST_FOLDED;
7368                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
7369                         TOKEN(WORD);
7370                     }
7371
7372                     op_free(pl_yylval.opval);
7373                     pl_yylval.opval =
7374                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7375                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7376                     PL_last_lop = PL_oldbufptr;
7377                     PL_last_lop_op = OP_ENTERSUB;
7378                     /* Is there a prototype? */
7379                     if (
7380 #ifdef PERL_MAD
7381                         cv &&
7382 #endif
7383                         SvPOK(cv))
7384                     {
7385                         STRLEN protolen = CvPROTOLEN(cv);
7386                         const char *proto = CvPROTO(cv);
7387                         bool optional;
7388                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7389                         if (!protolen)
7390                             TERM(FUNC0SUB);
7391                         if ((optional = *proto == ';'))
7392                           do
7393                             proto++;
7394                           while (*proto == ';');
7395                         if (
7396                             (
7397                                 (
7398                                     *proto == '$' || *proto == '_'
7399                                  || *proto == '*' || *proto == '+'
7400                                 )
7401                              && proto[1] == '\0'
7402                             )
7403                          || (
7404                              *proto == '\\' && proto[1] && proto[2] == '\0'
7405                             )
7406                         )
7407                             UNIPROTO(UNIOPSUB,optional);
7408                         if (*proto == '\\' && proto[1] == '[') {
7409                             const char *p = proto + 2;
7410                             while(*p && *p != ']')
7411                                 ++p;
7412                             if(*p == ']' && !p[1])
7413                                 UNIPROTO(UNIOPSUB,optional);
7414                         }
7415                         if (*proto == '&' && *s == '{') {
7416                             if (PL_curstash)
7417                                 sv_setpvs(PL_subname, "__ANON__");
7418                             else
7419                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7420                             if (!PL_lex_allbrackets &&
7421                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7422                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7423                             PREBLOCK(LSTOPSUB);
7424                         }
7425                     }
7426 #ifdef PERL_MAD
7427                     {
7428                         if (PL_madskills) {
7429                             PL_nextwhite = PL_thiswhite;
7430                             PL_thiswhite = 0;
7431                         }
7432                         start_force(PL_curforce);
7433                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7434                         PL_expect = XTERM;
7435                         if (PL_madskills) {
7436                             PL_nextwhite = nextPL_nextwhite;
7437                             curmad('X', PL_thistoken);
7438                             PL_thistoken = newSVpvs("");
7439                         }
7440                         force_next(off ? PRIVATEREF : WORD);
7441                         if (!PL_lex_allbrackets &&
7442                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7443                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7444                         TOKEN(NOAMP);
7445                     }
7446                 }
7447
7448                 /* Guess harder when madskills require "best effort". */
7449                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7450                     int probable_sub = 0;
7451                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7452                         probable_sub = 1;
7453                     else if (isALPHA(*s)) {
7454                         char tmpbuf[1024];
7455                         STRLEN tmplen;
7456                         d = s;
7457                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7458                         if (!keyword(tmpbuf, tmplen, 0))
7459                             probable_sub = 1;
7460                         else {
7461                             while (d < PL_bufend && isSPACE(*d))
7462                                 d++;
7463                             if (*d == '=' && d[1] == '>')
7464                                 probable_sub = 1;
7465                         }
7466                     }
7467                     if (probable_sub) {
7468                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7469                                         SVt_PVCV);
7470                         op_free(pl_yylval.opval);
7471                         pl_yylval.opval =
7472                             off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7473                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7474                         PL_last_lop = PL_oldbufptr;
7475                         PL_last_lop_op = OP_ENTERSUB;
7476                         PL_nextwhite = PL_thiswhite;
7477                         PL_thiswhite = 0;
7478                         start_force(PL_curforce);
7479                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7480                         PL_expect = XTERM;
7481                         PL_nextwhite = nextPL_nextwhite;
7482                         curmad('X', PL_thistoken);
7483                         PL_thistoken = newSVpvs("");
7484                         force_next(off ? PRIVATEREF : WORD);
7485                         if (!PL_lex_allbrackets &&
7486                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7487                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7488                         TOKEN(NOAMP);
7489                     }
7490 #else
7491                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7492                     PL_expect = XTERM;
7493                     force_next(off ? PRIVATEREF : WORD);
7494                     if (!PL_lex_allbrackets &&
7495                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7496                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7497                     TOKEN(NOAMP);
7498 #endif
7499                 }
7500
7501                 /* Call it a bare word */
7502
7503                 if (PL_hints & HINT_STRICT_SUBS)
7504                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7505                 else {
7506                 bareword:
7507                     /* after "print" and similar functions (corresponding to
7508                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7509                      * a filehandle should be subject to "strict subs".
7510                      * Likewise for the optional indirect-object argument to system
7511                      * or exec, which can't be a bareword */
7512                     if ((PL_last_lop_op == OP_PRINT
7513                             || PL_last_lop_op == OP_PRTF
7514                             || PL_last_lop_op == OP_SAY
7515                             || PL_last_lop_op == OP_SYSTEM
7516                             || PL_last_lop_op == OP_EXEC)
7517                             && (PL_hints & HINT_STRICT_SUBS))
7518                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7519                     if (lastchar != '-') {
7520                         if (ckWARN(WARN_RESERVED)) {
7521                             d = PL_tokenbuf;
7522                             while (isLOWER(*d))
7523                                 d++;
7524                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7525                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7526                                        PL_tokenbuf);
7527                         }
7528                     }
7529                 }
7530                 op_free(rv2cv_op);
7531
7532             safe_bareword:
7533                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7534                  && saw_infix_sigil) {
7535                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7536                                      "Operator or semicolon missing before %c%"UTF8f,
7537                                      lastchar,
7538                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7539                                               PL_tokenbuf));
7540                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7541                                      "Ambiguous use of %c resolved as operator %c",
7542                                      lastchar, lastchar);
7543                 }
7544                 TOKEN(WORD);
7545             }
7546
7547         case KEY___FILE__:
7548             FUN0OP(
7549                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7550             );
7551
7552         case KEY___LINE__:
7553             FUN0OP(
7554                 (OP*)newSVOP(OP_CONST, 0,
7555                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7556             );
7557
7558         case KEY___PACKAGE__:
7559             FUN0OP(
7560                 (OP*)newSVOP(OP_CONST, 0,
7561                                         (PL_curstash
7562                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7563                                          : &PL_sv_undef))
7564             );
7565
7566         case KEY___DATA__:
7567         case KEY___END__: {
7568             GV *gv;
7569             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7570                 const char *pname = "main";
7571                 STRLEN plen = 4;
7572                 U32 putf8 = 0;
7573                 if (PL_tokenbuf[2] == 'D')
7574                 {
7575                     HV * const stash =
7576                         PL_curstash ? PL_curstash : PL_defstash;
7577                     pname = HvNAME_get(stash);
7578                     plen  = HvNAMELEN (stash);
7579                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7580                 }
7581                 gv = gv_fetchpvn_flags(
7582                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7583                         plen+6, GV_ADD|putf8, SVt_PVIO
7584                 );
7585                 GvMULTI_on(gv);
7586                 if (!GvIO(gv))
7587                     GvIOp(gv) = newIO();
7588                 IoIFP(GvIOp(gv)) = PL_rsfp;
7589 #if defined(HAS_FCNTL) && defined(F_SETFD)
7590                 {
7591                     const int fd = PerlIO_fileno(PL_rsfp);
7592                     fcntl(fd,F_SETFD,fd >= 3);
7593                 }
7594 #endif
7595                 /* Mark this internal pseudo-handle as clean */
7596                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7597                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7598                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7599                 else
7600                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7601 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7602                 /* if the script was opened in binmode, we need to revert
7603                  * it to text mode for compatibility; but only iff it has CRs
7604                  * XXX this is a questionable hack at best. */
7605                 if (PL_bufend-PL_bufptr > 2
7606                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7607                 {
7608                     Off_t loc = 0;
7609                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7610                         loc = PerlIO_tell(PL_rsfp);
7611                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7612                     }
7613 #ifdef NETWARE
7614                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7615 #else
7616                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7617 #endif  /* NETWARE */
7618                         if (loc > 0)
7619                             PerlIO_seek(PL_rsfp, loc, 0);
7620                     }
7621                 }
7622 #endif
7623 #ifdef PERLIO_LAYERS
7624                 if (!IN_BYTES) {
7625                     if (UTF)
7626                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7627                     else if (PL_encoding) {
7628                         SV *name;
7629                         dSP;
7630                         ENTER;
7631                         SAVETMPS;
7632                         PUSHMARK(sp);
7633                         EXTEND(SP, 1);
7634                         XPUSHs(PL_encoding);
7635                         PUTBACK;
7636                         call_method("name", G_SCALAR);
7637                         SPAGAIN;
7638                         name = POPs;
7639                         PUTBACK;
7640                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7641                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7642                                                       SVfARG(name)));
7643                         FREETMPS;
7644                         LEAVE;
7645                     }
7646                 }
7647 #endif
7648 #ifdef PERL_MAD
7649                 if (PL_madskills) {
7650                     if (PL_realtokenstart >= 0) {
7651                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7652                         if (!PL_endwhite)
7653                             PL_endwhite = newSVpvs("");
7654                         sv_catsv(PL_endwhite, PL_thiswhite);
7655                         PL_thiswhite = 0;
7656                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7657                         PL_realtokenstart = -1;
7658                     }
7659                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7660                            != NULL) ;
7661                 }
7662 #endif
7663                 PL_rsfp = NULL;
7664             }
7665             goto fake_eof;
7666         }
7667
7668         case KEY___SUB__:
7669             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7670
7671         case KEY_AUTOLOAD:
7672         case KEY_DESTROY:
7673         case KEY_BEGIN:
7674         case KEY_UNITCHECK:
7675         case KEY_CHECK:
7676         case KEY_INIT:
7677         case KEY_END:
7678             if (PL_expect == XSTATE) {
7679                 s = PL_bufptr;
7680                 goto really_sub;
7681             }
7682             goto just_a_word;
7683
7684         case KEY_CORE:
7685             if (*s == ':' && s[1] == ':') {
7686                 STRLEN olen = len;
7687                 d = s;
7688                 s += 2;
7689                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7690                 if ((*s == ':' && s[1] == ':')
7691                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7692                 {
7693                     s = d;
7694                     len = olen;
7695                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7696                     goto just_a_word;
7697                 }
7698                 if (!tmp)
7699                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7700                                       UTF8fARG(UTF, len, PL_tokenbuf));
7701                 if (tmp < 0)
7702                     tmp = -tmp;
7703                 else if (tmp == KEY_require || tmp == KEY_do
7704                       || tmp == KEY_glob)
7705                     /* that's a way to remember we saw "CORE::" */
7706                     orig_keyword = tmp;
7707                 goto reserved_word;
7708             }
7709             goto just_a_word;
7710
7711         case KEY_abs:
7712             UNI(OP_ABS);
7713
7714         case KEY_alarm:
7715             UNI(OP_ALARM);
7716
7717         case KEY_accept:
7718             LOP(OP_ACCEPT,XTERM);
7719
7720         case KEY_and:
7721             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7722                 return REPORT(0);
7723             OPERATOR(ANDOP);
7724
7725         case KEY_atan2:
7726             LOP(OP_ATAN2,XTERM);
7727
7728         case KEY_bind:
7729             LOP(OP_BIND,XTERM);
7730
7731         case KEY_binmode:
7732             LOP(OP_BINMODE,XTERM);
7733
7734         case KEY_bless:
7735             LOP(OP_BLESS,XTERM);
7736
7737         case KEY_break:
7738             FUN0(OP_BREAK);
7739
7740         case KEY_chop:
7741             UNI(OP_CHOP);
7742
7743         case KEY_continue:
7744                     /* We have to disambiguate the two senses of
7745                       "continue". If the next token is a '{' then
7746                       treat it as the start of a continue block;
7747                       otherwise treat it as a control operator.
7748                      */
7749                     s = skipspace(s);
7750                     if (*s == '{')
7751             PREBLOCK(CONTINUE);
7752                     else
7753                         FUN0(OP_CONTINUE);
7754
7755         case KEY_chdir:
7756             /* may use HOME */
7757             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7758             UNI(OP_CHDIR);
7759
7760         case KEY_close:
7761             UNI(OP_CLOSE);
7762
7763         case KEY_closedir:
7764             UNI(OP_CLOSEDIR);
7765
7766         case KEY_cmp:
7767             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7768                 return REPORT(0);
7769             Eop(OP_SCMP);
7770
7771         case KEY_caller:
7772             UNI(OP_CALLER);
7773
7774         case KEY_crypt:
7775 #ifdef FCRYPT
7776             if (!PL_cryptseen) {
7777                 PL_cryptseen = TRUE;
7778                 init_des();
7779             }
7780 #endif
7781             LOP(OP_CRYPT,XTERM);
7782
7783         case KEY_chmod:
7784             LOP(OP_CHMOD,XTERM);
7785
7786         case KEY_chown:
7787             LOP(OP_CHOWN,XTERM);
7788
7789         case KEY_connect:
7790             LOP(OP_CONNECT,XTERM);
7791
7792         case KEY_chr:
7793             UNI(OP_CHR);
7794
7795         case KEY_cos:
7796             UNI(OP_COS);
7797
7798         case KEY_chroot:
7799             UNI(OP_CHROOT);
7800
7801         case KEY_default:
7802             PREBLOCK(DEFAULT);
7803
7804         case KEY_do:
7805             s = SKIPSPACE1(s);
7806             if (*s == '{')
7807                 PRETERMBLOCK(DO);
7808             if (*s != '\'') {
7809                 *PL_tokenbuf = '&';
7810                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7811                               1, &len);
7812                 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7813                     d = SKIPSPACE1(d);
7814                     if (*d == '(') {
7815                         force_ident_maybe_lex('&');
7816                         s = d;
7817                     }
7818                 }
7819             }
7820             if (orig_keyword == KEY_do) {
7821                 orig_keyword = 0;
7822                 pl_yylval.ival = 1;
7823             }
7824             else
7825                 pl_yylval.ival = 0;
7826             OPERATOR(DO);
7827
7828         case KEY_die:
7829             PL_hints |= HINT_BLOCK_SCOPE;
7830             LOP(OP_DIE,XTERM);
7831
7832         case KEY_defined:
7833             UNI(OP_DEFINED);
7834
7835         case KEY_delete:
7836             UNI(OP_DELETE);
7837
7838         case KEY_dbmopen:
7839             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7840                               STR_WITH_LEN("NDBM_File::"),
7841                               STR_WITH_LEN("DB_File::"),
7842                               STR_WITH_LEN("GDBM_File::"),
7843                               STR_WITH_LEN("SDBM_File::"),
7844                               STR_WITH_LEN("ODBM_File::"),
7845                               NULL);
7846             LOP(OP_DBMOPEN,XTERM);
7847
7848         case KEY_dbmclose:
7849             UNI(OP_DBMCLOSE);
7850
7851         case KEY_dump:
7852             PL_expect = XOPERATOR;
7853             s = force_word(s,WORD,TRUE,FALSE);
7854             LOOPX(OP_DUMP);
7855
7856         case KEY_else:
7857             PREBLOCK(ELSE);
7858
7859         case KEY_elsif:
7860             pl_yylval.ival = CopLINE(PL_curcop);
7861             OPERATOR(ELSIF);
7862
7863         case KEY_eq:
7864             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7865                 return REPORT(0);
7866             Eop(OP_SEQ);
7867
7868         case KEY_exists:
7869             UNI(OP_EXISTS);
7870
7871         case KEY_exit:
7872             if (PL_madskills)
7873                 UNI(OP_INT);
7874             UNI(OP_EXIT);
7875
7876         case KEY_eval:
7877             s = SKIPSPACE1(s);
7878             if (*s == '{') { /* block eval */
7879                 PL_expect = XTERMBLOCK;
7880                 UNIBRACK(OP_ENTERTRY);
7881             }
7882             else { /* string eval */
7883                 PL_expect = XTERM;
7884                 UNIBRACK(OP_ENTEREVAL);
7885             }
7886
7887         case KEY_evalbytes:
7888             PL_expect = XTERM;
7889             UNIBRACK(-OP_ENTEREVAL);
7890
7891         case KEY_eof:
7892             UNI(OP_EOF);
7893
7894         case KEY_exp:
7895             UNI(OP_EXP);
7896
7897         case KEY_each:
7898             UNI(OP_EACH);
7899
7900         case KEY_exec:
7901             LOP(OP_EXEC,XREF);
7902
7903         case KEY_endhostent:
7904             FUN0(OP_EHOSTENT);
7905
7906         case KEY_endnetent:
7907             FUN0(OP_ENETENT);
7908
7909         case KEY_endservent:
7910             FUN0(OP_ESERVENT);
7911
7912         case KEY_endprotoent:
7913             FUN0(OP_EPROTOENT);
7914
7915         case KEY_endpwent:
7916             FUN0(OP_EPWENT);
7917
7918         case KEY_endgrent:
7919             FUN0(OP_EGRENT);
7920
7921         case KEY_for:
7922         case KEY_foreach:
7923             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7924                 return REPORT(0);
7925             pl_yylval.ival = CopLINE(PL_curcop);
7926             s = SKIPSPACE1(s);
7927             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7928                 char *p = s;
7929 #ifdef PERL_MAD
7930                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7931 #endif
7932
7933                 if ((PL_bufend - p) >= 3 &&
7934                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7935                     p += 2;
7936                 else if ((PL_bufend - p) >= 4 &&
7937                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7938                     p += 3;
7939                 p = PEEKSPACE(p);
7940                 if (isIDFIRST_lazy_if(p,UTF)) {
7941                     p = scan_ident(p, PL_bufend,
7942                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7943                     p = PEEKSPACE(p);
7944                 }
7945                 if (*p != '$')
7946                     Perl_croak(aTHX_ "Missing $ on loop variable");
7947 #ifdef PERL_MAD
7948                 s = SvPVX(PL_linestr) + soff;
7949 #endif
7950             }
7951             OPERATOR(FOR);
7952
7953         case KEY_formline:
7954             LOP(OP_FORMLINE,XTERM);
7955
7956         case KEY_fork:
7957             FUN0(OP_FORK);
7958
7959         case KEY_fc:
7960             UNI(OP_FC);
7961
7962         case KEY_fcntl:
7963             LOP(OP_FCNTL,XTERM);
7964
7965         case KEY_fileno:
7966             UNI(OP_FILENO);
7967
7968         case KEY_flock:
7969             LOP(OP_FLOCK,XTERM);
7970
7971         case KEY_gt:
7972             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7973                 return REPORT(0);
7974             Rop(OP_SGT);
7975
7976         case KEY_ge:
7977             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7978                 return REPORT(0);
7979             Rop(OP_SGE);
7980
7981         case KEY_grep:
7982             LOP(OP_GREPSTART, XREF);
7983
7984         case KEY_goto:
7985             PL_expect = XOPERATOR;
7986             s = force_word(s,WORD,TRUE,FALSE);
7987             LOOPX(OP_GOTO);
7988
7989         case KEY_gmtime:
7990             UNI(OP_GMTIME);
7991
7992         case KEY_getc:
7993             UNIDOR(OP_GETC);
7994
7995         case KEY_getppid:
7996             FUN0(OP_GETPPID);
7997
7998         case KEY_getpgrp:
7999             UNI(OP_GETPGRP);
8000
8001         case KEY_getpriority:
8002             LOP(OP_GETPRIORITY,XTERM);
8003
8004         case KEY_getprotobyname:
8005             UNI(OP_GPBYNAME);
8006
8007         case KEY_getprotobynumber:
8008             LOP(OP_GPBYNUMBER,XTERM);
8009
8010         case KEY_getprotoent:
8011             FUN0(OP_GPROTOENT);
8012
8013         case KEY_getpwent:
8014             FUN0(OP_GPWENT);
8015
8016         case KEY_getpwnam:
8017             UNI(OP_GPWNAM);
8018
8019         case KEY_getpwuid:
8020             UNI(OP_GPWUID);
8021
8022         case KEY_getpeername:
8023             UNI(OP_GETPEERNAME);
8024
8025         case KEY_gethostbyname:
8026             UNI(OP_GHBYNAME);
8027
8028         case KEY_gethostbyaddr:
8029             LOP(OP_GHBYADDR,XTERM);
8030
8031         case KEY_gethostent:
8032             FUN0(OP_GHOSTENT);
8033
8034         case KEY_getnetbyname:
8035             UNI(OP_GNBYNAME);
8036
8037         case KEY_getnetbyaddr:
8038             LOP(OP_GNBYADDR,XTERM);
8039
8040         case KEY_getnetent:
8041             FUN0(OP_GNETENT);
8042
8043         case KEY_getservbyname:
8044             LOP(OP_GSBYNAME,XTERM);
8045
8046         case KEY_getservbyport:
8047             LOP(OP_GSBYPORT,XTERM);
8048
8049         case KEY_getservent:
8050             FUN0(OP_GSERVENT);
8051
8052         case KEY_getsockname:
8053             UNI(OP_GETSOCKNAME);
8054
8055         case KEY_getsockopt:
8056             LOP(OP_GSOCKOPT,XTERM);
8057
8058         case KEY_getgrent:
8059             FUN0(OP_GGRENT);
8060
8061         case KEY_getgrnam:
8062             UNI(OP_GGRNAM);
8063
8064         case KEY_getgrgid:
8065             UNI(OP_GGRGID);
8066
8067         case KEY_getlogin:
8068             FUN0(OP_GETLOGIN);
8069
8070         case KEY_given:
8071             pl_yylval.ival = CopLINE(PL_curcop);
8072             Perl_ck_warner_d(aTHX_
8073                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8074                 "given is experimental");
8075             OPERATOR(GIVEN);
8076
8077         case KEY_glob:
8078             LOP(
8079              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
8080              XTERM
8081             );
8082
8083         case KEY_hex:
8084             UNI(OP_HEX);
8085
8086         case KEY_if:
8087             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8088                 return REPORT(0);
8089             pl_yylval.ival = CopLINE(PL_curcop);
8090             OPERATOR(IF);
8091
8092         case KEY_index:
8093             LOP(OP_INDEX,XTERM);
8094
8095         case KEY_int:
8096             UNI(OP_INT);
8097
8098         case KEY_ioctl:
8099             LOP(OP_IOCTL,XTERM);
8100
8101         case KEY_join:
8102             LOP(OP_JOIN,XTERM);
8103
8104         case KEY_keys:
8105             UNI(OP_KEYS);
8106
8107         case KEY_kill:
8108             LOP(OP_KILL,XTERM);
8109
8110         case KEY_last:
8111             PL_expect = XOPERATOR;
8112             s = force_word(s,WORD,TRUE,FALSE);
8113             LOOPX(OP_LAST);
8114
8115         case KEY_lc:
8116             UNI(OP_LC);
8117
8118         case KEY_lcfirst:
8119             UNI(OP_LCFIRST);
8120
8121         case KEY_local:
8122             pl_yylval.ival = 0;
8123             OPERATOR(LOCAL);
8124
8125         case KEY_length:
8126             UNI(OP_LENGTH);
8127
8128         case KEY_lt:
8129             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8130                 return REPORT(0);
8131             Rop(OP_SLT);
8132
8133         case KEY_le:
8134             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8135                 return REPORT(0);
8136             Rop(OP_SLE);
8137
8138         case KEY_localtime:
8139             UNI(OP_LOCALTIME);
8140
8141         case KEY_log:
8142             UNI(OP_LOG);
8143
8144         case KEY_link:
8145             LOP(OP_LINK,XTERM);
8146
8147         case KEY_listen:
8148             LOP(OP_LISTEN,XTERM);
8149
8150         case KEY_lock:
8151             UNI(OP_LOCK);
8152
8153         case KEY_lstat:
8154             UNI(OP_LSTAT);
8155
8156         case KEY_m:
8157             s = scan_pat(s,OP_MATCH);
8158             TERM(sublex_start());
8159
8160         case KEY_map:
8161             LOP(OP_MAPSTART, XREF);
8162
8163         case KEY_mkdir:
8164             LOP(OP_MKDIR,XTERM);
8165
8166         case KEY_msgctl:
8167             LOP(OP_MSGCTL,XTERM);
8168
8169         case KEY_msgget:
8170             LOP(OP_MSGGET,XTERM);
8171
8172         case KEY_msgrcv:
8173             LOP(OP_MSGRCV,XTERM);
8174
8175         case KEY_msgsnd:
8176             LOP(OP_MSGSND,XTERM);
8177
8178         case KEY_our:
8179         case KEY_my:
8180         case KEY_state:
8181             PL_in_my = (U16)tmp;
8182             s = SKIPSPACE1(s);
8183             if (isIDFIRST_lazy_if(s,UTF)) {
8184 #ifdef PERL_MAD
8185                 char* start = s;
8186 #endif
8187                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8188                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8189                 {
8190                     if (!FEATURE_LEXSUBS_IS_ENABLED)
8191                         Perl_croak(aTHX_
8192                                   "Experimental \"%s\" subs not enabled",
8193                                    tmp == KEY_my    ? "my"    :
8194                                    tmp == KEY_state ? "state" : "our");
8195                     Perl_ck_warner_d(aTHX_
8196                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8197                         "The lexical_subs feature is experimental");
8198                     goto really_sub;
8199                 }
8200                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8201                 if (!PL_in_my_stash) {
8202                     char tmpbuf[1024];
8203                     PL_bufptr = s;
8204                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8205                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8206                 }
8207 #ifdef PERL_MAD
8208                 if (PL_madskills) {     /* just add type to declarator token */
8209                     sv_catsv(PL_thistoken, PL_nextwhite);
8210                     PL_nextwhite = 0;
8211                     sv_catpvn(PL_thistoken, start, s - start);
8212                 }
8213 #endif
8214             }
8215             pl_yylval.ival = 1;
8216             OPERATOR(MY);
8217
8218         case KEY_next:
8219             PL_expect = XOPERATOR;
8220             s = force_word(s,WORD,TRUE,FALSE);
8221             LOOPX(OP_NEXT);
8222
8223         case KEY_ne:
8224             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8225                 return REPORT(0);
8226             Eop(OP_SNE);
8227
8228         case KEY_no:
8229             s = tokenize_use(0, s);
8230             TERM(USE);
8231
8232         case KEY_not:
8233             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8234                 FUN1(OP_NOT);
8235             else {
8236                 if (!PL_lex_allbrackets &&
8237                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8238                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8239                 OPERATOR(NOTOP);
8240             }
8241
8242         case KEY_open:
8243             s = SKIPSPACE1(s);
8244             if (isIDFIRST_lazy_if(s,UTF)) {
8245           const char *t;
8246           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8247               &len);
8248                 for (t=d; isSPACE(*t);)
8249                     t++;
8250                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8251                     /* [perl #16184] */
8252                     && !(t[0] == '=' && t[1] == '>')
8253                     && !(t[0] == ':' && t[1] == ':')
8254                     && !keyword(s, d-s, 0)
8255                 ) {
8256                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8257                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8258                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8259                 }
8260             }
8261             LOP(OP_OPEN,XTERM);
8262
8263         case KEY_or:
8264             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8265                 return REPORT(0);
8266             pl_yylval.ival = OP_OR;
8267             OPERATOR(OROP);
8268
8269         case KEY_ord:
8270             UNI(OP_ORD);
8271
8272         case KEY_oct:
8273             UNI(OP_OCT);
8274
8275         case KEY_opendir:
8276             LOP(OP_OPEN_DIR,XTERM);
8277
8278         case KEY_print:
8279             checkcomma(s,PL_tokenbuf,"filehandle");
8280             LOP(OP_PRINT,XREF);
8281
8282         case KEY_printf:
8283             checkcomma(s,PL_tokenbuf,"filehandle");
8284             LOP(OP_PRTF,XREF);
8285
8286         case KEY_prototype:
8287             UNI(OP_PROTOTYPE);
8288
8289         case KEY_push:
8290             LOP(OP_PUSH,XTERM);
8291
8292         case KEY_pop:
8293             UNIDOR(OP_POP);
8294
8295         case KEY_pos:
8296             UNIDOR(OP_POS);
8297
8298         case KEY_pack:
8299             LOP(OP_PACK,XTERM);
8300
8301         case KEY_package:
8302             s = force_word(s,WORD,FALSE,TRUE);
8303             s = SKIPSPACE1(s);
8304             s = force_strict_version(s);
8305             PL_lex_expect = XBLOCK;
8306             OPERATOR(PACKAGE);
8307
8308         case KEY_pipe:
8309             LOP(OP_PIPE_OP,XTERM);
8310
8311         case KEY_q:
8312             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8313             if (!s)
8314                 missingterm(NULL);
8315             pl_yylval.ival = OP_CONST;
8316             TERM(sublex_start());
8317
8318         case KEY_quotemeta:
8319             UNI(OP_QUOTEMETA);
8320
8321         case KEY_qw: {
8322             OP *words = NULL;
8323             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8324             if (!s)
8325                 missingterm(NULL);
8326             PL_expect = XOPERATOR;
8327             if (SvCUR(PL_lex_stuff)) {
8328                 int warned_comma = !ckWARN(WARN_QW);
8329                 int warned_comment = warned_comma;
8330                 d = SvPV_force(PL_lex_stuff, len);
8331                 while (len) {
8332                     for (; isSPACE(*d) && len; --len, ++d)
8333                         /**/;
8334                     if (len) {
8335                         SV *sv;
8336                         const char *b = d;
8337                         if (!warned_comma || !warned_comment) {
8338                             for (; !isSPACE(*d) && len; --len, ++d) {
8339                                 if (!warned_comma && *d == ',') {
8340                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8341                                         "Possible attempt to separate words with commas");
8342                                     ++warned_comma;
8343                                 }
8344                                 else if (!warned_comment && *d == '#') {
8345                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8346                                         "Possible attempt to put comments in qw() list");
8347                                     ++warned_comment;
8348                                 }
8349                             }
8350                         }
8351                         else {
8352                             for (; !isSPACE(*d) && len; --len, ++d)
8353                                 /**/;
8354                         }
8355                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8356                         words = op_append_elem(OP_LIST, words,
8357                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8358                     }
8359                 }
8360             }
8361             if (!words)
8362                 words = newNULLLIST();
8363             if (PL_lex_stuff) {
8364                 SvREFCNT_dec(PL_lex_stuff);
8365                 PL_lex_stuff = NULL;
8366             }
8367             PL_expect = XOPERATOR;
8368             pl_yylval.opval = sawparens(words);
8369             TOKEN(QWLIST);
8370         }
8371
8372         case KEY_qq:
8373             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8374             if (!s)
8375                 missingterm(NULL);
8376             pl_yylval.ival = OP_STRINGIFY;
8377             if (SvIVX(PL_lex_stuff) == '\'')
8378                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8379             TERM(sublex_start());
8380
8381         case KEY_qr:
8382             s = scan_pat(s,OP_QR);
8383             TERM(sublex_start());
8384
8385         case KEY_qx:
8386             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8387             if (!s)
8388                 missingterm(NULL);
8389             readpipe_override();
8390             TERM(sublex_start());
8391
8392         case KEY_return:
8393             OLDLOP(OP_RETURN);
8394
8395         case KEY_require:
8396             s = SKIPSPACE1(s);
8397             PL_expect = XOPERATOR;
8398             if (isDIGIT(*s)) {
8399                 s = force_version(s, FALSE);
8400             }
8401             else if (*s != 'v' || !isDIGIT(s[1])
8402                     || (s = force_version(s, TRUE), *s == 'v'))
8403             {
8404                 *PL_tokenbuf = '\0';
8405                 s = force_word(s,WORD,TRUE,TRUE);
8406                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8407                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8408                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8409                 else if (*s == '<')
8410                     yyerror("<> should be quotes");
8411             }
8412             if (orig_keyword == KEY_require) {
8413                 orig_keyword = 0;
8414                 pl_yylval.ival = 1;
8415             }
8416             else
8417                 pl_yylval.ival = 0;
8418             PL_expect = XTERM;
8419             PL_bufptr = s;
8420             PL_last_uni = PL_oldbufptr;
8421             PL_last_lop_op = OP_REQUIRE;
8422             s = skipspace(s);
8423             return REPORT( (int)REQUIRE );
8424
8425         case KEY_reset:
8426             UNI(OP_RESET);
8427
8428         case KEY_redo:
8429             PL_expect = XOPERATOR;
8430             s = force_word(s,WORD,TRUE,FALSE);
8431             LOOPX(OP_REDO);
8432
8433         case KEY_rename:
8434             LOP(OP_RENAME,XTERM);
8435
8436         case KEY_rand:
8437             UNI(OP_RAND);
8438
8439         case KEY_rmdir:
8440             UNI(OP_RMDIR);
8441
8442         case KEY_rindex:
8443             LOP(OP_RINDEX,XTERM);
8444
8445         case KEY_read:
8446             LOP(OP_READ,XTERM);
8447
8448         case KEY_readdir:
8449             UNI(OP_READDIR);
8450
8451         case KEY_readline:
8452             UNIDOR(OP_READLINE);
8453
8454         case KEY_readpipe:
8455             UNIDOR(OP_BACKTICK);
8456
8457         case KEY_rewinddir:
8458             UNI(OP_REWINDDIR);
8459
8460         case KEY_recv:
8461             LOP(OP_RECV,XTERM);
8462
8463         case KEY_reverse:
8464             LOP(OP_REVERSE,XTERM);
8465
8466         case KEY_readlink:
8467             UNIDOR(OP_READLINK);
8468
8469         case KEY_ref:
8470             UNI(OP_REF);
8471
8472         case KEY_s:
8473             s = scan_subst(s);
8474             if (pl_yylval.opval)
8475                 TERM(sublex_start());
8476             else
8477                 TOKEN(1);       /* force error */
8478
8479         case KEY_say:
8480             checkcomma(s,PL_tokenbuf,"filehandle");
8481             LOP(OP_SAY,XREF);
8482
8483         case KEY_chomp:
8484             UNI(OP_CHOMP);
8485
8486         case KEY_scalar:
8487             UNI(OP_SCALAR);
8488
8489         case KEY_select:
8490             LOP(OP_SELECT,XTERM);
8491
8492         case KEY_seek:
8493             LOP(OP_SEEK,XTERM);
8494
8495         case KEY_semctl:
8496             LOP(OP_SEMCTL,XTERM);
8497
8498         case KEY_semget:
8499             LOP(OP_SEMGET,XTERM);
8500
8501         case KEY_semop:
8502             LOP(OP_SEMOP,XTERM);
8503
8504         case KEY_send:
8505             LOP(OP_SEND,XTERM);
8506
8507         case KEY_setpgrp:
8508             LOP(OP_SETPGRP,XTERM);
8509
8510         case KEY_setpriority:
8511             LOP(OP_SETPRIORITY,XTERM);
8512
8513         case KEY_sethostent:
8514             UNI(OP_SHOSTENT);
8515
8516         case KEY_setnetent:
8517             UNI(OP_SNETENT);
8518
8519         case KEY_setservent:
8520             UNI(OP_SSERVENT);
8521
8522         case KEY_setprotoent:
8523             UNI(OP_SPROTOENT);
8524
8525         case KEY_setpwent:
8526             FUN0(OP_SPWENT);
8527
8528         case KEY_setgrent:
8529             FUN0(OP_SGRENT);
8530
8531         case KEY_seekdir:
8532             LOP(OP_SEEKDIR,XTERM);
8533
8534         case KEY_setsockopt:
8535             LOP(OP_SSOCKOPT,XTERM);
8536
8537         case KEY_shift:
8538             UNIDOR(OP_SHIFT);
8539
8540         case KEY_shmctl:
8541             LOP(OP_SHMCTL,XTERM);
8542
8543         case KEY_shmget:
8544             LOP(OP_SHMGET,XTERM);
8545
8546         case KEY_shmread:
8547             LOP(OP_SHMREAD,XTERM);
8548
8549         case KEY_shmwrite:
8550             LOP(OP_SHMWRITE,XTERM);
8551
8552         case KEY_shutdown:
8553             LOP(OP_SHUTDOWN,XTERM);
8554
8555         case KEY_sin:
8556             UNI(OP_SIN);
8557
8558         case KEY_sleep:
8559             UNI(OP_SLEEP);
8560
8561         case KEY_socket:
8562             LOP(OP_SOCKET,XTERM);
8563
8564         case KEY_socketpair:
8565             LOP(OP_SOCKPAIR,XTERM);
8566
8567         case KEY_sort:
8568             checkcomma(s,PL_tokenbuf,"subroutine name");
8569             s = SKIPSPACE1(s);
8570             PL_expect = XTERM;
8571             s = force_word(s,WORD,TRUE,TRUE);
8572             LOP(OP_SORT,XREF);
8573
8574         case KEY_split:
8575             LOP(OP_SPLIT,XTERM);
8576
8577         case KEY_sprintf:
8578             LOP(OP_SPRINTF,XTERM);
8579
8580         case KEY_splice:
8581             LOP(OP_SPLICE,XTERM);
8582
8583         case KEY_sqrt:
8584             UNI(OP_SQRT);
8585
8586         case KEY_srand:
8587             UNI(OP_SRAND);
8588
8589         case KEY_stat:
8590             UNI(OP_STAT);
8591
8592         case KEY_study:
8593             UNI(OP_STUDY);
8594
8595         case KEY_substr:
8596             LOP(OP_SUBSTR,XTERM);
8597
8598         case KEY_format:
8599         case KEY_sub:
8600           really_sub:
8601             {
8602                 char * const tmpbuf = PL_tokenbuf + 1;
8603                 expectation attrful;
8604                 bool have_name, have_proto;
8605                 const int key = tmp;
8606 #ifndef PERL_MAD
8607                 SV *format_name = NULL;
8608 #endif
8609
8610 #ifdef PERL_MAD
8611                 SV *tmpwhite = 0;
8612
8613                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8614                 SV *subtoken = PL_madskills
8615                    ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8616                    : NULL;
8617                 PL_thistoken = 0;
8618
8619                 d = s;
8620                 s = SKIPSPACE2(s,tmpwhite);
8621 #else
8622                 d = s;
8623                 s = skipspace(s);
8624 #endif
8625
8626                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8627                     (*s == ':' && s[1] == ':'))
8628                 {
8629 #ifdef PERL_MAD
8630                     SV *nametoke = NULL;
8631 #endif
8632
8633                     PL_expect = XBLOCK;
8634                     attrful = XATTRBLOCK;
8635                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8636                                   &len);
8637 #ifdef PERL_MAD
8638                     if (PL_madskills)
8639                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8640 #else
8641                     if (key == KEY_format)
8642                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8643 #endif
8644                     *PL_tokenbuf = '&';
8645                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8646                      || pad_findmy_pvn(
8647                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8648                         ) != NOT_IN_PAD)
8649                         sv_setpvn(PL_subname, tmpbuf, len);
8650                     else {
8651                         sv_setsv(PL_subname,PL_curstname);
8652                         sv_catpvs(PL_subname,"::");
8653                         sv_catpvn(PL_subname,tmpbuf,len);
8654                     }
8655                     if (SvUTF8(PL_linestr))
8656                         SvUTF8_on(PL_subname);
8657                     have_name = TRUE;
8658
8659
8660 #ifdef PERL_MAD
8661                     start_force(0);
8662                     CURMAD('X', nametoke);
8663                     CURMAD('_', tmpwhite);
8664                     force_ident_maybe_lex('&');
8665
8666                     s = SKIPSPACE2(d,tmpwhite);
8667 #else
8668                     s = skipspace(d);
8669 #endif
8670                 }
8671                 else {
8672                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8673                     {
8674                         *d = '\0';
8675                         /* diag_listed_as: Missing name in "%s sub" */
8676                         Perl_croak(aTHX_
8677                                   "Missing name in \"%s\"", PL_bufptr);
8678                     }
8679                     PL_expect = XTERMBLOCK;
8680                     attrful = XATTRTERM;
8681                     sv_setpvs(PL_subname,"?");
8682                     have_name = FALSE;
8683                 }
8684
8685                 if (key == KEY_format) {
8686 #ifdef PERL_MAD
8687                     PL_thistoken = subtoken;
8688                     s = d;
8689 #else
8690                     if (format_name) {
8691                         start_force(PL_curforce);
8692                         NEXTVAL_NEXTTOKE.opval
8693                             = (OP*)newSVOP(OP_CONST,0, format_name);
8694                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8695                         force_next(WORD);
8696                     }
8697 #endif
8698                     PREBLOCK(FORMAT);
8699                 }
8700
8701                 /* Look for a prototype */
8702                 if (*s == '(') {
8703                     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8704                     if (!s)
8705                         Perl_croak(aTHX_ "Prototype not terminated");
8706                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8707                     have_proto = TRUE;
8708
8709 #ifdef PERL_MAD
8710                     start_force(0);
8711                     CURMAD('q', PL_thisopen);
8712                     CURMAD('_', tmpwhite);
8713                     CURMAD('=', PL_thisstuff);
8714                     CURMAD('Q', PL_thisclose);
8715                     NEXTVAL_NEXTTOKE.opval =
8716                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8717                     PL_lex_stuff = NULL;
8718                     force_next(THING);
8719
8720                     s = SKIPSPACE2(s,tmpwhite);
8721 #else
8722                     s = skipspace(s);
8723 #endif
8724                 }
8725                 else
8726                     have_proto = FALSE;
8727
8728                 if (*s == ':' && s[1] != ':')
8729                     PL_expect = attrful;
8730                 else if (*s != '{' && key == KEY_sub) {
8731                     if (!have_name)
8732                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8733                     else if (*s != ';' && *s != '}')
8734                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8735                 }
8736
8737 #ifdef PERL_MAD
8738                 start_force(0);
8739                 if (tmpwhite) {
8740                     if (PL_madskills)
8741                         curmad('^', newSVpvs(""));
8742                     CURMAD('_', tmpwhite);
8743                 }
8744                 force_next(0);
8745
8746                 PL_thistoken = subtoken;
8747                 PERL_UNUSED_VAR(have_proto);
8748 #else
8749                 if (have_proto) {
8750                     NEXTVAL_NEXTTOKE.opval =
8751                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8752                     PL_lex_stuff = NULL;
8753                     force_next(THING);
8754                 }
8755 #endif
8756                 if (!have_name) {
8757                     if (PL_curstash)
8758                         sv_setpvs(PL_subname, "__ANON__");
8759                     else
8760                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8761                     TOKEN(ANONSUB);
8762                 }
8763 #ifndef PERL_MAD
8764                 force_ident_maybe_lex('&');
8765 #endif
8766                 TOKEN(SUB);
8767             }
8768
8769         case KEY_system:
8770             LOP(OP_SYSTEM,XREF);
8771
8772         case KEY_symlink:
8773             LOP(OP_SYMLINK,XTERM);
8774
8775         case KEY_syscall:
8776             LOP(OP_SYSCALL,XTERM);
8777
8778         case KEY_sysopen:
8779             LOP(OP_SYSOPEN,XTERM);
8780
8781         case KEY_sysseek:
8782             LOP(OP_SYSSEEK,XTERM);
8783
8784         case KEY_sysread:
8785             LOP(OP_SYSREAD,XTERM);
8786
8787         case KEY_syswrite:
8788             LOP(OP_SYSWRITE,XTERM);
8789
8790         case KEY_tr:
8791         case KEY_y:
8792             s = scan_trans(s);
8793             TERM(sublex_start());
8794
8795         case KEY_tell:
8796             UNI(OP_TELL);
8797
8798         case KEY_telldir:
8799             UNI(OP_TELLDIR);
8800
8801         case KEY_tie:
8802             LOP(OP_TIE,XTERM);
8803
8804         case KEY_tied:
8805             UNI(OP_TIED);
8806
8807         case KEY_time:
8808             FUN0(OP_TIME);
8809
8810         case KEY_times:
8811             FUN0(OP_TMS);
8812
8813         case KEY_truncate:
8814             LOP(OP_TRUNCATE,XTERM);
8815
8816         case KEY_uc:
8817             UNI(OP_UC);
8818
8819         case KEY_ucfirst:
8820             UNI(OP_UCFIRST);
8821
8822         case KEY_untie:
8823             UNI(OP_UNTIE);
8824
8825         case KEY_until:
8826             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8827                 return REPORT(0);
8828             pl_yylval.ival = CopLINE(PL_curcop);
8829             OPERATOR(UNTIL);
8830
8831         case KEY_unless:
8832             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8833                 return REPORT(0);
8834             pl_yylval.ival = CopLINE(PL_curcop);
8835             OPERATOR(UNLESS);
8836
8837         case KEY_unlink:
8838             LOP(OP_UNLINK,XTERM);
8839
8840         case KEY_undef:
8841             UNIDOR(OP_UNDEF);
8842
8843         case KEY_unpack:
8844             LOP(OP_UNPACK,XTERM);
8845
8846         case KEY_utime:
8847             LOP(OP_UTIME,XTERM);
8848
8849         case KEY_umask:
8850             UNIDOR(OP_UMASK);
8851
8852         case KEY_unshift:
8853             LOP(OP_UNSHIFT,XTERM);
8854
8855         case KEY_use:
8856             s = tokenize_use(1, s);
8857             OPERATOR(USE);
8858
8859         case KEY_values:
8860             UNI(OP_VALUES);
8861
8862         case KEY_vec:
8863             LOP(OP_VEC,XTERM);
8864
8865         case KEY_when:
8866             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8867                 return REPORT(0);
8868             pl_yylval.ival = CopLINE(PL_curcop);
8869             Perl_ck_warner_d(aTHX_
8870                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8871                 "when is experimental");
8872             OPERATOR(WHEN);
8873
8874         case KEY_while:
8875             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8876                 return REPORT(0);
8877             pl_yylval.ival = CopLINE(PL_curcop);
8878             OPERATOR(WHILE);
8879
8880         case KEY_warn:
8881             PL_hints |= HINT_BLOCK_SCOPE;
8882             LOP(OP_WARN,XTERM);
8883
8884         case KEY_wait:
8885             FUN0(OP_WAIT);
8886
8887         case KEY_waitpid:
8888             LOP(OP_WAITPID,XTERM);
8889
8890         case KEY_wantarray:
8891             FUN0(OP_WANTARRAY);
8892
8893         case KEY_write:
8894 #ifdef EBCDIC
8895         {
8896             char ctl_l[2];
8897             ctl_l[0] = toCTRL('L');
8898             ctl_l[1] = '\0';
8899             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8900         }
8901 #else
8902             /* Make sure $^L is defined */
8903             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8904 #endif
8905             UNI(OP_ENTERWRITE);
8906
8907         case KEY_x:
8908             if (PL_expect == XOPERATOR) {
8909                 if (*s == '=' && !PL_lex_allbrackets &&
8910                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8911                     return REPORT(0);
8912                 Mop(OP_REPEAT);
8913             }
8914             check_uni();
8915             goto just_a_word;
8916
8917         case KEY_xor:
8918             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8919                 return REPORT(0);
8920             pl_yylval.ival = OP_XOR;
8921             OPERATOR(OROP);
8922         }
8923     }}
8924 }
8925 #ifdef __SC__
8926 #pragma segment Main
8927 #endif
8928
8929 /*
8930   S_pending_ident
8931
8932   Looks up an identifier in the pad or in a package
8933
8934   Returns:
8935     PRIVATEREF if this is a lexical name.
8936     WORD       if this belongs to a package.
8937
8938   Structure:
8939       if we're in a my declaration
8940           croak if they tried to say my($foo::bar)
8941           build the ops for a my() declaration
8942       if it's an access to a my() variable
8943           build ops for access to a my() variable
8944       if in a dq string, and they've said @foo and we can't find @foo
8945           warn
8946       build ops for a bareword
8947 */
8948
8949 static int
8950 S_pending_ident(pTHX)
8951 {
8952     dVAR;
8953     PADOFFSET tmp = 0;
8954     const char pit = (char)pl_yylval.ival;
8955     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8956     /* All routes through this function want to know if there is a colon.  */
8957     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8958
8959     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8960           "### Pending identifier '%s'\n", PL_tokenbuf); });
8961
8962     /* if we're in a my(), we can't allow dynamics here.
8963        $foo'bar has already been turned into $foo::bar, so
8964        just check for colons.
8965
8966        if it's a legal name, the OP is a PADANY.
8967     */
8968     if (PL_in_my) {
8969         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8970             if (has_colon)
8971                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8972                                   "variable %s in \"our\"",
8973                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8974             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8975         }
8976         else {
8977             if (has_colon)
8978                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8979                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8980                             UTF ? SVf_UTF8 : 0);
8981
8982             pl_yylval.opval = newOP(OP_PADANY, 0);
8983             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8984                                                         UTF ? SVf_UTF8 : 0);
8985             return PRIVATEREF;
8986         }
8987     }
8988
8989     /*
8990        build the ops for accesses to a my() variable.
8991     */
8992
8993     if (!has_colon) {
8994         if (!PL_in_my)
8995             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8996                                     UTF ? SVf_UTF8 : 0);
8997         if (tmp != NOT_IN_PAD) {
8998             /* might be an "our" variable" */
8999             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9000                 /* build ops for a bareword */
9001                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9002                 HEK * const stashname = HvNAME_HEK(stash);
9003                 SV *  const sym = newSVhek(stashname);
9004                 sv_catpvs(sym, "::");
9005                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9006                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9007                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9008                 if (pit != '&')
9009                   gv_fetchsv(sym,
9010                     (PL_in_eval
9011                         ? (GV_ADDMULTI | GV_ADDINEVAL)
9012                         : GV_ADDMULTI
9013                     ),
9014                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9015                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9016                      : SVt_PVHV));
9017                 return WORD;
9018             }
9019
9020             pl_yylval.opval = newOP(OP_PADANY, 0);
9021             pl_yylval.opval->op_targ = tmp;
9022             return PRIVATEREF;
9023         }
9024     }
9025
9026     /*
9027        Whine if they've said @foo in a doublequoted string,
9028        and @foo isn't a variable we can find in the symbol
9029        table.
9030     */
9031     if (ckWARN(WARN_AMBIGUOUS) &&
9032         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9033         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9034                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9035         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9036                 /* DO NOT warn for @- and @+ */
9037                 && !( PL_tokenbuf[2] == '\0' &&
9038                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9039            )
9040         {
9041             /* Downgraded from fatal to warning 20000522 mjd */
9042             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9043                         "Possible unintended interpolation of %"UTF8f
9044                         " in string",
9045                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9046         }
9047     }
9048
9049     /* build ops for a bareword */
9050     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9051                                    newSVpvn_flags(PL_tokenbuf + 1,
9052                                                       tokenbuf_len - 1,
9053                                                       UTF ? SVf_UTF8 : 0 ));
9054     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9055     if (pit != '&')
9056         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9057                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9058                      | ( UTF ? SVf_UTF8 : 0 ),
9059                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9060                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9061                       : SVt_PVHV));
9062     return WORD;
9063 }
9064
9065 STATIC void
9066 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9067 {
9068     dVAR;
9069
9070     PERL_ARGS_ASSERT_CHECKCOMMA;
9071
9072     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9073         if (ckWARN(WARN_SYNTAX)) {
9074             int level = 1;
9075             const char *w;
9076             for (w = s+2; *w && level; w++) {
9077                 if (*w == '(')
9078                     ++level;
9079                 else if (*w == ')')
9080                     --level;
9081             }
9082             while (isSPACE(*w))
9083                 ++w;
9084             /* the list of chars below is for end of statements or
9085              * block / parens, boolean operators (&&, ||, //) and branch
9086              * constructs (or, and, if, until, unless, while, err, for).
9087              * Not a very solid hack... */
9088             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9089                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9090                             "%s (...) interpreted as function",name);
9091         }
9092     }
9093     while (s < PL_bufend && isSPACE(*s))
9094         s++;
9095     if (*s == '(')
9096         s++;
9097     while (s < PL_bufend && isSPACE(*s))
9098         s++;
9099     if (isIDFIRST_lazy_if(s,UTF)) {
9100         const char * const w = s;
9101         s += UTF ? UTF8SKIP(s) : 1;
9102         while (isWORDCHAR_lazy_if(s,UTF))
9103             s += UTF ? UTF8SKIP(s) : 1;
9104         while (s < PL_bufend && isSPACE(*s))
9105             s++;
9106         if (*s == ',') {
9107             GV* gv;
9108             if (keyword(w, s - w, 0))
9109                 return;
9110
9111             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9112             if (gv && GvCVu(gv))
9113                 return;
9114             Perl_croak(aTHX_ "No comma allowed after %s", what);
9115         }
9116     }
9117 }
9118
9119 /* S_new_constant(): do any overload::constant lookup.
9120
9121    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9122    Best used as sv=new_constant(..., sv, ...).
9123    If s, pv are NULL, calls subroutine with one argument,
9124    and <type> is used with error messages only.
9125    <type> is assumed to be well formed UTF-8 */
9126
9127 STATIC SV *
9128 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9129                SV *sv, SV *pv, const char *type, STRLEN typelen)
9130 {
9131     dVAR; dSP;
9132     HV * table = GvHV(PL_hintgv);                /* ^H */
9133     SV *res;
9134     SV *errsv = NULL;
9135     SV **cvp;
9136     SV *cv, *typesv;
9137     const char *why1 = "", *why2 = "", *why3 = "";
9138
9139     PERL_ARGS_ASSERT_NEW_CONSTANT;
9140     /* We assume that this is true: */
9141     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9142     assert(type || s);
9143
9144     /* charnames doesn't work well if there have been errors found */
9145     if (PL_error_count > 0 && *key == 'c')
9146     {
9147         SvREFCNT_dec_NN(sv);
9148         return &PL_sv_undef;
9149     }
9150
9151     sv_2mortal(sv);                     /* Parent created it permanently */
9152     if (!table
9153         || ! (PL_hints & HINT_LOCALIZE_HH)
9154         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9155         || ! SvOK(*cvp))
9156     {
9157         char *msg;
9158
9159         /* Here haven't found what we're looking for.  If it is charnames,
9160          * perhaps it needs to be loaded.  Try doing that before giving up */
9161         if (*key == 'c') {
9162             Perl_load_module(aTHX_
9163                             0,
9164                             newSVpvs("_charnames"),
9165                              /* version parameter; no need to specify it, as if
9166                               * we get too early a version, will fail anyway,
9167                               * not being able to find '_charnames' */
9168                             NULL,
9169                             newSVpvs(":full"),
9170                             newSVpvs(":short"),
9171                             NULL);
9172             SPAGAIN;
9173             table = GvHV(PL_hintgv);
9174             if (table
9175                 && (PL_hints & HINT_LOCALIZE_HH)
9176                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9177                 && SvOK(*cvp))
9178             {
9179                 goto now_ok;
9180             }
9181         }
9182         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9183             msg = Perl_form(aTHX_
9184                                "Constant(%.*s) unknown",
9185                                 (int)(type ? typelen : len),
9186                                 (type ? type: s));
9187         }
9188         else {
9189             why1 = "$^H{";
9190             why2 = key;
9191             why3 = "} is not defined";
9192         report:
9193             if (*key == 'c') {
9194                 msg = Perl_form(aTHX_
9195                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9196                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9197                       );
9198             }
9199             else {
9200                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9201                                     (int)(type ? typelen : len),
9202                                     (type ? type: s), why1, why2, why3);
9203             }
9204         }
9205         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9206         return SvREFCNT_inc_simple_NN(sv);
9207     }
9208 now_ok:
9209     cv = *cvp;
9210     if (!pv && s)
9211         pv = newSVpvn_flags(s, len, SVs_TEMP);
9212     if (type && pv)
9213         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9214     else
9215         typesv = &PL_sv_undef;
9216
9217     PUSHSTACKi(PERLSI_OVERLOAD);
9218     ENTER ;
9219     SAVETMPS;
9220
9221     PUSHMARK(SP) ;
9222     EXTEND(sp, 3);
9223     if (pv)
9224         PUSHs(pv);
9225     PUSHs(sv);
9226     if (pv)
9227         PUSHs(typesv);
9228     PUTBACK;
9229     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9230
9231     SPAGAIN ;
9232
9233     /* Check the eval first */
9234     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9235         STRLEN errlen;
9236         const char * errstr;
9237         sv_catpvs(errsv, "Propagated");
9238         errstr = SvPV_const(errsv, errlen);
9239         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9240         (void)POPs;
9241         res = SvREFCNT_inc_simple_NN(sv);
9242     }
9243     else {
9244         res = POPs;
9245         SvREFCNT_inc_simple_void_NN(res);
9246     }
9247
9248     PUTBACK ;
9249     FREETMPS ;
9250     LEAVE ;
9251     POPSTACK;
9252
9253     if (!SvOK(res)) {
9254         why1 = "Call to &{$^H{";
9255         why2 = key;
9256         why3 = "}} did not return a defined value";
9257         sv = res;
9258         (void)sv_2mortal(sv);
9259         goto report;
9260     }
9261
9262     return res;
9263 }
9264
9265 PERL_STATIC_INLINE void
9266 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9267     dVAR;
9268     PERL_ARGS_ASSERT_PARSE_IDENT;
9269
9270     for (;;) {
9271         if (*d >= e)
9272             Perl_croak(aTHX_ "%s", ident_too_long);
9273         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9274              /* The UTF-8 case must come first, otherwise things
9275              * like c\N{COMBINING TILDE} would start failing, as the
9276              * isWORDCHAR_A case below would gobble the 'c' up.
9277              */
9278
9279             char *t = *s + UTF8SKIP(*s);
9280             while (isIDCONT_utf8((U8*)t))
9281                 t += UTF8SKIP(t);
9282             if (*d + (t - *s) > e)
9283                 Perl_croak(aTHX_ "%s", ident_too_long);
9284             Copy(*s, *d, t - *s, char);
9285             *d += t - *s;
9286             *s = t;
9287         }
9288         else if ( isWORDCHAR_A(**s) ) {
9289             do {
9290                 *(*d)++ = *(*s)++;
9291             } while isWORDCHAR_A(**s);
9292         }
9293         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9294             *(*d)++ = ':';
9295             *(*d)++ = ':';
9296             (*s)++;
9297         }
9298         else if (allow_package && **s == ':' && (*s)[1] == ':'
9299            /* Disallow things like Foo::$bar. For the curious, this is
9300             * the code path that triggers the "Bad name after" warning
9301             * when looking for barewords.
9302             */
9303            && (*s)[2] != '$') {
9304             *(*d)++ = *(*s)++;
9305             *(*d)++ = *(*s)++;
9306         }
9307         else
9308             break;
9309     }
9310     return;
9311 }
9312
9313 /* Returns a NUL terminated string, with the length of the string written to
9314    *slp
9315    */
9316 STATIC char *
9317 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9318 {
9319     dVAR;
9320     char *d = dest;
9321     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9322     bool is_utf8 = cBOOL(UTF);
9323
9324     PERL_ARGS_ASSERT_SCAN_WORD;
9325
9326     parse_ident(&s, &d, e, allow_package, is_utf8);
9327     *d = '\0';
9328     *slp = d - dest;
9329     return s;
9330 }
9331
9332 STATIC char *
9333 S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9334 {
9335     dVAR;
9336     char *bracket = NULL;
9337     char funny = *s++;
9338     char *d = dest;
9339     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9340     bool is_utf8 = cBOOL(UTF);
9341
9342     PERL_ARGS_ASSERT_SCAN_IDENT;
9343
9344     if (isSPACE(*s))
9345         s = PEEKSPACE(s);
9346     if (isDIGIT(*s)) {
9347         while (isDIGIT(*s)) {
9348             if (d >= e)
9349                 Perl_croak(aTHX_ "%s", ident_too_long);
9350             *d++ = *s++;
9351         }
9352     }
9353     else {
9354         parse_ident(&s, &d, e, 1, is_utf8);
9355     }
9356     *d = '\0';
9357     d = dest;
9358     if (*d) {
9359         /* Either a digit variable, or parse_ident() found an identifier
9360            (anything valid as a bareword), so job done and return.  */
9361         if (PL_lex_state != LEX_NORMAL)
9362             PL_lex_state = LEX_INTERPENDMAYBE;
9363         return s;
9364     }
9365     if (*s == '$' && s[1] &&
9366       (isIDFIRST_lazy_if(s+1,is_utf8)
9367          || isDIGIT_A((U8)s[1])
9368          || s[1] == '$'
9369          || s[1] == '{'
9370          || strnEQ(s+1,"::",2)) )
9371     {
9372         /* Dereferencing a value in a scalar variable.
9373            The alternatives are different syntaxes for a scalar variable.
9374            Using ' as a leading package separator isn't allowed. :: is.   */
9375         return s;
9376     }
9377     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9378     if (*s == '{') {
9379         bracket = s;
9380         s++;
9381         while (s < send && SPACE_OR_TAB(*s))
9382            s++;
9383     }
9384
9385 #define VALID_LEN_ONE_IDENT(d, u)     (isPUNCT_A((U8)(d))     \
9386                                         || isCNTRL_A((U8)(d)) \
9387                                         || isDIGIT_A((U8)(d)) \
9388                                         || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
9389     if (s < send
9390         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9391     {
9392         if (is_utf8) {
9393             const STRLEN skip = UTF8SKIP(s);
9394             STRLEN i;
9395             d[skip] = '\0';
9396             for ( i = 0; i < skip; i++ )
9397                 d[i] = *s++;
9398         }
9399         else {
9400             *d = *s++;
9401             d[1] = '\0';
9402         }
9403     }
9404     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9405     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9406         *d = toCTRL(*s);
9407         s++;
9408     }
9409     /* Warn about ambiguous code after unary operators if {...} notation isn't
9410        used.  There's no difference in ambiguity; it's merely a heuristic
9411        about when not to warn.  */
9412     else if (ck_uni && !bracket)
9413         check_uni();
9414     if (bracket) {
9415         /* If we were processing {...} notation then...  */
9416         if (isIDFIRST_lazy_if(d,is_utf8)) {
9417             /* if it starts as a valid identifier, assume that it is one.
9418                (the later check for } being at the expected point will trap
9419                cases where this doesn't pan out.)  */
9420         d += is_utf8 ? UTF8SKIP(d) : 1;
9421         parse_ident(&s, &d, e, 1, is_utf8);
9422             *d = '\0';
9423             while (s < send && SPACE_OR_TAB(*s))
9424                 s++;
9425             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9426                 /* ${foo[0]} and ${foo{bar}} notation.  */
9427                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9428                     const char * const brack =
9429                         (const char *)
9430                         ((*s == '[') ? "[...]" : "{...}");
9431    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9432                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9433                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9434                         funny, dest, brack, funny, dest, brack);
9435                 }
9436                 bracket++;
9437                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9438                 PL_lex_allbrackets++;
9439                 return s;
9440             }
9441         }
9442         /* Handle extended ${^Foo} variables
9443          * 1999-02-27 mjd-perl-patch@plover.com */
9444         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9445                  && isWORDCHAR(*s))
9446         {
9447             d++;
9448             while (isWORDCHAR(*s) && d < e) {
9449                 *d++ = *s++;
9450             }
9451             if (d >= e)
9452                 Perl_croak(aTHX_ "%s", ident_too_long);
9453             *d = '\0';
9454         }
9455
9456         while (s < send && SPACE_OR_TAB(*s))
9457             s++;
9458
9459         /* Expect to find a closing } after consuming any trailing whitespace.
9460          */
9461         if (*s == '}') {
9462             s++;
9463             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9464                 PL_lex_state = LEX_INTERPEND;
9465                 PL_expect = XREF;
9466             }
9467             if (PL_lex_state == LEX_NORMAL) {
9468                 if (ckWARN(WARN_AMBIGUOUS) &&
9469                     (keyword(dest, d - dest, 0)
9470                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9471                 {
9472                     SV *tmp = newSVpvn_flags( dest, d - dest,
9473                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9474                     if (funny == '#')
9475                         funny = '@';
9476                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9477                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9478                         funny, tmp, funny, tmp);
9479                 }
9480             }
9481         }
9482         else {
9483             /* Didn't find the closing } at the point we expected, so restore
9484                state such that the next thing to process is the opening { and */
9485             s = bracket;                /* let the parser handle it */
9486             *dest = '\0';
9487         }
9488     }
9489     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9490         PL_lex_state = LEX_INTERPEND;
9491     return s;
9492 }
9493
9494 static bool
9495 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9496
9497     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9498      * the parse starting at 's', based on the subset that are valid in this
9499      * context input to this routine in 'valid_flags'. Advances s.  Returns
9500      * TRUE if the input should be treated as a valid flag, so the next char
9501      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9502      * first call on the current regex.  This routine will set it to any
9503      * charset modifier found.  The caller shouldn't change it.  This way,
9504      * another charset modifier encountered in the parse can be detected as an
9505      * error, as we have decided to allow only one */
9506
9507     const char c = **s;
9508     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9509
9510     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9511         if (isWORDCHAR_lazy_if(*s, UTF)) {
9512             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9513                        UTF ? SVf_UTF8 : 0);
9514             (*s) += charlen;
9515             /* Pretend that it worked, so will continue processing before
9516              * dieing */
9517             return TRUE;
9518         }
9519         return FALSE;
9520     }
9521
9522     switch (c) {
9523
9524         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9525         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9526         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9527         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9528         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9529         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9530         case LOCALE_PAT_MOD:
9531             if (*charset) {
9532                 goto multiple_charsets;
9533             }
9534             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9535             *charset = c;
9536             break;
9537         case UNICODE_PAT_MOD:
9538             if (*charset) {
9539                 goto multiple_charsets;
9540             }
9541             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9542             *charset = c;
9543             break;
9544         case ASCII_RESTRICT_PAT_MOD:
9545             if (! *charset) {
9546                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9547             }
9548             else {
9549
9550                 /* Error if previous modifier wasn't an 'a', but if it was, see
9551                  * if, and accept, a second occurrence (only) */
9552                 if (*charset != 'a'
9553                     || get_regex_charset(*pmfl)
9554                         != REGEX_ASCII_RESTRICTED_CHARSET)
9555                 {
9556                         goto multiple_charsets;
9557                 }
9558                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9559             }
9560             *charset = c;
9561             break;
9562         case DEPENDS_PAT_MOD:
9563             if (*charset) {
9564                 goto multiple_charsets;
9565             }
9566             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9567             *charset = c;
9568             break;
9569     }
9570
9571     (*s)++;
9572     return TRUE;
9573
9574     multiple_charsets:
9575         if (*charset != c) {
9576             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9577         }
9578         else if (c == 'a') {
9579             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9580         }
9581         else {
9582             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9583         }
9584
9585         /* Pretend that it worked, so will continue processing before dieing */
9586         (*s)++;
9587         return TRUE;
9588 }
9589
9590 STATIC char *
9591 S_scan_pat(pTHX_ char *start, I32 type)
9592 {
9593     dVAR;
9594     PMOP *pm;
9595     char *s;
9596     const char * const valid_flags =
9597         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9598     char charset = '\0';    /* character set modifier */
9599 #ifdef PERL_MAD
9600     char *modstart;
9601 #endif
9602
9603     PERL_ARGS_ASSERT_SCAN_PAT;
9604
9605     s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9606                        TRUE /* look for escaped bracketed metas */ );
9607
9608     if (!s) {
9609         const char * const delimiter = skipspace(start);
9610         Perl_croak(aTHX_
9611                    (const char *)
9612                    (*delimiter == '?'
9613                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9614                     : "Search pattern not terminated" ));
9615     }
9616
9617     pm = (PMOP*)newPMOP(type, 0);
9618     if (PL_multi_open == '?') {
9619         /* This is the only point in the code that sets PMf_ONCE:  */
9620         pm->op_pmflags |= PMf_ONCE;
9621
9622         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9623            allows us to restrict the list needed by reset to just the ??
9624            matches.  */
9625         assert(type != OP_TRANS);
9626         if (PL_curstash) {
9627             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9628             U32 elements;
9629             if (!mg) {
9630                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9631                                  0);
9632             }
9633             elements = mg->mg_len / sizeof(PMOP**);
9634             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9635             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9636             mg->mg_len = elements * sizeof(PMOP**);
9637             PmopSTASH_set(pm,PL_curstash);
9638         }
9639     }
9640 #ifdef PERL_MAD
9641     modstart = s;
9642 #endif
9643
9644     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9645      * anon CV. False positives like qr/[(?{]/ are harmless */
9646
9647     if (type == OP_QR) {
9648         STRLEN len;
9649         char *e, *p = SvPV(PL_lex_stuff, len);
9650         e = p + len;
9651         for (; p < e; p++) {
9652             if (p[0] == '(' && p[1] == '?'
9653                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9654             {
9655                 pm->op_pmflags |= PMf_HAS_CV;
9656                 break;
9657             }
9658         }
9659         pm->op_pmflags |= PMf_IS_QR;
9660     }
9661
9662     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9663 #ifdef PERL_MAD
9664     if (PL_madskills && modstart != s) {
9665         SV* tmptoken = newSVpvn(modstart, s - modstart);
9666         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9667     }
9668 #endif
9669     /* issue a warning if /c is specified,but /g is not */
9670     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9671     {
9672         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9673                        "Use of /c modifier is meaningless without /g" );
9674     }
9675
9676     PL_lex_op = (OP*)pm;
9677     pl_yylval.ival = OP_MATCH;
9678     return s;
9679 }
9680
9681 STATIC char *
9682 S_scan_subst(pTHX_ char *start)
9683 {
9684     dVAR;
9685     char *s;
9686     PMOP *pm;
9687     I32 first_start;
9688     I32 es = 0;
9689     char charset = '\0';    /* character set modifier */
9690 #ifdef PERL_MAD
9691     char *modstart;
9692 #endif
9693
9694     PERL_ARGS_ASSERT_SCAN_SUBST;
9695
9696     pl_yylval.ival = OP_NULL;
9697
9698     s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9699                  TRUE /* look for escaped bracketed metas */ );
9700
9701     if (!s)
9702         Perl_croak(aTHX_ "Substitution pattern not terminated");
9703
9704     if (s[-1] == PL_multi_open)
9705         s--;
9706 #ifdef PERL_MAD
9707     if (PL_madskills) {
9708         CURMAD('q', PL_thisopen);
9709         CURMAD('_', PL_thiswhite);
9710         CURMAD('E', PL_thisstuff);
9711         CURMAD('Q', PL_thisclose);
9712         PL_realtokenstart = s - SvPVX(PL_linestr);
9713     }
9714 #endif
9715
9716     first_start = PL_multi_start;
9717     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9718     if (!s) {
9719         if (PL_lex_stuff) {
9720             SvREFCNT_dec(PL_lex_stuff);
9721             PL_lex_stuff = NULL;
9722         }
9723         Perl_croak(aTHX_ "Substitution replacement not terminated");
9724     }
9725     PL_multi_start = first_start;       /* so whole substitution is taken together */
9726
9727     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9728
9729 #ifdef PERL_MAD
9730     if (PL_madskills) {
9731         CURMAD('z', PL_thisopen);
9732         CURMAD('R', PL_thisstuff);
9733         CURMAD('Z', PL_thisclose);
9734     }
9735     modstart = s;
9736 #endif
9737
9738     while (*s) {
9739         if (*s == EXEC_PAT_MOD) {
9740             s++;
9741             es++;
9742         }
9743         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9744         {
9745             break;
9746         }
9747     }
9748
9749 #ifdef PERL_MAD
9750     if (PL_madskills) {
9751         if (modstart != s)
9752             curmad('m', newSVpvn(modstart, s - modstart));
9753         append_madprops(PL_thismad, (OP*)pm, 0);
9754         PL_thismad = 0;
9755     }
9756 #endif
9757     if ((pm->op_pmflags & PMf_CONTINUE)) {
9758         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9759     }
9760
9761     if (es) {
9762         SV * const repl = newSVpvs("");
9763
9764         PL_multi_end = 0;
9765         pm->op_pmflags |= PMf_EVAL;
9766         while (es-- > 0) {
9767             if (es)
9768                 sv_catpvs(repl, "eval ");
9769             else
9770                 sv_catpvs(repl, "do ");
9771         }
9772         sv_catpvs(repl, "{");
9773         sv_catsv(repl, PL_sublex_info.repl);
9774         sv_catpvs(repl, "}");
9775         SvEVALED_on(repl);
9776         SvREFCNT_dec(PL_sublex_info.repl);
9777         PL_sublex_info.repl = repl;
9778     }
9779
9780     PL_lex_op = (OP*)pm;
9781     pl_yylval.ival = OP_SUBST;
9782     return s;
9783 }
9784
9785 STATIC char *
9786 S_scan_trans(pTHX_ char *start)
9787 {
9788     dVAR;
9789     char* s;
9790     OP *o;
9791     U8 squash;
9792     U8 del;
9793     U8 complement;
9794     bool nondestruct = 0;
9795 #ifdef PERL_MAD
9796     char *modstart;
9797 #endif
9798
9799     PERL_ARGS_ASSERT_SCAN_TRANS;
9800
9801     pl_yylval.ival = OP_NULL;
9802
9803     s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9804     if (!s)
9805         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9806
9807     if (s[-1] == PL_multi_open)
9808         s--;
9809 #ifdef PERL_MAD
9810     if (PL_madskills) {
9811         CURMAD('q', PL_thisopen);
9812         CURMAD('_', PL_thiswhite);
9813         CURMAD('E', PL_thisstuff);
9814         CURMAD('Q', PL_thisclose);
9815         PL_realtokenstart = s - SvPVX(PL_linestr);
9816     }
9817 #endif
9818
9819     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9820     if (!s) {
9821         if (PL_lex_stuff) {
9822             SvREFCNT_dec(PL_lex_stuff);
9823             PL_lex_stuff = NULL;
9824         }
9825         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9826     }
9827     if (PL_madskills) {
9828         CURMAD('z', PL_thisopen);
9829         CURMAD('R', PL_thisstuff);
9830         CURMAD('Z', PL_thisclose);
9831     }
9832
9833     complement = del = squash = 0;
9834 #ifdef PERL_MAD
9835     modstart = s;
9836 #endif
9837     while (1) {
9838         switch (*s) {
9839         case 'c':
9840             complement = OPpTRANS_COMPLEMENT;
9841             break;
9842         case 'd':
9843             del = OPpTRANS_DELETE;
9844             break;
9845         case 's':
9846             squash = OPpTRANS_SQUASH;
9847             break;
9848         case 'r':
9849             nondestruct = 1;
9850             break;
9851         default:
9852             goto no_more;
9853         }
9854         s++;
9855     }
9856   no_more:
9857
9858     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9859     o->op_private &= ~OPpTRANS_ALL;
9860     o->op_private |= del|squash|complement|
9861       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9862       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9863
9864     PL_lex_op = o;
9865     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9866
9867 #ifdef PERL_MAD
9868     if (PL_madskills) {
9869         if (modstart != s)
9870             curmad('m', newSVpvn(modstart, s - modstart));
9871         append_madprops(PL_thismad, o, 0);
9872         PL_thismad = 0;
9873     }
9874 #endif
9875
9876     return s;
9877 }
9878
9879 /* scan_heredoc
9880    Takes a pointer to the first < in <<FOO.
9881    Returns a pointer to the byte following <<FOO.
9882
9883    This function scans a heredoc, which involves different methods
9884    depending on whether we are in a string eval, quoted construct, etc.
9885    This is because PL_linestr could containing a single line of input, or
9886    a whole string being evalled, or the contents of the current quote-
9887    like operator.
9888
9889    The two basic methods are:
9890     - Steal lines from the input stream
9891     - Scan the heredoc in PL_linestr and remove it therefrom
9892
9893    In a file scope or filtered eval, the first method is used; in a
9894    string eval, the second.
9895
9896    In a quote-like operator, we have to choose between the two,
9897    depending on where we can find a newline.  We peek into outer lex-
9898    ing scopes until we find one with a newline in it.  If we reach the
9899    outermost lexing scope and it is a file, we use the stream method.
9900    Otherwise it is treated as an eval.
9901 */
9902
9903 STATIC char *
9904 S_scan_heredoc(pTHX_ char *s)
9905 {
9906     dVAR;
9907     I32 op_type = OP_SCALAR;
9908     I32 len;
9909     SV *tmpstr;
9910     char term;
9911     char *d;
9912     char *e;
9913     char *peek;
9914     const bool infile = PL_rsfp || PL_parser->filtered;
9915     LEXSHARED *shared = PL_parser->lex_shared;
9916 #ifdef PERL_MAD
9917     I32 stuffstart = s - SvPVX(PL_linestr);
9918     char *tstart;
9919
9920     PL_realtokenstart = -1;
9921 #endif
9922
9923     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9924
9925     s += 2;
9926     d = PL_tokenbuf + 1;
9927     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9928     *PL_tokenbuf = '\n';
9929     peek = s;
9930     while (SPACE_OR_TAB(*peek))
9931         peek++;
9932     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9933         s = peek;
9934         term = *s++;
9935         s = delimcpy(d, e, s, PL_bufend, term, &len);
9936         if (s == PL_bufend)
9937             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9938         d += len;
9939         s++;
9940     }
9941     else {
9942         if (*s == '\\')
9943             /* <<\FOO is equivalent to <<'FOO' */
9944             s++, term = '\'';
9945         else
9946             term = '"';
9947         if (!isWORDCHAR_lazy_if(s,UTF))
9948             deprecate("bare << to mean <<\"\"");
9949         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9950             if (d < e)
9951                 *d++ = *s;
9952         }
9953     }
9954     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9955         Perl_croak(aTHX_ "Delimiter for here document is too long");
9956     *d++ = '\n';
9957     *d = '\0';
9958     len = d - PL_tokenbuf;
9959
9960 #ifdef PERL_MAD
9961     if (PL_madskills) {
9962         tstart = PL_tokenbuf + 1;
9963         PL_thisclose = newSVpvn(tstart, len - 1);
9964         tstart = SvPVX(PL_linestr) + stuffstart;
9965         PL_thisopen = newSVpvn(tstart, s - tstart);
9966         stuffstart = s - SvPVX(PL_linestr);
9967     }
9968 #endif
9969 #ifndef PERL_STRICT_CR
9970     d = strchr(s, '\r');
9971     if (d) {
9972         char * const olds = s;
9973         s = d;
9974         while (s < PL_bufend) {
9975             if (*s == '\r') {
9976                 *d++ = '\n';
9977                 if (*++s == '\n')
9978                     s++;
9979             }
9980             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9981                 *d++ = *s++;
9982                 s++;
9983             }
9984             else
9985                 *d++ = *s++;
9986         }
9987         *d = '\0';
9988         PL_bufend = d;
9989         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9990         s = olds;
9991     }
9992 #endif
9993 #ifdef PERL_MAD
9994     if (PL_madskills) {
9995         tstart = SvPVX(PL_linestr) + stuffstart;
9996         if (PL_thisstuff)
9997             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9998         else
9999             PL_thisstuff = newSVpvn(tstart, s - tstart);
10000     }
10001
10002     stuffstart = s - SvPVX(PL_linestr);
10003 #endif
10004
10005     tmpstr = newSV_type(SVt_PVIV);
10006     SvGROW(tmpstr, 80);
10007     if (term == '\'') {
10008         op_type = OP_CONST;
10009         SvIV_set(tmpstr, -1);
10010     }
10011     else if (term == '`') {
10012         op_type = OP_BACKTICK;
10013         SvIV_set(tmpstr, '\\');
10014     }
10015
10016     PL_multi_start = CopLINE(PL_curcop) + 1;
10017     PL_multi_open = PL_multi_close = '<';
10018     /* inside a string eval or quote-like operator */
10019     if (!infile || PL_lex_inwhat) {
10020         SV *linestr;
10021         char *bufend;
10022         char * const olds = s;
10023         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10024         /* These two fields are not set until an inner lexing scope is
10025            entered.  But we need them set here. */
10026         shared->ls_bufptr  = s;
10027         shared->ls_linestr = PL_linestr;
10028         if (PL_lex_inwhat)
10029           /* Look for a newline.  If the current buffer does not have one,
10030              peek into the line buffer of the parent lexing scope, going
10031              up as many levels as necessary to find one with a newline
10032              after bufptr.
10033            */
10034           while (!(s = (char *)memchr(
10035                     (void *)shared->ls_bufptr, '\n',
10036                     SvEND(shared->ls_linestr)-shared->ls_bufptr
10037                 ))) {
10038             shared = shared->ls_prev;
10039             /* shared is only null if we have gone beyond the outermost
10040                lexing scope.  In a file, we will have broken out of the
10041                loop in the previous iteration.  In an eval, the string buf-
10042                fer ends with "\n;", so the while condition below will have
10043                evaluated to false.  So shared can never be null. */
10044             assert(shared);
10045             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10046                most lexing scope.  In a file, shared->ls_linestr at that
10047                level is just one line, so there is no body to steal. */
10048             if (infile && !shared->ls_prev) {
10049                 s = olds;
10050                 goto streaming;
10051             }
10052           }
10053         else {  /* eval */
10054             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10055             assert(s);
10056         }
10057         linestr = shared->ls_linestr;
10058         bufend = SvEND(linestr);
10059         d = s;
10060         while (s < bufend - len + 1 &&
10061           memNE(s,PL_tokenbuf,len) ) {
10062             if (*s++ == '\n')
10063                 ++shared->herelines;
10064         }
10065         if (s >= bufend - len + 1) {
10066             goto interminable;
10067         }
10068         sv_setpvn(tmpstr,d+1,s-d);
10069 #ifdef PERL_MAD
10070         if (PL_madskills) {
10071             if (PL_thisstuff)
10072                 sv_catpvn(PL_thisstuff, d + 1, s - d);
10073             else
10074                 PL_thisstuff = newSVpvn(d + 1, s - d);
10075             stuffstart = s - SvPVX(PL_linestr);
10076         }
10077 #endif
10078         s += len - 1;
10079         /* the preceding stmt passes a newline */
10080         shared->herelines++;
10081
10082         /* s now points to the newline after the heredoc terminator.
10083            d points to the newline before the body of the heredoc.
10084          */
10085
10086         /* We are going to modify linestr in place here, so set
10087            aside copies of the string if necessary for re-evals or
10088            (caller $n)[6]. */
10089         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10090            check shared->re_eval_str. */
10091         if (shared->re_eval_start || shared->re_eval_str) {
10092             /* Set aside the rest of the regexp */
10093             if (!shared->re_eval_str)
10094                 shared->re_eval_str =
10095                        newSVpvn(shared->re_eval_start,
10096                                 bufend - shared->re_eval_start);
10097             shared->re_eval_start -= s-d;
10098         }
10099         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10100             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10101             cx->blk_eval.cur_text == linestr)
10102         {
10103             cx->blk_eval.cur_text = newSVsv(linestr);
10104             SvSCREAM_on(cx->blk_eval.cur_text);
10105         }
10106         /* Copy everything from s onwards back to d. */
10107         Move(s,d,bufend-s + 1,char);
10108         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10109         /* Setting PL_bufend only applies when we have not dug deeper
10110            into other scopes, because sublex_done sets PL_bufend to
10111            SvEND(PL_linestr). */
10112         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10113         s = olds;
10114     }
10115     else
10116     {
10117       SV *linestr_save;
10118      streaming:
10119       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
10120       term = PL_tokenbuf[1];
10121       len--;
10122       linestr_save = PL_linestr; /* must restore this afterwards */
10123       d = s;                     /* and this */
10124       PL_linestr = newSVpvs("");
10125       PL_bufend = SvPVX(PL_linestr);
10126       while (1) {
10127 #ifdef PERL_MAD
10128         if (PL_madskills) {
10129             tstart = SvPVX(PL_linestr) + stuffstart;
10130             if (PL_thisstuff)
10131                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10132             else
10133                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10134         }
10135 #endif
10136         PL_bufptr = PL_bufend;
10137         CopLINE_set(PL_curcop,
10138                     PL_multi_start + shared->herelines);
10139         if (!lex_next_chunk(LEX_NO_TERM)
10140          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10141             SvREFCNT_dec(linestr_save);
10142             goto interminable;
10143         }
10144         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10145         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10146             lex_grow_linestr(SvCUR(PL_linestr) + 2);
10147             sv_catpvs(PL_linestr, "\n\0");
10148         }
10149         s = PL_bufptr;
10150 #ifdef PERL_MAD
10151         stuffstart = s - SvPVX(PL_linestr);
10152 #endif
10153         shared->herelines++;
10154         PL_last_lop = PL_last_uni = NULL;
10155 #ifndef PERL_STRICT_CR
10156         if (PL_bufend - PL_linestart >= 2) {
10157             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10158                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10159             {
10160                 PL_bufend[-2] = '\n';
10161                 PL_bufend--;
10162                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10163             }
10164             else if (PL_bufend[-1] == '\r')
10165                 PL_bufend[-1] = '\n';
10166         }
10167         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10168             PL_bufend[-1] = '\n';
10169 #endif
10170         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10171             SvREFCNT_dec(PL_linestr);
10172             PL_linestr = linestr_save;
10173             PL_linestart = SvPVX(linestr_save);
10174             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10175             s = d;
10176             break;
10177         }
10178         else {
10179             sv_catsv(tmpstr,PL_linestr);
10180         }
10181       }
10182     }
10183     PL_multi_end = CopLINE(PL_curcop);
10184     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10185         SvPV_shrink_to_cur(tmpstr);
10186     }
10187     if (!IN_BYTES) {
10188         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10189             SvUTF8_on(tmpstr);
10190         else if (PL_encoding)
10191             sv_recode_to_utf8(tmpstr, PL_encoding);
10192     }
10193     PL_lex_stuff = tmpstr;
10194     pl_yylval.ival = op_type;
10195     return s;
10196
10197   interminable:
10198     SvREFCNT_dec(tmpstr);
10199     CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10200     missingterm(PL_tokenbuf + 1);
10201 }
10202
10203 /* scan_inputsymbol
10204    takes: current position in input buffer
10205    returns: new position in input buffer
10206    side-effects: pl_yylval and lex_op are set.
10207
10208    This code handles:
10209
10210    <>           read from ARGV
10211    <FH>         read from filehandle
10212    <pkg::FH>    read from package qualified filehandle
10213    <pkg'FH>     read from package qualified filehandle
10214    <$fh>        read from filehandle in $fh
10215    <*.h>        filename glob
10216
10217 */
10218
10219 STATIC char *
10220 S_scan_inputsymbol(pTHX_ char *start)
10221 {
10222     dVAR;
10223     char *s = start;            /* current position in buffer */
10224     char *end;
10225     I32 len;
10226     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10227     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10228
10229     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10230
10231     end = strchr(s, '\n');
10232     if (!end)
10233         end = PL_bufend;
10234     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
10235
10236     /* die if we didn't have space for the contents of the <>,
10237        or if it didn't end, or if we see a newline
10238     */
10239
10240     if (len >= (I32)sizeof PL_tokenbuf)
10241         Perl_croak(aTHX_ "Excessively long <> operator");
10242     if (s >= end)
10243         Perl_croak(aTHX_ "Unterminated <> operator");
10244
10245     s++;
10246
10247     /* check for <$fh>
10248        Remember, only scalar variables are interpreted as filehandles by
10249        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10250        treated as a glob() call.
10251        This code makes use of the fact that except for the $ at the front,
10252        a scalar variable and a filehandle look the same.
10253     */
10254     if (*d == '$' && d[1]) d++;
10255
10256     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10257     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10258         d += UTF ? UTF8SKIP(d) : 1;
10259
10260     /* If we've tried to read what we allow filehandles to look like, and
10261        there's still text left, then it must be a glob() and not a getline.
10262        Use scan_str to pull out the stuff between the <> and treat it
10263        as nothing more than a string.
10264     */
10265
10266     if (d - PL_tokenbuf != len) {
10267         pl_yylval.ival = OP_GLOB;
10268         s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10269         if (!s)
10270            Perl_croak(aTHX_ "Glob not terminated");
10271         return s;
10272     }
10273     else {
10274         bool readline_overriden = FALSE;
10275         GV *gv_readline;
10276         GV **gvp;
10277         /* we're in a filehandle read situation */
10278         d = PL_tokenbuf;
10279
10280         /* turn <> into <ARGV> */
10281         if (!len)
10282             Copy("ARGV",d,5,char);
10283
10284         /* Check whether readline() is overriden */
10285         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10286         if ((gv_readline
10287                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10288                 ||
10289                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10290                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10291                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10292             readline_overriden = TRUE;
10293
10294         /* if <$fh>, create the ops to turn the variable into a
10295            filehandle
10296         */
10297         if (*d == '$') {
10298             /* try to find it in the pad for this block, otherwise find
10299                add symbol table ops
10300             */
10301             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10302             if (tmp != NOT_IN_PAD) {
10303                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10304                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10305                     HEK * const stashname = HvNAME_HEK(stash);
10306                     SV * const sym = sv_2mortal(newSVhek(stashname));
10307                     sv_catpvs(sym, "::");
10308                     sv_catpv(sym, d+1);
10309                     d = SvPVX(sym);
10310                     goto intro_sym;
10311                 }
10312                 else {
10313                     OP * const o = newOP(OP_PADSV, 0);
10314                     o->op_targ = tmp;
10315                     PL_lex_op = readline_overriden
10316                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10317                                 op_append_elem(OP_LIST, o,
10318                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10319                         : (OP*)newUNOP(OP_READLINE, 0, o);
10320                 }
10321             }
10322             else {
10323                 GV *gv;
10324                 ++d;
10325 intro_sym:
10326                 gv = gv_fetchpv(d,
10327                                 (PL_in_eval
10328                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10329                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10330                                 SVt_PV);
10331                 PL_lex_op = readline_overriden
10332                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10333                             op_append_elem(OP_LIST,
10334                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10335                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10336                     : (OP*)newUNOP(OP_READLINE, 0,
10337                             newUNOP(OP_RV2SV, 0,
10338                                 newGVOP(OP_GV, 0, gv)));
10339             }
10340             if (!readline_overriden)
10341                 PL_lex_op->op_flags |= OPf_SPECIAL;
10342             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10343             pl_yylval.ival = OP_NULL;
10344         }
10345
10346         /* If it's none of the above, it must be a literal filehandle
10347            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10348         else {
10349             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10350             PL_lex_op = readline_overriden
10351                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10352                         op_append_elem(OP_LIST,
10353                             newGVOP(OP_GV, 0, gv),
10354                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10355                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10356             pl_yylval.ival = OP_NULL;
10357         }
10358     }
10359
10360     return s;
10361 }
10362
10363
10364 /* scan_str
10365    takes: start position in buffer
10366           keep_quoted preserve \ on the embedded delimiter(s)
10367           keep_delims preserve the delimiters around the string
10368           re_reparse  compiling a run-time /(?{})/:
10369                         collapse // to /,  and skip encoding src
10370    returns: position to continue reading from buffer
10371    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10372         updates the read buffer.
10373
10374    This subroutine pulls a string out of the input.  It is called for:
10375         q               single quotes           q(literal text)
10376         '               single quotes           'literal text'
10377         qq              double quotes           qq(interpolate $here please)
10378         "               double quotes           "interpolate $here please"
10379         qx              backticks               qx(/bin/ls -l)
10380         `               backticks               `/bin/ls -l`
10381         qw              quote words             @EXPORT_OK = qw( func() $spam )
10382         m//             regexp match            m/this/
10383         s///            regexp substitute       s/this/that/
10384         tr///           string transliterate    tr/this/that/
10385         y///            string transliterate    y/this/that/
10386         ($*@)           sub prototypes          sub foo ($)
10387         (stuff)         sub attr parameters     sub foo : attr(stuff)
10388         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10389
10390    In most of these cases (all but <>, patterns and transliterate)
10391    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10392    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10393    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10394    calls scan_str().
10395
10396    It skips whitespace before the string starts, and treats the first
10397    character as the delimiter.  If the delimiter is one of ([{< then
10398    the corresponding "close" character )]}> is used as the closing
10399    delimiter.  It allows quoting of delimiters, and if the string has
10400    balanced delimiters ([{<>}]) it allows nesting.
10401
10402    On success, the SV with the resulting string is put into lex_stuff or,
10403    if that is already non-NULL, into lex_repl. The second case occurs only
10404    when parsing the RHS of the special constructs s/// and tr/// (y///).
10405    For convenience, the terminating delimiter character is stuffed into
10406    SvIVX of the SV.
10407 */
10408
10409 STATIC char *
10410 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10411         bool deprecate_escaped_meta /* Should we issue a deprecation warning
10412                                        for certain paired metacharacters that
10413                                        appear escaped within it */
10414     )
10415 {
10416     dVAR;
10417     SV *sv;                     /* scalar value: string */
10418     const char *tmps;           /* temp string, used for delimiter matching */
10419     char *s = start;            /* current position in the buffer */
10420     char term;                  /* terminating character */
10421     char *to;                   /* current position in the sv's data */
10422     I32 brackets = 1;           /* bracket nesting level */
10423     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10424     I32 termcode;               /* terminating char. code */
10425     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10426     STRLEN termlen;             /* length of terminating string */
10427     int last_off = 0;           /* last position for nesting bracket */
10428     char *escaped_open = NULL;
10429 #ifdef PERL_MAD
10430     int stuffstart;
10431     char *tstart;
10432 #endif
10433
10434     PERL_ARGS_ASSERT_SCAN_STR;
10435
10436     /* skip space before the delimiter */
10437     if (isSPACE(*s)) {
10438         s = PEEKSPACE(s);
10439     }
10440
10441 #ifdef PERL_MAD
10442     if (PL_realtokenstart >= 0) {
10443         stuffstart = PL_realtokenstart;
10444         PL_realtokenstart = -1;
10445     }
10446     else
10447         stuffstart = start - SvPVX(PL_linestr);
10448 #endif
10449     /* mark where we are, in case we need to report errors */
10450     CLINE;
10451
10452     /* after skipping whitespace, the next character is the terminator */
10453     term = *s;
10454     if (!UTF) {
10455         termcode = termstr[0] = term;
10456         termlen = 1;
10457     }
10458     else {
10459         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10460         Copy(s, termstr, termlen, U8);
10461         if (!UTF8_IS_INVARIANT(term))
10462             has_utf8 = TRUE;
10463     }
10464
10465     /* mark where we are */
10466     PL_multi_start = CopLINE(PL_curcop);
10467     PL_multi_open = term;
10468
10469     /* find corresponding closing delimiter */
10470     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10471         termcode = termstr[0] = term = tmps[5];
10472
10473     PL_multi_close = term;
10474
10475     /* A warning is raised if the input parameter requires it for escaped (by a
10476      * backslash) paired metacharacters {} [] and () when the delimiters are
10477      * those same characters, and the backslash is ineffective.  This doesn't
10478      * happen for <>, as they aren't metas. */
10479     if (deprecate_escaped_meta
10480         && (PL_multi_open == PL_multi_close
10481             || ! ckWARN_d(WARN_DEPRECATED)
10482             || PL_multi_open == '<'))
10483     {
10484         deprecate_escaped_meta = FALSE;
10485     }
10486
10487     /* create a new SV to hold the contents.  79 is the SV's initial length.
10488        What a random number. */
10489     sv = newSV_type(SVt_PVIV);
10490     SvGROW(sv, 80);
10491     SvIV_set(sv, termcode);
10492     (void)SvPOK_only(sv);               /* validate pointer */
10493
10494     /* move past delimiter and try to read a complete string */
10495     if (keep_delims)
10496         sv_catpvn(sv, s, termlen);
10497     s += termlen;
10498 #ifdef PERL_MAD
10499     tstart = SvPVX(PL_linestr) + stuffstart;
10500     if (PL_madskills && !PL_thisopen && !keep_delims) {
10501         PL_thisopen = newSVpvn(tstart, s - tstart);
10502         stuffstart = s - SvPVX(PL_linestr);
10503     }
10504 #endif
10505     for (;;) {
10506         if (PL_encoding && !UTF && !re_reparse) {
10507             bool cont = TRUE;
10508
10509             while (cont) {
10510                 int offset = s - SvPVX_const(PL_linestr);
10511                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10512                                            &offset, (char*)termstr, termlen);
10513                 const char * const ns = SvPVX_const(PL_linestr) + offset;
10514                 char * const svlast = SvEND(sv) - 1;
10515
10516                 for (; s < ns; s++) {
10517                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10518                         COPLINE_INC_WITH_HERELINES;
10519                 }
10520                 if (!found)
10521                     goto read_more_line;
10522                 else {
10523                     /* handle quoted delimiters */
10524                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10525                         const char *t;
10526                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10527                             t--;
10528                         if ((svlast-1 - t) % 2) {
10529                             if (!keep_quoted) {
10530                                 *(svlast-1) = term;
10531                                 *svlast = '\0';
10532                                 SvCUR_set(sv, SvCUR(sv) - 1);
10533                             }
10534                             continue;
10535                         }
10536                     }
10537                     if (PL_multi_open == PL_multi_close) {
10538                         cont = FALSE;
10539                     }
10540                     else {
10541                         const char *t;
10542                         char *w;
10543                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10544                             /* At here, all closes are "was quoted" one,
10545                                so we don't check PL_multi_close. */
10546                             if (*t == '\\') {
10547                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10548                                     t++;
10549                                 else
10550                                     *w++ = *t++;
10551                             }
10552                             else if (*t == PL_multi_open)
10553                                 brackets++;
10554
10555                             *w = *t;
10556                         }
10557                         if (w < t) {
10558                             *w++ = term;
10559                             *w = '\0';
10560                             SvCUR_set(sv, w - SvPVX_const(sv));
10561                         }
10562                         last_off = w - SvPVX(sv);
10563                         if (--brackets <= 0)
10564                             cont = FALSE;
10565                     }
10566                 }
10567             }
10568             if (!keep_delims) {
10569                 SvCUR_set(sv, SvCUR(sv) - 1);
10570                 *SvEND(sv) = '\0';
10571             }
10572             break;
10573         }
10574
10575         /* extend sv if need be */
10576         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10577         /* set 'to' to the next character in the sv's string */
10578         to = SvPVX(sv)+SvCUR(sv);
10579
10580         /* if open delimiter is the close delimiter read unbridle */
10581         if (PL_multi_open == PL_multi_close) {
10582             for (; s < PL_bufend; s++,to++) {
10583                 /* embedded newlines increment the current line number */
10584                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10585                     COPLINE_INC_WITH_HERELINES;
10586                 /* handle quoted delimiters */
10587                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10588                     if (!keep_quoted
10589                         && (s[1] == term
10590                             || (re_reparse && s[1] == '\\'))
10591                     )
10592                         s++;
10593                     /* any other quotes are simply copied straight through */
10594                     else
10595                         *to++ = *s++;
10596                 }
10597                 /* terminate when run out of buffer (the for() condition), or
10598                    have found the terminator */
10599                 else if (*s == term) {
10600                     if (termlen == 1)
10601                         break;
10602                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10603                         break;
10604                 }
10605                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10606                     has_utf8 = TRUE;
10607                 *to = *s;
10608             }
10609         }
10610
10611         /* if the terminator isn't the same as the start character (e.g.,
10612            matched brackets), we have to allow more in the quoting, and
10613            be prepared for nested brackets.
10614         */
10615         else {
10616             /* read until we run out of string, or we find the terminator */
10617             for (; s < PL_bufend; s++,to++) {
10618                 /* embedded newlines increment the line count */
10619                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10620                     COPLINE_INC_WITH_HERELINES;
10621                 /* backslashes can escape the open or closing characters */
10622                 if (*s == '\\' && s+1 < PL_bufend) {
10623                     if (!keep_quoted &&
10624                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10625                     {
10626                         s++;
10627
10628                         /* Here, 'deprecate_escaped_meta' is true iff the
10629                          * delimiters are paired metacharacters, and 's' points
10630                          * to an occurrence of one of them within the string,
10631                          * which was preceded by a backslash.  If this is a
10632                          * context where the delimiter is also a metacharacter,
10633                          * the backslash is useless, and deprecated.  () and []
10634                          * are meta in any context. {} are meta only when
10635                          * appearing in a quantifier or in things like '\p{'.
10636                          * They also aren't meta unless there is a matching
10637                          * closed, escaped char later on within the string.
10638                          * If 's' points to an open, set a flag; if to a close,
10639                          * test that flag, and raise a warning if it was set */
10640
10641                         if (deprecate_escaped_meta) {
10642                             if (*s == PL_multi_open) {
10643                                 if (*s != '{') {
10644                                     escaped_open = s;
10645                                 }
10646                                 else if (regcurly(s,
10647                                                   TRUE /* Look for a closing
10648                                                           '\}' */)
10649                                          || (s - start > 2  /* Look for e.g.
10650                                                                '\x{' */
10651                                              && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
10652                                 {
10653                                     escaped_open = s;
10654                                 }
10655                             }
10656                             else if (escaped_open) {
10657                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10658                                     "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10659                                 escaped_open = NULL;
10660                             }
10661                         }
10662                     }
10663                     else
10664                         *to++ = *s++;
10665                 }
10666                 /* allow nested opens and closes */
10667                 else if (*s == PL_multi_close && --brackets <= 0)
10668                     break;
10669                 else if (*s == PL_multi_open)
10670                     brackets++;
10671                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10672                     has_utf8 = TRUE;
10673                 *to = *s;
10674             }
10675         }
10676         /* terminate the copied string and update the sv's end-of-string */
10677         *to = '\0';
10678         SvCUR_set(sv, to - SvPVX_const(sv));
10679
10680         /*
10681          * this next chunk reads more into the buffer if we're not done yet
10682          */
10683
10684         if (s < PL_bufend)
10685             break;              /* handle case where we are done yet :-) */
10686
10687 #ifndef PERL_STRICT_CR
10688         if (to - SvPVX_const(sv) >= 2) {
10689             if ((to[-2] == '\r' && to[-1] == '\n') ||
10690                 (to[-2] == '\n' && to[-1] == '\r'))
10691             {
10692                 to[-2] = '\n';
10693                 to--;
10694                 SvCUR_set(sv, to - SvPVX_const(sv));
10695             }
10696             else if (to[-1] == '\r')
10697                 to[-1] = '\n';
10698         }
10699         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10700             to[-1] = '\n';
10701 #endif
10702
10703      read_more_line:
10704         /* if we're out of file, or a read fails, bail and reset the current
10705            line marker so we can report where the unterminated string began
10706         */
10707 #ifdef PERL_MAD
10708         if (PL_madskills) {
10709             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10710             if (PL_thisstuff)
10711                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10712             else
10713                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10714         }
10715 #endif
10716         COPLINE_INC_WITH_HERELINES;
10717         PL_bufptr = PL_bufend;
10718         if (!lex_next_chunk(0)) {
10719             sv_free(sv);
10720             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10721             return NULL;
10722         }
10723         s = PL_bufptr;
10724 #ifdef PERL_MAD
10725         stuffstart = 0;
10726 #endif
10727     }
10728
10729     /* at this point, we have successfully read the delimited string */
10730
10731     if (!PL_encoding || UTF || re_reparse) {
10732 #ifdef PERL_MAD
10733         if (PL_madskills) {
10734             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10735             const int len = s - tstart;
10736             if (PL_thisstuff)
10737                 sv_catpvn(PL_thisstuff, tstart, len);
10738             else
10739                 PL_thisstuff = newSVpvn(tstart, len);
10740             if (!PL_thisclose && !keep_delims)
10741                 PL_thisclose = newSVpvn(s,termlen);
10742         }
10743 #endif
10744
10745         if (keep_delims)
10746             sv_catpvn(sv, s, termlen);
10747         s += termlen;
10748     }
10749 #ifdef PERL_MAD
10750     else {
10751         if (PL_madskills) {
10752             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10753             const int len = s - tstart - termlen;
10754             if (PL_thisstuff)
10755                 sv_catpvn(PL_thisstuff, tstart, len);
10756             else
10757                 PL_thisstuff = newSVpvn(tstart, len);
10758             if (!PL_thisclose && !keep_delims)
10759                 PL_thisclose = newSVpvn(s - termlen,termlen);
10760         }
10761     }
10762 #endif
10763     if (has_utf8 || (PL_encoding && !re_reparse))
10764         SvUTF8_on(sv);
10765
10766     PL_multi_end = CopLINE(PL_curcop);
10767
10768     /* if we allocated too much space, give some back */
10769     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10770         SvLEN_set(sv, SvCUR(sv) + 1);
10771         SvPV_renew(sv, SvLEN(sv));
10772     }
10773
10774     /* decide whether this is the first or second quoted string we've read
10775        for this op
10776     */
10777
10778     if (PL_lex_stuff)
10779         PL_sublex_info.repl = sv;
10780     else
10781         PL_lex_stuff = sv;
10782     return s;
10783 }
10784
10785 /*
10786   scan_num
10787   takes: pointer to position in buffer
10788   returns: pointer to new position in buffer
10789   side-effects: builds ops for the constant in pl_yylval.op
10790
10791   Read a number in any of the formats that Perl accepts:
10792
10793   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10794   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10795   0b[01](_?[01])*
10796   0[0-7](_?[0-7])*
10797   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10798
10799   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10800   thing it reads.
10801
10802   If it reads a number without a decimal point or an exponent, it will
10803   try converting the number to an integer and see if it can do so
10804   without loss of precision.
10805 */
10806
10807 char *
10808 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10809 {
10810     dVAR;
10811     const char *s = start;      /* current position in buffer */
10812     char *d;                    /* destination in temp buffer */
10813     char *e;                    /* end of temp buffer */
10814     NV nv;                              /* number read, as a double */
10815     SV *sv = NULL;                      /* place to put the converted number */
10816     bool floatit;                       /* boolean: int or float? */
10817     const char *lastub = NULL;          /* position of last underbar */
10818     static const char* const number_too_long = "Number too long";
10819
10820     PERL_ARGS_ASSERT_SCAN_NUM;
10821
10822     /* We use the first character to decide what type of number this is */
10823
10824     switch (*s) {
10825     default:
10826         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10827
10828     /* if it starts with a 0, it could be an octal number, a decimal in
10829        0.13 disguise, or a hexadecimal number, or a binary number. */
10830     case '0':
10831         {
10832           /* variables:
10833              u          holds the "number so far"
10834              shift      the power of 2 of the base
10835                         (hex == 4, octal == 3, binary == 1)
10836              overflowed was the number more than we can hold?
10837
10838              Shift is used when we add a digit.  It also serves as an "are
10839              we in octal/hex/binary?" indicator to disallow hex characters
10840              when in octal mode.
10841            */
10842             NV n = 0.0;
10843             UV u = 0;
10844             I32 shift;
10845             bool overflowed = FALSE;
10846             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10847             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10848             static const char* const bases[5] =
10849               { "", "binary", "", "octal", "hexadecimal" };
10850             static const char* const Bases[5] =
10851               { "", "Binary", "", "Octal", "Hexadecimal" };
10852             static const char* const maxima[5] =
10853               { "",
10854                 "0b11111111111111111111111111111111",
10855                 "",
10856                 "037777777777",
10857                 "0xffffffff" };
10858             const char *base, *Base, *max;
10859
10860             /* check for hex */
10861             if (s[1] == 'x' || s[1] == 'X') {
10862                 shift = 4;
10863                 s += 2;
10864                 just_zero = FALSE;
10865             } else if (s[1] == 'b' || s[1] == 'B') {
10866                 shift = 1;
10867                 s += 2;
10868                 just_zero = FALSE;
10869             }
10870             /* check for a decimal in disguise */
10871             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10872                 goto decimal;
10873             /* so it must be octal */
10874             else {
10875                 shift = 3;
10876                 s++;
10877             }
10878
10879             if (*s == '_') {
10880                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10881                                "Misplaced _ in number");
10882                lastub = s++;
10883             }
10884
10885             base = bases[shift];
10886             Base = Bases[shift];
10887             max  = maxima[shift];
10888
10889             /* read the rest of the number */
10890             for (;;) {
10891                 /* x is used in the overflow test,
10892                    b is the digit we're adding on. */
10893                 UV x, b;
10894
10895                 switch (*s) {
10896
10897                 /* if we don't mention it, we're done */
10898                 default:
10899                     goto out;
10900
10901                 /* _ are ignored -- but warned about if consecutive */
10902                 case '_':
10903                     if (lastub && s == lastub + 1)
10904                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10905                                        "Misplaced _ in number");
10906                     lastub = s++;
10907                     break;
10908
10909                 /* 8 and 9 are not octal */
10910                 case '8': case '9':
10911                     if (shift == 3)
10912                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10913                     /* FALL THROUGH */
10914
10915                 /* octal digits */
10916                 case '2': case '3': case '4':
10917                 case '5': case '6': case '7':
10918                     if (shift == 1)
10919                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10920                     /* FALL THROUGH */
10921
10922                 case '0': case '1':
10923                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10924                     goto digit;
10925
10926                 /* hex digits */
10927                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10928                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10929                     /* make sure they said 0x */
10930                     if (shift != 4)
10931                         goto out;
10932                     b = (*s++ & 7) + 9;
10933
10934                     /* Prepare to put the digit we have onto the end
10935                        of the number so far.  We check for overflows.
10936                     */
10937
10938                   digit:
10939                     just_zero = FALSE;
10940                     if (!overflowed) {
10941                         x = u << shift; /* make room for the digit */
10942
10943                         if ((x >> shift) != u
10944                             && !(PL_hints & HINT_NEW_BINARY)) {
10945                             overflowed = TRUE;
10946                             n = (NV) u;
10947                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10948                                              "Integer overflow in %s number",
10949                                              base);
10950                         } else
10951                             u = x | b;          /* add the digit to the end */
10952                     }
10953                     if (overflowed) {
10954                         n *= nvshift[shift];
10955                         /* If an NV has not enough bits in its
10956                          * mantissa to represent an UV this summing of
10957                          * small low-order numbers is a waste of time
10958                          * (because the NV cannot preserve the
10959                          * low-order bits anyway): we could just
10960                          * remember when did we overflow and in the
10961                          * end just multiply n by the right
10962                          * amount. */
10963                         n += (NV) b;
10964                     }
10965                     break;
10966                 }
10967             }
10968
10969           /* if we get here, we had success: make a scalar value from
10970              the number.
10971           */
10972           out:
10973
10974             /* final misplaced underbar check */
10975             if (s[-1] == '_') {
10976                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10977             }
10978
10979             if (overflowed) {
10980                 if (n > 4294967295.0)
10981                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10982                                    "%s number > %s non-portable",
10983                                    Base, max);
10984                 sv = newSVnv(n);
10985             }
10986             else {
10987 #if UVSIZE > 4
10988                 if (u > 0xffffffff)
10989                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10990                                    "%s number > %s non-portable",
10991                                    Base, max);
10992 #endif
10993                 sv = newSVuv(u);
10994             }
10995             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10996                 sv = new_constant(start, s - start, "integer",
10997                                   sv, NULL, NULL, 0);
10998             else if (PL_hints & HINT_NEW_BINARY)
10999                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11000         }
11001         break;
11002
11003     /*
11004       handle decimal numbers.
11005       we're also sent here when we read a 0 as the first digit
11006     */
11007     case '1': case '2': case '3': case '4': case '5':
11008     case '6': case '7': case '8': case '9': case '.':
11009       decimal:
11010         d = PL_tokenbuf;
11011         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11012         floatit = FALSE;
11013
11014         /* read next group of digits and _ and copy into d */
11015         while (isDIGIT(*s) || *s == '_') {
11016             /* skip underscores, checking for misplaced ones
11017                if -w is on
11018             */
11019             if (*s == '_') {
11020                 if (lastub && s == lastub + 1)
11021                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11022                                    "Misplaced _ in number");
11023                 lastub = s++;
11024             }
11025             else {
11026                 /* check for end of fixed-length buffer */
11027                 if (d >= e)
11028                     Perl_croak(aTHX_ "%s", number_too_long);
11029                 /* if we're ok, copy the character */
11030                 *d++ = *s++;
11031             }
11032         }
11033
11034         /* final misplaced underbar check */
11035         if (lastub && s == lastub + 1) {
11036             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11037         }
11038
11039         /* read a decimal portion if there is one.  avoid
11040            3..5 being interpreted as the number 3. followed
11041            by .5
11042         */
11043         if (*s == '.' && s[1] != '.') {
11044             floatit = TRUE;
11045             *d++ = *s++;
11046
11047             if (*s == '_') {
11048                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11049                                "Misplaced _ in number");
11050                 lastub = s;
11051             }
11052
11053             /* copy, ignoring underbars, until we run out of digits.
11054             */
11055             for (; isDIGIT(*s) || *s == '_'; s++) {
11056                 /* fixed length buffer check */
11057                 if (d >= e)
11058                     Perl_croak(aTHX_ "%s", number_too_long);
11059                 if (*s == '_') {
11060                    if (lastub && s == lastub + 1)
11061                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11062                                       "Misplaced _ in number");
11063                    lastub = s;
11064                 }
11065                 else
11066                     *d++ = *s;
11067             }
11068             /* fractional part ending in underbar? */
11069             if (s[-1] == '_') {
11070                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11071                                "Misplaced _ in number");
11072             }
11073             if (*s == '.' && isDIGIT(s[1])) {
11074                 /* oops, it's really a v-string, but without the "v" */
11075                 s = start;
11076                 goto vstring;
11077             }
11078         }
11079
11080         /* read exponent part, if present */
11081         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11082             floatit = TRUE;
11083             s++;
11084
11085             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11086             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
11087
11088             /* stray preinitial _ */
11089             if (*s == '_') {
11090                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11091                                "Misplaced _ in number");
11092                 lastub = s++;
11093             }
11094
11095             /* allow positive or negative exponent */
11096             if (*s == '+' || *s == '-')
11097                 *d++ = *s++;
11098
11099             /* stray initial _ */
11100             if (*s == '_') {
11101                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11102                                "Misplaced _ in number");
11103                 lastub = s++;
11104             }
11105
11106             /* read digits of exponent */
11107             while (isDIGIT(*s) || *s == '_') {
11108                 if (isDIGIT(*s)) {
11109                     if (d >= e)
11110                         Perl_croak(aTHX_ "%s", number_too_long);
11111                     *d++ = *s++;
11112                 }
11113                 else {
11114                    if (((lastub && s == lastub + 1) ||
11115                         (!isDIGIT(s[1]) && s[1] != '_')))
11116                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11117                                       "Misplaced _ in number");
11118                    lastub = s++;
11119                 }
11120             }
11121         }
11122
11123
11124         /*
11125            We try to do an integer conversion first if no characters
11126            indicating "float" have been found.
11127          */
11128
11129         if (!floatit) {
11130             UV uv;
11131             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11132
11133             if (flags == IS_NUMBER_IN_UV) {
11134               if (uv <= IV_MAX)
11135                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11136               else
11137                 sv = newSVuv(uv);
11138             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11139               if (uv <= (UV) IV_MIN)
11140                 sv = newSViv(-(IV)uv);
11141               else
11142                 floatit = TRUE;
11143             } else
11144               floatit = TRUE;
11145         }
11146         if (floatit) {
11147             /* terminate the string */
11148             *d = '\0';
11149             nv = Atof(PL_tokenbuf);
11150             sv = newSVnv(nv);
11151         }
11152
11153         if ( floatit
11154              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11155             const char *const key = floatit ? "float" : "integer";
11156             const STRLEN keylen = floatit ? 5 : 7;
11157             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11158                                 key, keylen, sv, NULL, NULL, 0);
11159         }
11160         break;
11161
11162     /* if it starts with a v, it could be a v-string */
11163     case 'v':
11164 vstring:
11165                 sv = newSV(5); /* preallocate storage space */
11166                 ENTER_with_name("scan_vstring");
11167                 SAVEFREESV(sv);
11168                 s = scan_vstring(s, PL_bufend, sv);
11169                 SvREFCNT_inc_simple_void_NN(sv);
11170                 LEAVE_with_name("scan_vstring");
11171         break;
11172     }
11173
11174     /* make the op for the constant and return */
11175
11176     if (sv)
11177         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11178     else
11179         lvalp->opval = NULL;
11180
11181     return (char *)s;
11182 }
11183
11184 STATIC char *
11185 S_scan_formline(pTHX_ char *s)
11186 {
11187     dVAR;
11188     char *eol;
11189     char *t;
11190     SV * const stuff = newSVpvs("");
11191     bool needargs = FALSE;
11192     bool eofmt = FALSE;
11193 #ifdef PERL_MAD
11194     char *tokenstart = s;
11195     SV* savewhite = NULL;
11196
11197     if (PL_madskills) {
11198         savewhite = PL_thiswhite;
11199         PL_thiswhite = 0;
11200     }
11201 #endif
11202
11203     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11204
11205     while (!needargs) {
11206         if (*s == '.') {
11207             t = s+1;
11208 #ifdef PERL_STRICT_CR
11209             while (SPACE_OR_TAB(*t))
11210                 t++;
11211 #else
11212             while (SPACE_OR_TAB(*t) || *t == '\r')
11213                 t++;
11214 #endif
11215             if (*t == '\n' || t == PL_bufend) {
11216                 eofmt = TRUE;
11217                 break;
11218             }
11219         }
11220         eol = (char *) memchr(s,'\n',PL_bufend-s);
11221         if (!eol++)
11222                 eol = PL_bufend;
11223         if (*s != '#') {
11224             for (t = s; t < eol; t++) {
11225                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11226                     needargs = FALSE;
11227                     goto enough;        /* ~~ must be first line in formline */
11228                 }
11229                 if (*t == '@' || *t == '^')
11230                     needargs = TRUE;
11231             }
11232             if (eol > s) {
11233                 sv_catpvn(stuff, s, eol-s);
11234 #ifndef PERL_STRICT_CR
11235                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11236                     char *end = SvPVX(stuff) + SvCUR(stuff);
11237                     end[-2] = '\n';
11238                     end[-1] = '\0';
11239                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11240                 }
11241 #endif
11242             }
11243             else
11244               break;
11245         }
11246         s = (char*)eol;
11247         if ((PL_rsfp || PL_parser->filtered)
11248          && PL_parser->form_lex_state == LEX_NORMAL) {
11249             bool got_some;
11250 #ifdef PERL_MAD
11251             if (PL_madskills) {
11252                 if (PL_thistoken)
11253                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11254                 else
11255                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11256             }
11257 #endif
11258             PL_bufptr = PL_bufend;
11259             COPLINE_INC_WITH_HERELINES;
11260             got_some = lex_next_chunk(0);
11261             CopLINE_dec(PL_curcop);
11262             s = PL_bufptr;
11263 #ifdef PERL_MAD
11264             tokenstart = PL_bufptr;
11265 #endif
11266             if (!got_some)
11267                 break;
11268         }
11269         incline(s);
11270     }
11271   enough:
11272     if (!SvCUR(stuff) || needargs)
11273         PL_lex_state = PL_parser->form_lex_state;
11274     if (SvCUR(stuff)) {
11275         PL_expect = XSTATE;
11276         if (needargs) {
11277             start_force(PL_curforce);
11278             NEXTVAL_NEXTTOKE.ival = 0;
11279             force_next(FORMLBRACK);
11280         }
11281         if (!IN_BYTES) {
11282             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11283                 SvUTF8_on(stuff);
11284             else if (PL_encoding)
11285                 sv_recode_to_utf8(stuff, PL_encoding);
11286         }
11287         start_force(PL_curforce);
11288         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11289         force_next(THING);
11290     }
11291     else {
11292         SvREFCNT_dec(stuff);
11293         if (eofmt)
11294             PL_lex_formbrack = 0;
11295     }
11296 #ifdef PERL_MAD
11297     if (PL_madskills) {
11298         if (PL_thistoken)
11299             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11300         else
11301             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11302         PL_thiswhite = savewhite;
11303     }
11304 #endif
11305     return s;
11306 }
11307
11308 I32
11309 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11310 {
11311     dVAR;
11312     const I32 oldsavestack_ix = PL_savestack_ix;
11313     CV* const outsidecv = PL_compcv;
11314
11315     SAVEI32(PL_subline);
11316     save_item(PL_subname);
11317     SAVESPTR(PL_compcv);
11318
11319     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11320     CvFLAGS(PL_compcv) |= flags;
11321
11322     PL_subline = CopLINE(PL_curcop);
11323     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11324     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11325     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11326     if (outsidecv && CvPADLIST(outsidecv))
11327         CvPADLIST(PL_compcv)->xpadl_outid =
11328             PadlistNAMES(CvPADLIST(outsidecv));
11329
11330     return oldsavestack_ix;
11331 }
11332
11333 #ifdef __SC__
11334 #pragma segment Perl_yylex
11335 #endif
11336 static int
11337 S_yywarn(pTHX_ const char *const s, U32 flags)
11338 {
11339     dVAR;
11340
11341     PERL_ARGS_ASSERT_YYWARN;
11342
11343     PL_in_eval |= EVAL_WARNONLY;
11344     yyerror_pv(s, flags);
11345     PL_in_eval &= ~EVAL_WARNONLY;
11346     return 0;
11347 }
11348
11349 int
11350 Perl_yyerror(pTHX_ const char *const s)
11351 {
11352     PERL_ARGS_ASSERT_YYERROR;
11353     return yyerror_pvn(s, strlen(s), 0);
11354 }
11355
11356 int
11357 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11358 {
11359     PERL_ARGS_ASSERT_YYERROR_PV;
11360     return yyerror_pvn(s, strlen(s), flags);
11361 }
11362
11363 int
11364 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11365 {
11366     dVAR;
11367     const char *context = NULL;
11368     int contlen = -1;
11369     SV *msg;
11370     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11371     int yychar  = PL_parser->yychar;
11372
11373     PERL_ARGS_ASSERT_YYERROR_PVN;
11374
11375     if (!yychar || (yychar == ';' && !PL_rsfp))
11376         sv_catpvs(where_sv, "at EOF");
11377     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11378       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11379       PL_oldbufptr != PL_bufptr) {
11380         /*
11381                 Only for NetWare:
11382                 The code below is removed for NetWare because it abends/crashes on NetWare
11383                 when the script has error such as not having the closing quotes like:
11384                     if ($var eq "value)
11385                 Checking of white spaces is anyway done in NetWare code.
11386         */
11387 #ifndef NETWARE
11388         while (isSPACE(*PL_oldoldbufptr))
11389             PL_oldoldbufptr++;
11390 #endif
11391         context = PL_oldoldbufptr;
11392         contlen = PL_bufptr - PL_oldoldbufptr;
11393     }
11394     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11395       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11396         /*
11397                 Only for NetWare:
11398                 The code below is removed for NetWare because it abends/crashes on NetWare
11399                 when the script has error such as not having the closing quotes like:
11400                     if ($var eq "value)
11401                 Checking of white spaces is anyway done in NetWare code.
11402         */
11403 #ifndef NETWARE
11404         while (isSPACE(*PL_oldbufptr))
11405             PL_oldbufptr++;
11406 #endif
11407         context = PL_oldbufptr;
11408         contlen = PL_bufptr - PL_oldbufptr;
11409     }
11410     else if (yychar > 255)
11411         sv_catpvs(where_sv, "next token ???");
11412     else if (yychar == -2) { /* YYEMPTY */
11413         if (PL_lex_state == LEX_NORMAL ||
11414            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11415             sv_catpvs(where_sv, "at end of line");
11416         else if (PL_lex_inpat)
11417             sv_catpvs(where_sv, "within pattern");
11418         else
11419             sv_catpvs(where_sv, "within string");
11420     }
11421     else {
11422         sv_catpvs(where_sv, "next char ");
11423         if (yychar < 32)
11424             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11425         else if (isPRINT_LC(yychar)) {
11426             const char string = yychar;
11427             sv_catpvn(where_sv, &string, 1);
11428         }
11429         else
11430             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11431     }
11432     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11433     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11434         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11435     if (context)
11436         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11437                              UTF8fARG(UTF, contlen, context));
11438     else
11439         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11440     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11441         Perl_sv_catpvf(aTHX_ msg,
11442         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11443                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11444         PL_multi_end = 0;
11445     }
11446     if (PL_in_eval & EVAL_WARNONLY) {
11447         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11448     }
11449     else
11450         qerror(msg);
11451     if (PL_error_count >= 10) {
11452         SV * errsv;
11453         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11454             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11455                        SVfARG(errsv), OutCopFILE(PL_curcop));
11456         else
11457             Perl_croak(aTHX_ "%s has too many errors.\n",
11458             OutCopFILE(PL_curcop));
11459     }
11460     PL_in_my = 0;
11461     PL_in_my_stash = NULL;
11462     return 0;
11463 }
11464 #ifdef __SC__
11465 #pragma segment Main
11466 #endif
11467
11468 STATIC char*
11469 S_swallow_bom(pTHX_ U8 *s)
11470 {
11471     dVAR;
11472     const STRLEN slen = SvCUR(PL_linestr);
11473
11474     PERL_ARGS_ASSERT_SWALLOW_BOM;
11475
11476     switch (s[0]) {
11477     case 0xFF:
11478         if (s[1] == 0xFE) {
11479             /* UTF-16 little-endian? (or UTF-32LE?) */
11480             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11481                 /* diag_listed_as: Unsupported script encoding %s */
11482                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11483 #ifndef PERL_NO_UTF16_FILTER
11484             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11485             s += 2;
11486             if (PL_bufend > (char*)s) {
11487                 s = add_utf16_textfilter(s, TRUE);
11488             }
11489 #else
11490             /* diag_listed_as: Unsupported script encoding %s */
11491             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11492 #endif
11493         }
11494         break;
11495     case 0xFE:
11496         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11497 #ifndef PERL_NO_UTF16_FILTER
11498             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11499             s += 2;
11500             if (PL_bufend > (char *)s) {
11501                 s = add_utf16_textfilter(s, FALSE);
11502             }
11503 #else
11504             /* diag_listed_as: Unsupported script encoding %s */
11505             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11506 #endif
11507         }
11508         break;
11509     case 0xEF:
11510         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11511             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11512             s += 3;                      /* UTF-8 */
11513         }
11514         break;
11515     case 0:
11516         if (slen > 3) {
11517              if (s[1] == 0) {
11518                   if (s[2] == 0xFE && s[3] == 0xFF) {
11519                        /* UTF-32 big-endian */
11520                        /* diag_listed_as: Unsupported script encoding %s */
11521                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11522                   }
11523              }
11524              else if (s[2] == 0 && s[3] != 0) {
11525                   /* Leading bytes
11526                    * 00 xx 00 xx
11527                    * are a good indicator of UTF-16BE. */
11528 #ifndef PERL_NO_UTF16_FILTER
11529                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11530                   s = add_utf16_textfilter(s, FALSE);
11531 #else
11532                   /* diag_listed_as: Unsupported script encoding %s */
11533                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11534 #endif
11535              }
11536         }
11537 #ifdef EBCDIC
11538     case 0xDD:
11539         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11540             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11541             s += 4;                      /* UTF-8 */
11542         }
11543         break;
11544 #endif
11545
11546     default:
11547          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11548                   /* Leading bytes
11549                    * xx 00 xx 00
11550                    * are a good indicator of UTF-16LE. */
11551 #ifndef PERL_NO_UTF16_FILTER
11552               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11553               s = add_utf16_textfilter(s, TRUE);
11554 #else
11555               /* diag_listed_as: Unsupported script encoding %s */
11556               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11557 #endif
11558          }
11559     }
11560     return (char*)s;
11561 }
11562
11563
11564 #ifndef PERL_NO_UTF16_FILTER
11565 static I32
11566 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11567 {
11568     dVAR;
11569     SV *const filter = FILTER_DATA(idx);
11570     /* We re-use this each time round, throwing the contents away before we
11571        return.  */
11572     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11573     SV *const utf8_buffer = filter;
11574     IV status = IoPAGE(filter);
11575     const bool reverse = cBOOL(IoLINES(filter));
11576     I32 retval;
11577
11578     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11579
11580     /* As we're automatically added, at the lowest level, and hence only called
11581        from this file, we can be sure that we're not called in block mode. Hence
11582        don't bother writing code to deal with block mode.  */
11583     if (maxlen) {
11584         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11585     }
11586     if (status < 0) {
11587         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11588     }
11589     DEBUG_P(PerlIO_printf(Perl_debug_log,
11590                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11591                           FPTR2DPTR(void *, S_utf16_textfilter),
11592                           reverse ? 'l' : 'b', idx, maxlen, status,
11593                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11594
11595     while (1) {
11596         STRLEN chars;
11597         STRLEN have;
11598         I32 newlen;
11599         U8 *end;
11600         /* First, look in our buffer of existing UTF-8 data:  */
11601         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11602
11603         if (nl) {
11604             ++nl;
11605         } else if (status == 0) {
11606             /* EOF */
11607             IoPAGE(filter) = 0;
11608             nl = SvEND(utf8_buffer);
11609         }
11610         if (nl) {
11611             STRLEN got = nl - SvPVX(utf8_buffer);
11612             /* Did we have anything to append?  */
11613             retval = got != 0;
11614             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11615             /* Everything else in this code works just fine if SVp_POK isn't
11616                set.  This, however, needs it, and we need it to work, else
11617                we loop infinitely because the buffer is never consumed.  */
11618             sv_chop(utf8_buffer, nl);
11619             break;
11620         }
11621
11622         /* OK, not a complete line there, so need to read some more UTF-16.
11623            Read an extra octect if the buffer currently has an odd number. */
11624         while (1) {
11625             if (status <= 0)
11626                 break;
11627             if (SvCUR(utf16_buffer) >= 2) {
11628                 /* Location of the high octet of the last complete code point.
11629                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11630                    *coupled* with all the benefits of partial reads and
11631                    endianness.  */
11632                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11633                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11634
11635                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11636                     break;
11637                 }
11638
11639                 /* We have the first half of a surrogate. Read more.  */
11640                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11641             }
11642
11643             status = FILTER_READ(idx + 1, utf16_buffer,
11644                                  160 + (SvCUR(utf16_buffer) & 1));
11645             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11646             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11647             if (status < 0) {
11648                 /* Error */
11649                 IoPAGE(filter) = status;
11650                 return status;
11651             }
11652         }
11653
11654         chars = SvCUR(utf16_buffer) >> 1;
11655         have = SvCUR(utf8_buffer);
11656         SvGROW(utf8_buffer, have + chars * 3 + 1);
11657
11658         if (reverse) {
11659             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11660                                          (U8*)SvPVX_const(utf8_buffer) + have,
11661                                          chars * 2, &newlen);
11662         } else {
11663             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11664                                 (U8*)SvPVX_const(utf8_buffer) + have,
11665                                 chars * 2, &newlen);
11666         }
11667         SvCUR_set(utf8_buffer, have + newlen);
11668         *end = '\0';
11669
11670         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11671            it's private to us, and utf16_to_utf8{,reversed} take a
11672            (pointer,length) pair, rather than a NUL-terminated string.  */
11673         if(SvCUR(utf16_buffer) & 1) {
11674             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11675             SvCUR_set(utf16_buffer, 1);
11676         } else {
11677             SvCUR_set(utf16_buffer, 0);
11678         }
11679     }
11680     DEBUG_P(PerlIO_printf(Perl_debug_log,
11681                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11682                           status,
11683                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11684     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11685     return retval;
11686 }
11687
11688 static U8 *
11689 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11690 {
11691     SV *filter = filter_add(S_utf16_textfilter, NULL);
11692
11693     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11694
11695     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11696     sv_setpvs(filter, "");
11697     IoLINES(filter) = reversed;
11698     IoPAGE(filter) = 1; /* Not EOF */
11699
11700     /* Sadly, we have to return a valid pointer, come what may, so we have to
11701        ignore any error return from this.  */
11702     SvCUR_set(PL_linestr, 0);
11703     if (FILTER_READ(0, PL_linestr, 0)) {
11704         SvUTF8_on(PL_linestr);
11705     } else {
11706         SvUTF8_on(PL_linestr);
11707     }
11708     PL_bufend = SvEND(PL_linestr);
11709     return (U8*)SvPVX(PL_linestr);
11710 }
11711 #endif
11712
11713 /*
11714 Returns a pointer to the next character after the parsed
11715 vstring, as well as updating the passed in sv.
11716
11717 Function must be called like
11718
11719         sv = sv_2mortal(newSV(5));
11720         s = scan_vstring(s,e,sv);
11721
11722 where s and e are the start and end of the string.
11723 The sv should already be large enough to store the vstring
11724 passed in, for performance reasons.
11725
11726 This function may croak if fatal warnings are enabled in the
11727 calling scope, hence the sv_2mortal in the example (to prevent
11728 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11729 sv_2mortal.
11730
11731 */
11732
11733 char *
11734 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11735 {
11736     dVAR;
11737     const char *pos = s;
11738     const char *start = s;
11739
11740     PERL_ARGS_ASSERT_SCAN_VSTRING;
11741
11742     if (*pos == 'v') pos++;  /* get past 'v' */
11743     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11744         pos++;
11745     if ( *pos != '.') {
11746         /* this may not be a v-string if followed by => */
11747         const char *next = pos;
11748         while (next < e && isSPACE(*next))
11749             ++next;
11750         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11751             /* return string not v-string */
11752             sv_setpvn(sv,(char *)s,pos-s);
11753             return (char *)pos;
11754         }
11755     }
11756
11757     if (!isALPHA(*pos)) {
11758         U8 tmpbuf[UTF8_MAXBYTES+1];
11759
11760         if (*s == 'v')
11761             s++;  /* get past 'v' */
11762
11763         sv_setpvs(sv, "");
11764
11765         for (;;) {
11766             /* this is atoi() that tolerates underscores */
11767             U8 *tmpend;
11768             UV rev = 0;
11769             const char *end = pos;
11770             UV mult = 1;
11771             while (--end >= s) {
11772                 if (*end != '_') {
11773                     const UV orev = rev;
11774                     rev += (*end - '0') * mult;
11775                     mult *= 10;
11776                     if (orev > rev)
11777                         /* diag_listed_as: Integer overflow in %s number */
11778                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11779                                          "Integer overflow in decimal number");
11780                 }
11781             }
11782 #ifdef EBCDIC
11783             if (rev > 0x7FFFFFFF)
11784                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11785 #endif
11786             /* Append native character for the rev point */
11787             tmpend = uvchr_to_utf8(tmpbuf, rev);
11788             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11789             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11790                  SvUTF8_on(sv);
11791             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11792                  s = ++pos;
11793             else {
11794                  s = pos;
11795                  break;
11796             }
11797             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11798                  pos++;
11799         }
11800         SvPOK_on(sv);
11801         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11802         SvRMAGICAL_on(sv);
11803     }
11804     return (char *)s;
11805 }
11806
11807 int
11808 Perl_keyword_plugin_standard(pTHX_
11809         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11810 {
11811     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11812     PERL_UNUSED_CONTEXT;
11813     PERL_UNUSED_ARG(keyword_ptr);
11814     PERL_UNUSED_ARG(keyword_len);
11815     PERL_UNUSED_ARG(op_ptr);
11816     return KEYWORD_PLUGIN_DECLINE;
11817 }
11818
11819 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11820 static void
11821 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11822 {
11823     SAVEI32(PL_lex_brackets);
11824     if (PL_lex_brackets > 100)
11825         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11826     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11827     SAVEI32(PL_lex_allbrackets);
11828     PL_lex_allbrackets = 0;
11829     SAVEI8(PL_lex_fakeeof);
11830     PL_lex_fakeeof = (U8)fakeeof;
11831     if(yyparse(gramtype) && !PL_parser->error_count)
11832         qerror(Perl_mess(aTHX_ "Parse error"));
11833 }
11834
11835 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11836 static OP *
11837 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11838 {
11839     OP *o;
11840     ENTER;
11841     SAVEVPTR(PL_eval_root);
11842     PL_eval_root = NULL;
11843     parse_recdescent(gramtype, fakeeof);
11844     o = PL_eval_root;
11845     LEAVE;
11846     return o;
11847 }
11848
11849 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11850 static OP *
11851 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11852 {
11853     OP *exprop;
11854     if (flags & ~PARSE_OPTIONAL)
11855         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11856     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11857     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11858         if (!PL_parser->error_count)
11859             qerror(Perl_mess(aTHX_ "Parse error"));
11860         exprop = newOP(OP_NULL, 0);
11861     }
11862     return exprop;
11863 }
11864
11865 /*
11866 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11867
11868 Parse a Perl arithmetic expression.  This may contain operators of precedence
11869 down to the bit shift operators.  The expression must be followed (and thus
11870 terminated) either by a comparison or lower-precedence operator or by
11871 something that would normally terminate an expression such as semicolon.
11872 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11873 otherwise it is mandatory.  It is up to the caller to ensure that the
11874 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11875 the source of the code to be parsed and the lexical context for the
11876 expression.
11877
11878 The op tree representing the expression is returned.  If an optional
11879 expression is absent, a null pointer is returned, otherwise the pointer
11880 will be non-null.
11881
11882 If an error occurs in parsing or compilation, in most cases a valid op
11883 tree is returned anyway.  The error is reflected in the parser state,
11884 normally resulting in a single exception at the top level of parsing
11885 which covers all the compilation errors that occurred.  Some compilation
11886 errors, however, will throw an exception immediately.
11887
11888 =cut
11889 */
11890
11891 OP *
11892 Perl_parse_arithexpr(pTHX_ U32 flags)
11893 {
11894     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11895 }
11896
11897 /*
11898 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11899
11900 Parse a Perl term expression.  This may contain operators of precedence
11901 down to the assignment operators.  The expression must be followed (and thus
11902 terminated) either by a comma or lower-precedence operator or by
11903 something that would normally terminate an expression such as semicolon.
11904 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11905 otherwise it is mandatory.  It is up to the caller to ensure that the
11906 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11907 the source of the code to be parsed and the lexical context for the
11908 expression.
11909
11910 The op tree representing the expression is returned.  If an optional
11911 expression is absent, a null pointer is returned, otherwise the pointer
11912 will be non-null.
11913
11914 If an error occurs in parsing or compilation, in most cases a valid op
11915 tree is returned anyway.  The error is reflected in the parser state,
11916 normally resulting in a single exception at the top level of parsing
11917 which covers all the compilation errors that occurred.  Some compilation
11918 errors, however, will throw an exception immediately.
11919
11920 =cut
11921 */
11922
11923 OP *
11924 Perl_parse_termexpr(pTHX_ U32 flags)
11925 {
11926     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11927 }
11928
11929 /*
11930 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11931
11932 Parse a Perl list expression.  This may contain operators of precedence
11933 down to the comma operator.  The expression must be followed (and thus
11934 terminated) either by a low-precedence logic operator such as C<or> or by
11935 something that would normally terminate an expression such as semicolon.
11936 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11937 otherwise it is mandatory.  It is up to the caller to ensure that the
11938 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11939 the source of the code to be parsed and the lexical context for the
11940 expression.
11941
11942 The op tree representing the expression is returned.  If an optional
11943 expression is absent, a null pointer is returned, otherwise the pointer
11944 will be non-null.
11945
11946 If an error occurs in parsing or compilation, in most cases a valid op
11947 tree is returned anyway.  The error is reflected in the parser state,
11948 normally resulting in a single exception at the top level of parsing
11949 which covers all the compilation errors that occurred.  Some compilation
11950 errors, however, will throw an exception immediately.
11951
11952 =cut
11953 */
11954
11955 OP *
11956 Perl_parse_listexpr(pTHX_ U32 flags)
11957 {
11958     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11959 }
11960
11961 /*
11962 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11963
11964 Parse a single complete Perl expression.  This allows the full
11965 expression grammar, including the lowest-precedence operators such
11966 as C<or>.  The expression must be followed (and thus terminated) by a
11967 token that an expression would normally be terminated by: end-of-file,
11968 closing bracketing punctuation, semicolon, or one of the keywords that
11969 signals a postfix expression-statement modifier.  If I<flags> includes
11970 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11971 mandatory.  It is up to the caller to ensure that the dynamic parser
11972 state (L</PL_parser> et al) is correctly set to reflect the source of
11973 the code to be parsed and the lexical context for the expression.
11974
11975 The op tree representing the expression is returned.  If an optional
11976 expression is absent, a null pointer is returned, otherwise the pointer
11977 will be non-null.
11978
11979 If an error occurs in parsing or compilation, in most cases a valid op
11980 tree is returned anyway.  The error is reflected in the parser state,
11981 normally resulting in a single exception at the top level of parsing
11982 which covers all the compilation errors that occurred.  Some compilation
11983 errors, however, will throw an exception immediately.
11984
11985 =cut
11986 */
11987
11988 OP *
11989 Perl_parse_fullexpr(pTHX_ U32 flags)
11990 {
11991     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11992 }
11993
11994 /*
11995 =for apidoc Amx|OP *|parse_block|U32 flags
11996
11997 Parse a single complete Perl code block.  This consists of an opening
11998 brace, a sequence of statements, and a closing brace.  The block
11999 constitutes a lexical scope, so C<my> variables and various compile-time
12000 effects can be contained within it.  It is up to the caller to ensure
12001 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12002 reflect the source of the code to be parsed and the lexical context for
12003 the statement.
12004
12005 The op tree representing the code block is returned.  This is always a
12006 real op, never a null pointer.  It will normally be a C<lineseq> list,
12007 including C<nextstate> or equivalent ops.  No ops to construct any kind
12008 of runtime scope are included by virtue of it being a block.
12009
12010 If an error occurs in parsing or compilation, in most cases a valid op
12011 tree (most likely null) is returned anyway.  The error is reflected in
12012 the parser state, normally resulting in a single exception at the top
12013 level of parsing which covers all the compilation errors that occurred.
12014 Some compilation errors, however, will throw an exception immediately.
12015
12016 The I<flags> parameter is reserved for future use, and must always
12017 be zero.
12018
12019 =cut
12020 */
12021
12022 OP *
12023 Perl_parse_block(pTHX_ U32 flags)
12024 {
12025     if (flags)
12026         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12027     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12028 }
12029
12030 /*
12031 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12032
12033 Parse a single unadorned Perl statement.  This may be a normal imperative
12034 statement or a declaration that has compile-time effect.  It does not
12035 include any label or other affixture.  It is up to the caller to ensure
12036 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12037 reflect the source of the code to be parsed and the lexical context for
12038 the statement.
12039
12040 The op tree representing the statement is returned.  This may be a
12041 null pointer if the statement is null, for example if it was actually
12042 a subroutine definition (which has compile-time side effects).  If not
12043 null, it will be ops directly implementing the statement, suitable to
12044 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12045 equivalent op (except for those embedded in a scope contained entirely
12046 within the statement).
12047
12048 If an error occurs in parsing or compilation, in most cases a valid op
12049 tree (most likely null) is returned anyway.  The error is reflected in
12050 the parser state, normally resulting in a single exception at the top
12051 level of parsing which covers all the compilation errors that occurred.
12052 Some compilation errors, however, will throw an exception immediately.
12053
12054 The I<flags> parameter is reserved for future use, and must always
12055 be zero.
12056
12057 =cut
12058 */
12059
12060 OP *
12061 Perl_parse_barestmt(pTHX_ U32 flags)
12062 {
12063     if (flags)
12064         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12065     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12066 }
12067
12068 /*
12069 =for apidoc Amx|SV *|parse_label|U32 flags
12070
12071 Parse a single label, possibly optional, of the type that may prefix a
12072 Perl statement.  It is up to the caller to ensure that the dynamic parser
12073 state (L</PL_parser> et al) is correctly set to reflect the source of
12074 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
12075 label is optional, otherwise it is mandatory.
12076
12077 The name of the label is returned in the form of a fresh scalar.  If an
12078 optional label is absent, a null pointer is returned.
12079
12080 If an error occurs in parsing, which can only occur if the label is
12081 mandatory, a valid label is returned anyway.  The error is reflected in
12082 the parser state, normally resulting in a single exception at the top
12083 level of parsing which covers all the compilation errors that occurred.
12084
12085 =cut
12086 */
12087
12088 SV *
12089 Perl_parse_label(pTHX_ U32 flags)
12090 {
12091     if (flags & ~PARSE_OPTIONAL)
12092         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12093     if (PL_lex_state == LEX_KNOWNEXT) {
12094         PL_parser->yychar = yylex();
12095         if (PL_parser->yychar == LABEL) {
12096             char * const lpv = pl_yylval.pval;
12097             STRLEN llen = strlen(lpv);
12098             PL_parser->yychar = YYEMPTY;
12099             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12100         } else {
12101             yyunlex();
12102             goto no_label;
12103         }
12104     } else {
12105         char *s, *t;
12106         STRLEN wlen, bufptr_pos;
12107         lex_read_space(0);
12108         t = s = PL_bufptr;
12109         if (!isIDFIRST_lazy_if(s, UTF))
12110             goto no_label;
12111         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12112         if (word_takes_any_delimeter(s, wlen))
12113             goto no_label;
12114         bufptr_pos = s - SvPVX(PL_linestr);
12115         PL_bufptr = t;
12116         lex_read_space(LEX_KEEP_PREVIOUS);
12117         t = PL_bufptr;
12118         s = SvPVX(PL_linestr) + bufptr_pos;
12119         if (t[0] == ':' && t[1] != ':') {
12120             PL_oldoldbufptr = PL_oldbufptr;
12121             PL_oldbufptr = s;
12122             PL_bufptr = t+1;
12123             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12124         } else {
12125             PL_bufptr = s;
12126             no_label:
12127             if (flags & PARSE_OPTIONAL) {
12128                 return NULL;
12129             } else {
12130                 qerror(Perl_mess(aTHX_ "Parse error"));
12131                 return newSVpvs("x");
12132             }
12133         }
12134     }
12135 }
12136
12137 /*
12138 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12139
12140 Parse a single complete Perl statement.  This may be a normal imperative
12141 statement or a declaration that has compile-time effect, and may include
12142 optional labels.  It is up to the caller to ensure that the dynamic
12143 parser state (L</PL_parser> et al) is correctly set to reflect the source
12144 of the code to be parsed and the lexical context for the statement.
12145
12146 The op tree representing the statement is returned.  This may be a
12147 null pointer if the statement is null, for example if it was actually
12148 a subroutine definition (which has compile-time side effects).  If not
12149 null, it will be the result of a L</newSTATEOP> call, normally including
12150 a C<nextstate> or equivalent op.
12151
12152 If an error occurs in parsing or compilation, in most cases a valid op
12153 tree (most likely null) is returned anyway.  The error is reflected in
12154 the parser state, normally resulting in a single exception at the top
12155 level of parsing which covers all the compilation errors that occurred.
12156 Some compilation errors, however, will throw an exception immediately.
12157
12158 The I<flags> parameter is reserved for future use, and must always
12159 be zero.
12160
12161 =cut
12162 */
12163
12164 OP *
12165 Perl_parse_fullstmt(pTHX_ U32 flags)
12166 {
12167     if (flags)
12168         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12169     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12170 }
12171
12172 /*
12173 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12174
12175 Parse a sequence of zero or more Perl statements.  These may be normal
12176 imperative statements, including optional labels, or declarations
12177 that have compile-time effect, or any mixture thereof.  The statement
12178 sequence ends when a closing brace or end-of-file is encountered in a
12179 place where a new statement could have validly started.  It is up to
12180 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12181 is correctly set to reflect the source of the code to be parsed and the
12182 lexical context for the statements.
12183
12184 The op tree representing the statement sequence is returned.  This may
12185 be a null pointer if the statements were all null, for example if there
12186 were no statements or if there were only subroutine definitions (which
12187 have compile-time side effects).  If not null, it will be a C<lineseq>
12188 list, normally including C<nextstate> or equivalent ops.
12189
12190 If an error occurs in parsing or compilation, in most cases a valid op
12191 tree is returned anyway.  The error is reflected in the parser state,
12192 normally resulting in a single exception at the top level of parsing
12193 which covers all the compilation errors that occurred.  Some compilation
12194 errors, however, will throw an exception immediately.
12195
12196 The I<flags> parameter is reserved for future use, and must always
12197 be zero.
12198
12199 =cut
12200 */
12201
12202 OP *
12203 Perl_parse_stmtseq(pTHX_ U32 flags)
12204 {
12205     OP *stmtseqop;
12206     I32 c;
12207     if (flags)
12208         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12209     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12210     c = lex_peek_unichar(0);
12211     if (c != -1 && c != /*{*/'}')
12212         qerror(Perl_mess(aTHX_ "Parse error"));
12213     return stmtseqop;
12214 }
12215
12216 /*
12217  * Local variables:
12218  * c-indentation-style: bsd
12219  * c-basic-offset: 4
12220  * indent-tabs-mode: nil
12221  * End:
12222  *
12223  * ex: set ts=8 sts=4 sw=4 et:
12224  */