toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_preambled            (PL_parser->preambled)
  70 #define PL_sublex_info          (PL_parser->sublex_info)
  71 #define PL_linestr              (PL_parser->linestr)
  72 #define PL_expect               (PL_parser->expect)
  73 #define PL_copline              (PL_parser->copline)
  74 #define PL_bufptr               (PL_parser->bufptr)
  75 #define PL_oldbufptr            (PL_parser->oldbufptr)
  76 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  77 #define PL_linestart            (PL_parser->linestart)
  78 #define PL_bufend               (PL_parser->bufend)
  79 #define PL_last_uni             (PL_parser->last_uni)
  80 #define PL_last_lop             (PL_parser->last_lop)
  81 #define PL_last_lop_op          (PL_parser->last_lop_op)
  82 #define PL_lex_state            (PL_parser->lex_state)
  83 #define PL_rsfp                 (PL_parser->rsfp)
  84 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  85 #define PL_in_my                (PL_parser->in_my)
  86 #define PL_in_my_stash          (PL_parser->in_my_stash)
  87 #define PL_tokenbuf             (PL_parser->tokenbuf)
  88 #define PL_multi_end            (PL_parser->multi_end)
  89 #define PL_error_count          (PL_parser->error_count)
  90
  91 #ifdef PERL_MAD
  92 #  define PL_endwhite           (PL_parser->endwhite)
  93 #  define PL_faketokens         (PL_parser->faketokens)
  94 #  define PL_lasttoke           (PL_parser->lasttoke)
  95 #  define PL_nextwhite          (PL_parser->nextwhite)
  96 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  97 #  define PL_skipwhite          (PL_parser->skipwhite)
  98 #  define PL_thisclose          (PL_parser->thisclose)
  99 #  define PL_thismad            (PL_parser->thismad)
 100 #  define PL_thisopen           (PL_parser->thisopen)
 101 #  define PL_thisstuff          (PL_parser->thisstuff)
 102 #  define PL_thistoken          (PL_parser->thistoken)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_nexttoke           (PL_parser->nexttoke)
 106 #  define PL_curforce           (PL_parser->curforce)
 107 #else
 108 #  define PL_nexttoke           (PL_parser->nexttoke)
 109 #  define PL_nexttype           (PL_parser->nexttype)
 110 #  define PL_nextval            (PL_parser->nextval)
 111 #endif
 112
 113 static const char* const ident_too_long = "Identifier too long";
 114
 115 #ifdef PERL_MAD
 116 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 117 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 118 #else
 119 #  define CURMAD(slot,sv)
 120 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 121 #endif
 122
 123 #define XENUMMASK  0x3f
 124 #define XFAKEEOF   0x40
 125 #define XFAKEBRACK 0x80
 126
 127 #ifdef USE_UTF8_SCRIPTS
 128 #   define UTF (!IN_BYTES)
 129 #else
 130 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 131 #endif
 132
 133 /* The maximum number of characters preceding the unrecognized one to display */
 134 #define UNRECOGNIZED_PRECEDE_COUNT 10
 135
 136 /* In variables named $^X, these are the legal values for X.
 137  * 1999-02-27 mjd-perl-patch@plover.com */
 138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 139
 140 #define SPACE_OR_TAB(c) isBLANK_A(c)
 141
 142 /* LEX_* are values for PL_lex_state, the state of the lexer.
 143  * They are arranged oddly so that the guard on the switch statement
 144  * can get by with a single comparison (if the compiler is smart enough).
 145  *
 146  * These values refer to the various states within a sublex parse,
 147  * i.e. within a double quotish string
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #ifdef ff_next
 186 #undef ff_next
 187 #endif
 188
 189 #include "keywords.h"
 190
 191 /* CLINE is a macro that ensures PL_copline has a sane value */
 192
 193 #ifdef CLINE
 194 #undef CLINE
 195 #endif
 196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 197
 198 #ifdef PERL_MAD
 199 #  define SKIPSPACE0(s) skipspace0(s)
 200 #  define SKIPSPACE1(s) skipspace1(s)
 201 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 202 #  define PEEKSPACE(s) skipspace2(s,0)
 203 #else
 204 #  define SKIPSPACE0(s) skipspace(s)
 205 #  define SKIPSPACE1(s) skipspace(s)
 206 #  define SKIPSPACE2(s,tsv) skipspace(s)
 207 #  define PEEKSPACE(s) skipspace(s)
 208 #endif
 209
 210 /*
 211  * Convenience functions to return different tokens and prime the
 212  * lexer for the next token.  They all take an argument.
 213  *
 214  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 215  * OPERATOR     : generic operator
 216  * AOPERATOR    : assignment operator
 217  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 218  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 219  * PREREF       : *EXPR where EXPR is not a simple identifier
 220  * TERM         : expression term
 221  * LOOPX        : loop exiting command (goto, last, dump, etc)
 222  * FTST         : file test operator
 223  * FUN0         : zero-argument function
 224  * FUN0OP       : zero-argument function, with its op created in this file
 225  * FUN1         : not used, except for not, which isn't a UNIOP
 226  * BOop         : bitwise or or xor
 227  * BAop         : bitwise and
 228  * SHop         : shift operator
 229  * PWop         : power operator
 230  * PMop         : pattern-matching operator
 231  * Aop          : addition-level operator
 232  * Mop          : multiplication-level operator
 233  * Eop          : equality-testing operator
 234  * Rop          : relational operator <= != gt
 235  *
 236  * Also see LOP and lop() below.
 237  */
 238
 239 #ifdef DEBUGGING /* Serve -DT. */
 240 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 241 #else
 242 #   define REPORT(retval) (retval)
 243 #endif
 244
 245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 253 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 254 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 255 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI3(f,x,have_x) { \
 273         pl_yylval.ival = f; \
 274         if (have_x) PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI3(f,XTERM,1)
 284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 285 #define UNIPROTO(f,optional) { \
 286         if (optional) PL_last_uni = PL_oldbufptr; \
 287         OPERATOR(f); \
 288         }
 289
 290 #define UNIBRACK(f) UNI3(f,0,0)
 291
 292 /* grandfather return to old style */
 293 #define OLDLOP(f) \
 294         do { \
 295             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 296                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 297             pl_yylval.ival = (f); \
 298             PL_expect = XTERM; \
 299             PL_bufptr = s; \
 300             return (int)LSTOP; \
 301         } while(0)
 302
 303 #define COPLINE_INC_WITH_HERELINES                  \
 304     STMT_START {                                     \
 305         CopLINE_inc(PL_curcop);                       \
 306         if (PL_parser->lex_shared->herelines)          \
 307             CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
 308             PL_parser->lex_shared->herelines = 0;                    \
 309     } STMT_END
 310
 311
 312 #ifdef DEBUGGING
 313
 314 /* how to interpret the pl_yylval associated with the token */
 315 enum token_type {
 316     TOKENTYPE_NONE,
 317     TOKENTYPE_IVAL,
 318     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 319     TOKENTYPE_PVAL,
 320     TOKENTYPE_OPVAL
 321 };
 322
 323 static struct debug_tokens {
 324     const int token;
 325     enum token_type type;
 326     const char *name;
 327 } const debug_tokens[] =
 328 {
 329     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 330     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 331     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 332     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 333     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 334     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 335     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 336     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 337     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 338     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 339     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 340     { DO,               TOKENTYPE_NONE,         "DO" },
 341     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 342     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 343     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 344     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 345     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 346     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 347     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 348     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 349     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 350     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 351     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 352     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 353     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 354     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 355     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 356     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 357     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 358     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 359     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 360     { IF,               TOKENTYPE_IVAL,         "IF" },
 361     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 362     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 363     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 364     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 365     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 366     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 367     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 368     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 369     { MY,               TOKENTYPE_IVAL,         "MY" },
 370     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 371     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 372     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 373     { OROR,             TOKENTYPE_NONE,         "OROR" },
 374     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 375     { PEG,              TOKENTYPE_NONE,         "PEG" },
 376     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 377     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 378     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 379     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 380     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 381     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 382     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 383     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 384     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 385     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 386     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 387     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 388     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 389     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 390     { SUB,              TOKENTYPE_NONE,         "SUB" },
 391     { THING,            TOKENTYPE_OPVAL,        "THING" },
 392     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 393     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 394     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 395     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 396     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 397     { USE,              TOKENTYPE_IVAL,         "USE" },
 398     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 399     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 400     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 401     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 402     { 0,                TOKENTYPE_NONE,         NULL }
 403 };
 404
 405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 406
 407 STATIC int
 408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 409 {
 410     dVAR;
 411
 412     PERL_ARGS_ASSERT_TOKEREPORT;
 413
 414     if (DEBUG_T_TEST) {
 415         const char *name = NULL;
 416         enum token_type type = TOKENTYPE_NONE;
 417         const struct debug_tokens *p;
 418         SV* const report = newSVpvs("<== ");
 419
 420         for (p = debug_tokens; p->token; p++) {
 421             if (p->token == (int)rv) {
 422                 name = p->name;
 423                 type = p->type;
 424                 break;
 425             }
 426         }
 427         if (name)
 428             Perl_sv_catpv(aTHX_ report, name);
 429         else if ((char)rv > ' ' && (char)rv <= '~')
 430         {
 431             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 432             if ((char)rv == 'p')
 433                 sv_catpvs(report, " (pending identifier)");
 434         }
 435         else if (!rv)
 436             sv_catpvs(report, "EOF");
 437         else
 438             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 439         switch (type) {
 440         case TOKENTYPE_NONE:
 441             break;
 442         case TOKENTYPE_IVAL:
 443             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 444             break;
 445         case TOKENTYPE_OPNUM:
 446             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 447                                     PL_op_name[lvalp->ival]);
 448             break;
 449         case TOKENTYPE_PVAL:
 450             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 451             break;
 452         case TOKENTYPE_OPVAL:
 453             if (lvalp->opval) {
 454                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 455                                     PL_op_name[lvalp->opval->op_type]);
 456                 if (lvalp->opval->op_type == OP_CONST) {
 457                     Perl_sv_catpvf(aTHX_ report, " %s",
 458                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 459                 }
 460
 461             }
 462             else
 463                 sv_catpvs(report, "(opval=null)");
 464             break;
 465         }
 466         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 467     };
 468     return (int)rv;
 469 }
 470
 471
 472 /* print the buffer with suitable escapes */
 473
 474 STATIC void
 475 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 476 {
 477     SV* const tmp = newSVpvs("");
 478
 479     PERL_ARGS_ASSERT_PRINTBUF;
 480
 481     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 482     SvREFCNT_dec(tmp);
 483 }
 484
 485 #endif
 486
 487 static int
 488 S_deprecate_commaless_var_list(pTHX) {
 489     PL_expect = XTERM;
 490     deprecate("comma-less variable list");
 491     return REPORT(','); /* grandfather non-comma-format format */
 492 }
 493
 494 /*
 495  * S_ao
 496  *
 497  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 498  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 499  */
 500
 501 STATIC int
 502 S_ao(pTHX_ int toketype)
 503 {
 504     dVAR;
 505     if (*PL_bufptr == '=') {
 506         PL_bufptr++;
 507         if (toketype == ANDAND)
 508             pl_yylval.ival = OP_ANDASSIGN;
 509         else if (toketype == OROR)
 510             pl_yylval.ival = OP_ORASSIGN;
 511         else if (toketype == DORDOR)
 512             pl_yylval.ival = OP_DORASSIGN;
 513         toketype = ASSIGNOP;
 514     }
 515     return toketype;
 516 }
 517
 518 /*
 519  * S_no_op
 520  * When Perl expects an operator and finds something else, no_op
 521  * prints the warning.  It always prints "<something> found where
 522  * operator expected.  It prints "Missing semicolon on previous line?"
 523  * if the surprise occurs at the start of the line.  "do you need to
 524  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 525  * where the compiler doesn't know if foo is a method call or a function.
 526  * It prints "Missing operator before end of line" if there's nothing
 527  * after the missing operator, or "... before <...>" if there is something
 528  * after the missing operator.
 529  */
 530
 531 STATIC void
 532 S_no_op(pTHX_ const char *const what, char *s)
 533 {
 534     dVAR;
 535     char * const oldbp = PL_bufptr;
 536     const bool is_first = (PL_oldbufptr == PL_linestart);
 537
 538     PERL_ARGS_ASSERT_NO_OP;
 539
 540     if (!s)
 541         s = oldbp;
 542     else
 543         PL_bufptr = s;
 544     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 545     if (ckWARN_d(WARN_SYNTAX)) {
 546         if (is_first)
 547             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 548                     "\t(Missing semicolon on previous line?)\n");
 549         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 550             const char *t;
 551             for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
 552                                                             t += UTF ? UTF8SKIP(t) : 1)
 553                 NOOP;
 554             if (t < PL_bufptr && isSPACE(*t))
 555                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 556                         "\t(Do you need to predeclare %"UTF8f"?)\n",
 557                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 558         }
 559         else {
 560             assert(s >= oldbp);
 561             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 562                     "\t(Missing operator before %"UTF8f"?)\n",
 563                      UTF8fARG(UTF, s - oldbp, oldbp));
 564         }
 565     }
 566     PL_bufptr = oldbp;
 567 }
 568
 569 /*
 570  * S_missingterm
 571  * Complain about missing quote/regexp/heredoc terminator.
 572  * If it's called with NULL then it cauterizes the line buffer.
 573  * If we're in a delimited string and the delimiter is a control
 574  * character, it's reformatted into a two-char sequence like ^C.
 575  * This is fatal.
 576  */
 577
 578 STATIC void
 579 S_missingterm(pTHX_ char *s)
 580 {
 581     dVAR;
 582     char tmpbuf[3];
 583     char q;
 584     if (s) {
 585         char * const nl = strrchr(s,'\n');
 586         if (nl)
 587             *nl = '\0';
 588     }
 589     else if (isCNTRL(PL_multi_close)) {
 590         *tmpbuf = '^';
 591         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 592         tmpbuf[2] = '\0';
 593         s = tmpbuf;
 594     }
 595     else {
 596         *tmpbuf = (char)PL_multi_close;
 597         tmpbuf[1] = '\0';
 598         s = tmpbuf;
 599     }
 600     q = strchr(s,'"') ? '\'' : '"';
 601     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 602 }
 603
 604 #include "feature.h"
 605
 606 /*
 607  * Check whether the named feature is enabled.
 608  */
 609 bool
 610 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 611 {
 612     dVAR;
 613     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 614
 615     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 616
 617     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 618
 619     if (namelen > MAX_FEATURE_LEN)
 620         return FALSE;
 621     memcpy(&he_name[8], name, namelen);
 622
 623     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 624                                      REFCOUNTED_HE_EXISTS));
 625 }
 626
 627 /*
 628  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 629  * utf16-to-utf8-reversed.
 630  */
 631
 632 #ifdef PERL_CR_FILTER
 633 static void
 634 strip_return(SV *sv)
 635 {
 636     const char *s = SvPVX_const(sv);
 637     const char * const e = s + SvCUR(sv);
 638
 639     PERL_ARGS_ASSERT_STRIP_RETURN;
 640
 641     /* outer loop optimized to do nothing if there are no CR-LFs */
 642     while (s < e) {
 643         if (*s++ == '\r' && *s == '\n') {
 644             /* hit a CR-LF, need to copy the rest */
 645             char *d = s - 1;
 646             *d++ = *s++;
 647             while (s < e) {
 648                 if (*s == '\r' && s[1] == '\n')
 649                     s++;
 650                 *d++ = *s++;
 651             }
 652             SvCUR(sv) -= s - d;
 653             return;
 654         }
 655     }
 656 }
 657
 658 STATIC I32
 659 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 660 {
 661     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 662     if (count > 0 && !maxlen)
 663         strip_return(sv);
 664     return count;
 665 }
 666 #endif
 667
 668 /*
 669 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 670
 671 Creates and initialises a new lexer/parser state object, supplying
 672 a context in which to lex and parse from a new source of Perl code.
 673 A pointer to the new state object is placed in L</PL_parser>.  An entry
 674 is made on the save stack so that upon unwinding the new state object
 675 will be destroyed and the former value of L</PL_parser> will be restored.
 676 Nothing else need be done to clean up the parsing context.
 677
 678 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 679 non-null, provides a string (in SV form) containing code to be parsed.
 680 A copy of the string is made, so subsequent modification of I<line>
 681 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 682 from which code will be read to be parsed.  If both are non-null, the
 683 code in I<line> comes first and must consist of complete lines of input,
 684 and I<rsfp> supplies the remainder of the source.
 685
 686 The I<flags> parameter is reserved for future use.  Currently it is only
 687 used by perl internally, so extensions should always pass zero.
 688
 689 =cut
 690 */
 691
 692 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 693    can share filters with the current parser.
 694    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 695    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 696    destruction. This is used to handle the case of defaulting to reading the
 697    script from the standard input because no filename was given on the command
 698    line (without getting confused by situation where STDIN has been closed, so
 699    the script handle is opened on fd 0)  */
 700
 701 void
 702 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 703 {
 704     dVAR;
 705     const char *s = NULL;
 706     yy_parser *parser, *oparser;
 707     if (flags && flags & ~LEX_START_FLAGS)
 708         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 709
 710     /* create and initialise a parser */
 711
 712     Newxz(parser, 1, yy_parser);
 713     parser->old_parser = oparser = PL_parser;
 714     PL_parser = parser;
 715
 716     parser->stack = NULL;
 717     parser->ps = NULL;
 718     parser->stack_size = 0;
 719
 720     /* on scope exit, free this parser and restore any outer one */
 721     SAVEPARSER(parser);
 722     parser->saved_curcop = PL_curcop;
 723
 724     /* initialise lexer state */
 725
 726 #ifdef PERL_MAD
 727     parser->curforce = -1;
 728 #else
 729     parser->nexttoke = 0;
 730 #endif
 731     parser->error_count = oparser ? oparser->error_count : 0;
 732     parser->copline = NOLINE;
 733     parser->lex_state = LEX_NORMAL;
 734     parser->expect = XSTATE;
 735     parser->rsfp = rsfp;
 736     parser->rsfp_filters =
 737       !(flags & LEX_START_SAME_FILTER) || !oparser
 738         ? NULL
 739         : MUTABLE_AV(SvREFCNT_inc(
 740             oparser->rsfp_filters
 741              ? oparser->rsfp_filters
 742              : (oparser->rsfp_filters = newAV())
 743           ));
 744
 745     Newx(parser->lex_brackstack, 120, char);
 746     Newx(parser->lex_casestack, 12, char);
 747     *parser->lex_casestack = '\0';
 748     Newxz(parser->lex_shared, 1, LEXSHARED);
 749
 750     if (line) {
 751         STRLEN len;
 752         s = SvPV_const(line, len);
 753         parser->linestr = flags & LEX_START_COPIED
 754                             ? SvREFCNT_inc_simple_NN(line)
 755                             : newSVpvn_flags(s, len, SvUTF8(line));
 756         sv_catpvs(parser->linestr, "\n;");
 757     } else {
 758         parser->linestr = newSVpvs("\n;");
 759     }
 760     parser->oldoldbufptr =
 761         parser->oldbufptr =
 762         parser->bufptr =
 763         parser->linestart = SvPVX(parser->linestr);
 764     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 765     parser->last_lop = parser->last_uni = NULL;
 766     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 767                                  |LEX_DONT_CLOSE_RSFP);
 768
 769     parser->in_pod = parser->filtered = 0;
 770 }
 771
 772
 773 /* delete a parser object */
 774
 775 void
 776 Perl_parser_free(pTHX_  const yy_parser *parser)
 777 {
 778     PERL_ARGS_ASSERT_PARSER_FREE;
 779
 780     PL_curcop = parser->saved_curcop;
 781     SvREFCNT_dec(parser->linestr);
 782
 783     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 784         PerlIO_clearerr(parser->rsfp);
 785     else if (parser->rsfp && (!parser->old_parser ||
 786                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 787         PerlIO_close(parser->rsfp);
 788     SvREFCNT_dec(parser->rsfp_filters);
 789     SvREFCNT_dec(parser->lex_stuff);
 790     SvREFCNT_dec(parser->sublex_info.repl);
 791
 792     Safefree(parser->lex_brackstack);
 793     Safefree(parser->lex_casestack);
 794     Safefree(parser->lex_shared);
 795     PL_parser = parser->old_parser;
 796     Safefree(parser);
 797 }
 798
 799 void
 800 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 801 {
 802 #ifdef PERL_MAD
 803     I32 nexttoke = parser->lasttoke;
 804 #else
 805     I32 nexttoke = parser->nexttoke;
 806 #endif
 807     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 808     while (nexttoke--) {
 809 #ifdef PERL_MAD
 810         if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
 811                                 & 0xffff)
 812          && parser->nexttoke[nexttoke].next_val.opval
 813          && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
 814          && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
 815                 op_free(parser->nexttoke[nexttoke].next_val.opval);
 816                 parser->nexttoke[nexttoke].next_val.opval = NULL;
 817         }
 818 #else
 819         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 820          && parser->nextval[nexttoke].opval
 821          && parser->nextval[nexttoke].opval->op_slabbed
 822          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 823             op_free(parser->nextval[nexttoke].opval);
 824             parser->nextval[nexttoke].opval = NULL;
 825         }
 826 #endif
 827     }
 828 }
 829
 830
 831 /*
 832 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 833
 834 Buffer scalar containing the chunk currently under consideration of the
 835 text currently being lexed.  This is always a plain string scalar (for
 836 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 837 normal scalar means; instead refer to the buffer directly by the pointer
 838 variables described below.
 839
 840 The lexer maintains various C<char*> pointers to things in the
 841 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 842 reallocated, all of these pointers must be updated.  Don't attempt to
 843 do this manually, but rather use L</lex_grow_linestr> if you need to
 844 reallocate the buffer.
 845
 846 The content of the text chunk in the buffer is commonly exactly one
 847 complete line of input, up to and including a newline terminator,
 848 but there are situations where it is otherwise.  The octets of the
 849 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 850 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 851 flag on this scalar, which may disagree with it.
 852
 853 For direct examination of the buffer, the variable
 854 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 855 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 856 of these pointers is usually preferable to examination of the scalar
 857 through normal scalar means.
 858
 859 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 860
 861 Direct pointer to the end of the chunk of text currently being lexed, the
 862 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 863 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 864 always located at the end of the buffer, and does not count as part of
 865 the buffer's contents.
 866
 867 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 868
 869 Points to the current position of lexing inside the lexer buffer.
 870 Characters around this point may be freely examined, within
 871 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 872 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 873 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 874
 875 Lexing code (whether in the Perl core or not) moves this pointer past
 876 the characters that it consumes.  It is also expected to perform some
 877 bookkeeping whenever a newline character is consumed.  This movement
 878 can be more conveniently performed by the function L</lex_read_to>,
 879 which handles newlines appropriately.
 880
 881 Interpretation of the buffer's octets can be abstracted out by
 882 using the slightly higher-level functions L</lex_peek_unichar> and
 883 L</lex_read_unichar>.
 884
 885 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 886
 887 Points to the start of the current line inside the lexer buffer.
 888 This is useful for indicating at which column an error occurred, and
 889 not much else.  This must be updated by any lexing code that consumes
 890 a newline; the function L</lex_read_to> handles this detail.
 891
 892 =cut
 893 */
 894
 895 /*
 896 =for apidoc Amx|bool|lex_bufutf8
 897
 898 Indicates whether the octets in the lexer buffer
 899 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 900 of Unicode characters.  If not, they should be interpreted as Latin-1
 901 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 902
 903 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 904 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 905 encoding.
 906
 907 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 908 is significant, but not the whole story regarding the input character
 909 encoding.  Normally, when a file is being read, the scalar contains octets
 910 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 911 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 912 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 913 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 914 is in effect.  This logic may change in the future; use this function
 915 instead of implementing the logic yourself.
 916
 917 =cut
 918 */
 919
 920 bool
 921 Perl_lex_bufutf8(pTHX)
 922 {
 923     return UTF;
 924 }
 925
 926 /*
 927 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 928
 929 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 930 at least I<len> octets (including terminating NUL).  Returns a
 931 pointer to the reallocated buffer.  This is necessary before making
 932 any direct modification of the buffer that would increase its length.
 933 L</lex_stuff_pvn> provides a more convenient way to insert text into
 934 the buffer.
 935
 936 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 937 this function updates all of the lexer's variables that point directly
 938 into the buffer.
 939
 940 =cut
 941 */
 942
 943 char *
 944 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 945 {
 946     SV *linestr;
 947     char *buf;
 948     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 949     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 950     linestr = PL_parser->linestr;
 951     buf = SvPVX(linestr);
 952     if (len <= SvLEN(linestr))
 953         return buf;
 954     bufend_pos = PL_parser->bufend - buf;
 955     bufptr_pos = PL_parser->bufptr - buf;
 956     oldbufptr_pos = PL_parser->oldbufptr - buf;
 957     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 958     linestart_pos = PL_parser->linestart - buf;
 959     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 960     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 961     re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
 962                             PL_parser->lex_shared->re_eval_start - buf : 0;
 963
 964     buf = sv_grow(linestr, len);
 965
 966     PL_parser->bufend = buf + bufend_pos;
 967     PL_parser->bufptr = buf + bufptr_pos;
 968     PL_parser->oldbufptr = buf + oldbufptr_pos;
 969     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 970     PL_parser->linestart = buf + linestart_pos;
 971     if (PL_parser->last_uni)
 972         PL_parser->last_uni = buf + last_uni_pos;
 973     if (PL_parser->last_lop)
 974         PL_parser->last_lop = buf + last_lop_pos;
 975     if (PL_parser->lex_shared->re_eval_start)
 976         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 977     return buf;
 978 }
 979
 980 /*
 981 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 982
 983 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 984 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 985 reallocating the buffer if necessary.  This means that lexing code that
 986 runs later will see the characters as if they had appeared in the input.
 987 It is not recommended to do this as part of normal parsing, and most
 988 uses of this facility run the risk of the inserted characters being
 989 interpreted in an unintended manner.
 990
 991 The string to be inserted is represented by I<len> octets starting
 992 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 993 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 994 The characters are recoded for the lexer buffer, according to how the
 995 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 996 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 997 function is more convenient.
 998
 999 =cut
1000 */
1001
1002 void
1003 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1004 {
1005     dVAR;
1006     char *bufptr;
1007     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1008     if (flags & ~(LEX_STUFF_UTF8))
1009         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1010     if (UTF) {
1011         if (flags & LEX_STUFF_UTF8) {
1012             goto plain_copy;
1013         } else {
1014             STRLEN highhalf = 0;    /* Count of variants */
1015             const char *p, *e = pv+len;
1016             for (p = pv; p != e; p++) {
1017                 if (! UTF8_IS_INVARIANT(*p)) {
1018                     highhalf++;
1019                 }
1020             }
1021             if (!highhalf)
1022                 goto plain_copy;
1023             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1024             bufptr = PL_parser->bufptr;
1025             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1026             SvCUR_set(PL_parser->linestr,
1027                 SvCUR(PL_parser->linestr) + len+highhalf);
1028             PL_parser->bufend += len+highhalf;
1029             for (p = pv; p != e; p++) {
1030                 U8 c = (U8)*p;
1031                 if (! UTF8_IS_INVARIANT(c)) {
1032                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1033                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1034                 } else {
1035                     *bufptr++ = (char)c;
1036                 }
1037             }
1038         }
1039     } else {
1040         if (flags & LEX_STUFF_UTF8) {
1041             STRLEN highhalf = 0;
1042             const char *p, *e = pv+len;
1043             for (p = pv; p != e; p++) {
1044                 U8 c = (U8)*p;
1045                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1046                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1047                                 "non-Latin-1 character into Latin-1 input");
1048                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1049                     p++;
1050                     highhalf++;
1051                 } else if (! UTF8_IS_INVARIANT(c)) {
1052                     /* malformed UTF-8 */
1053                     ENTER;
1054                     SAVESPTR(PL_warnhook);
1055                     PL_warnhook = PERL_WARNHOOK_FATAL;
1056                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1057                     LEAVE;
1058                 }
1059             }
1060             if (!highhalf)
1061                 goto plain_copy;
1062             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1063             bufptr = PL_parser->bufptr;
1064             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1065             SvCUR_set(PL_parser->linestr,
1066                 SvCUR(PL_parser->linestr) + len-highhalf);
1067             PL_parser->bufend += len-highhalf;
1068             p = pv;
1069             while (p < e) {
1070                 if (UTF8_IS_INVARIANT(*p)) {
1071                     *bufptr++ = *p;
1072                     p++;
1073                 }
1074                 else {
1075                     assert(p < e -1 );
1076                     *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1077                     p += 2;
1078                 }
1079             }
1080         } else {
1081           plain_copy:
1082             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1083             bufptr = PL_parser->bufptr;
1084             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1085             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1086             PL_parser->bufend += len;
1087             Copy(pv, bufptr, len, char);
1088         }
1089     }
1090 }
1091
1092 /*
1093 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1094
1095 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1096 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1097 reallocating the buffer if necessary.  This means that lexing code that
1098 runs later will see the characters as if they had appeared in the input.
1099 It is not recommended to do this as part of normal parsing, and most
1100 uses of this facility run the risk of the inserted characters being
1101 interpreted in an unintended manner.
1102
1103 The string to be inserted is represented by octets starting at I<pv>
1104 and continuing to the first nul.  These octets are interpreted as either
1105 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1106 in I<flags>.  The characters are recoded for the lexer buffer, according
1107 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1108 If it is not convenient to nul-terminate a string to be inserted, the
1109 L</lex_stuff_pvn> function is more appropriate.
1110
1111 =cut
1112 */
1113
1114 void
1115 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1116 {
1117     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1118     lex_stuff_pvn(pv, strlen(pv), flags);
1119 }
1120
1121 /*
1122 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1123
1124 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1125 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1126 reallocating the buffer if necessary.  This means that lexing code that
1127 runs later will see the characters as if they had appeared in the input.
1128 It is not recommended to do this as part of normal parsing, and most
1129 uses of this facility run the risk of the inserted characters being
1130 interpreted in an unintended manner.
1131
1132 The string to be inserted is the string value of I<sv>.  The characters
1133 are recoded for the lexer buffer, according to how the buffer is currently
1134 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1135 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1136 need to construct a scalar.
1137
1138 =cut
1139 */
1140
1141 void
1142 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1143 {
1144     char *pv;
1145     STRLEN len;
1146     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1147     if (flags)
1148         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1149     pv = SvPV(sv, len);
1150     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1151 }
1152
1153 /*
1154 =for apidoc Amx|void|lex_unstuff|char *ptr
1155
1156 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1157 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1158 This hides the discarded text from any lexing code that runs later,
1159 as if the text had never appeared.
1160
1161 This is not the normal way to consume lexed text.  For that, use
1162 L</lex_read_to>.
1163
1164 =cut
1165 */
1166
1167 void
1168 Perl_lex_unstuff(pTHX_ char *ptr)
1169 {
1170     char *buf, *bufend;
1171     STRLEN unstuff_len;
1172     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1173     buf = PL_parser->bufptr;
1174     if (ptr < buf)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176     if (ptr == buf)
1177         return;
1178     bufend = PL_parser->bufend;
1179     if (ptr > bufend)
1180         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1181     unstuff_len = ptr - buf;
1182     Move(ptr, buf, bufend+1-ptr, char);
1183     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1184     PL_parser->bufend = bufend - unstuff_len;
1185 }
1186
1187 /*
1188 =for apidoc Amx|void|lex_read_to|char *ptr
1189
1190 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1191 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1192 performing the correct bookkeeping whenever a newline character is passed.
1193 This is the normal way to consume lexed text.
1194
1195 Interpretation of the buffer's octets can be abstracted out by
1196 using the slightly higher-level functions L</lex_peek_unichar> and
1197 L</lex_read_unichar>.
1198
1199 =cut
1200 */
1201
1202 void
1203 Perl_lex_read_to(pTHX_ char *ptr)
1204 {
1205     char *s;
1206     PERL_ARGS_ASSERT_LEX_READ_TO;
1207     s = PL_parser->bufptr;
1208     if (ptr < s || ptr > PL_parser->bufend)
1209         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1210     for (; s != ptr; s++)
1211         if (*s == '\n') {
1212             COPLINE_INC_WITH_HERELINES;
1213             PL_parser->linestart = s+1;
1214         }
1215     PL_parser->bufptr = ptr;
1216 }
1217
1218 /*
1219 =for apidoc Amx|void|lex_discard_to|char *ptr
1220
1221 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1222 up to I<ptr>.  The remaining content of the buffer will be moved, and
1223 all pointers into the buffer updated appropriately.  I<ptr> must not
1224 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1225 it is not permitted to discard text that has yet to be lexed.
1226
1227 Normally it is not necessarily to do this directly, because it suffices to
1228 use the implicit discarding behaviour of L</lex_next_chunk> and things
1229 based on it.  However, if a token stretches across multiple lines,
1230 and the lexing code has kept multiple lines of text in the buffer for
1231 that purpose, then after completion of the token it would be wise to
1232 explicitly discard the now-unneeded earlier lines, to avoid future
1233 multi-line tokens growing the buffer without bound.
1234
1235 =cut
1236 */
1237
1238 void
1239 Perl_lex_discard_to(pTHX_ char *ptr)
1240 {
1241     char *buf;
1242     STRLEN discard_len;
1243     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1244     buf = SvPVX(PL_parser->linestr);
1245     if (ptr < buf)
1246         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1247     if (ptr == buf)
1248         return;
1249     if (ptr > PL_parser->bufptr)
1250         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1251     discard_len = ptr - buf;
1252     if (PL_parser->oldbufptr < ptr)
1253         PL_parser->oldbufptr = ptr;
1254     if (PL_parser->oldoldbufptr < ptr)
1255         PL_parser->oldoldbufptr = ptr;
1256     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1257         PL_parser->last_uni = NULL;
1258     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1259         PL_parser->last_lop = NULL;
1260     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1261     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1262     PL_parser->bufend -= discard_len;
1263     PL_parser->bufptr -= discard_len;
1264     PL_parser->oldbufptr -= discard_len;
1265     PL_parser->oldoldbufptr -= discard_len;
1266     if (PL_parser->last_uni)
1267         PL_parser->last_uni -= discard_len;
1268     if (PL_parser->last_lop)
1269         PL_parser->last_lop -= discard_len;
1270 }
1271
1272 /*
1273 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1274
1275 Reads in the next chunk of text to be lexed, appending it to
1276 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1277 looked to the end of the current chunk and wants to know more.  It is
1278 usual, but not necessary, for lexing to have consumed the entirety of
1279 the current chunk at this time.
1280
1281 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1282 chunk (i.e., the current chunk has been entirely consumed), normally the
1283 current chunk will be discarded at the same time that the new chunk is
1284 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1285 will not be discarded.  If the current chunk has not been entirely
1286 consumed, then it will not be discarded regardless of the flag.
1287
1288 Returns true if some new text was added to the buffer, or false if the
1289 buffer has reached the end of the input text.
1290
1291 =cut
1292 */
1293
1294 #define LEX_FAKE_EOF 0x80000000
1295 #define LEX_NO_TERM  0x40000000
1296
1297 bool
1298 Perl_lex_next_chunk(pTHX_ U32 flags)
1299 {
1300     SV *linestr;
1301     char *buf;
1302     STRLEN old_bufend_pos, new_bufend_pos;
1303     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1304     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1305     bool got_some_for_debugger = 0;
1306     bool got_some;
1307     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1308         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1309     linestr = PL_parser->linestr;
1310     buf = SvPVX(linestr);
1311     if (!(flags & LEX_KEEP_PREVIOUS) &&
1312             PL_parser->bufptr == PL_parser->bufend) {
1313         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1314         linestart_pos = 0;
1315         if (PL_parser->last_uni != PL_parser->bufend)
1316             PL_parser->last_uni = NULL;
1317         if (PL_parser->last_lop != PL_parser->bufend)
1318             PL_parser->last_lop = NULL;
1319         last_uni_pos = last_lop_pos = 0;
1320         *buf = 0;
1321         SvCUR(linestr) = 0;
1322     } else {
1323         old_bufend_pos = PL_parser->bufend - buf;
1324         bufptr_pos = PL_parser->bufptr - buf;
1325         oldbufptr_pos = PL_parser->oldbufptr - buf;
1326         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1327         linestart_pos = PL_parser->linestart - buf;
1328         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1329         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1330     }
1331     if (flags & LEX_FAKE_EOF) {
1332         goto eof;
1333     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1334         got_some = 0;
1335     } else if (filter_gets(linestr, old_bufend_pos)) {
1336         got_some = 1;
1337         got_some_for_debugger = 1;
1338     } else if (flags & LEX_NO_TERM) {
1339         got_some = 0;
1340     } else {
1341         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1342             sv_setpvs(linestr, "");
1343         eof:
1344         /* End of real input.  Close filehandle (unless it was STDIN),
1345          * then add implicit termination.
1346          */
1347         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1348             PerlIO_clearerr(PL_parser->rsfp);
1349         else if (PL_parser->rsfp)
1350             (void)PerlIO_close(PL_parser->rsfp);
1351         PL_parser->rsfp = NULL;
1352         PL_parser->in_pod = PL_parser->filtered = 0;
1353 #ifdef PERL_MAD
1354         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1355             PL_faketokens = 1;
1356 #endif
1357         if (!PL_in_eval && PL_minus_p) {
1358             sv_catpvs(linestr,
1359                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1360             PL_minus_n = PL_minus_p = 0;
1361         } else if (!PL_in_eval && PL_minus_n) {
1362             sv_catpvs(linestr, /*{*/";}");
1363             PL_minus_n = 0;
1364         } else
1365             sv_catpvs(linestr, ";");
1366         got_some = 1;
1367     }
1368     buf = SvPVX(linestr);
1369     new_bufend_pos = SvCUR(linestr);
1370     PL_parser->bufend = buf + new_bufend_pos;
1371     PL_parser->bufptr = buf + bufptr_pos;
1372     PL_parser->oldbufptr = buf + oldbufptr_pos;
1373     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1374     PL_parser->linestart = buf + linestart_pos;
1375     if (PL_parser->last_uni)
1376         PL_parser->last_uni = buf + last_uni_pos;
1377     if (PL_parser->last_lop)
1378         PL_parser->last_lop = buf + last_lop_pos;
1379     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1380             PL_curstash != PL_debstash) {
1381         /* debugger active and we're not compiling the debugger code,
1382          * so store the line into the debugger's array of lines
1383          */
1384         update_debugger_info(NULL, buf+old_bufend_pos,
1385             new_bufend_pos-old_bufend_pos);
1386     }
1387     return got_some;
1388 }
1389
1390 /*
1391 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1392
1393 Looks ahead one (Unicode) character in the text currently being lexed.
1394 Returns the codepoint (unsigned integer value) of the next character,
1395 or -1 if lexing has reached the end of the input text.  To consume the
1396 peeked character, use L</lex_read_unichar>.
1397
1398 If the next character is in (or extends into) the next chunk of input
1399 text, the next chunk will be read in.  Normally the current chunk will be
1400 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1401 then the current chunk will not be discarded.
1402
1403 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1404 is encountered, an exception is generated.
1405
1406 =cut
1407 */
1408
1409 I32
1410 Perl_lex_peek_unichar(pTHX_ U32 flags)
1411 {
1412     dVAR;
1413     char *s, *bufend;
1414     if (flags & ~(LEX_KEEP_PREVIOUS))
1415         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1416     s = PL_parser->bufptr;
1417     bufend = PL_parser->bufend;
1418     if (UTF) {
1419         U8 head;
1420         I32 unichar;
1421         STRLEN len, retlen;
1422         if (s == bufend) {
1423             if (!lex_next_chunk(flags))
1424                 return -1;
1425             s = PL_parser->bufptr;
1426             bufend = PL_parser->bufend;
1427         }
1428         head = (U8)*s;
1429         if (UTF8_IS_INVARIANT(head))
1430             return head;
1431         if (UTF8_IS_START(head)) {
1432             len = UTF8SKIP(&head);
1433             while ((STRLEN)(bufend-s) < len) {
1434                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1435                     break;
1436                 s = PL_parser->bufptr;
1437                 bufend = PL_parser->bufend;
1438             }
1439         }
1440         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1441         if (retlen == (STRLEN)-1) {
1442             /* malformed UTF-8 */
1443             ENTER;
1444             SAVESPTR(PL_warnhook);
1445             PL_warnhook = PERL_WARNHOOK_FATAL;
1446             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1447             LEAVE;
1448         }
1449         return unichar;
1450     } else {
1451         if (s == bufend) {
1452             if (!lex_next_chunk(flags))
1453                 return -1;
1454             s = PL_parser->bufptr;
1455         }
1456         return (U8)*s;
1457     }
1458 }
1459
1460 /*
1461 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1462
1463 Reads the next (Unicode) character in the text currently being lexed.
1464 Returns the codepoint (unsigned integer value) of the character read,
1465 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1466 if lexing has reached the end of the input text.  To non-destructively
1467 examine the next character, use L</lex_peek_unichar> instead.
1468
1469 If the next character is in (or extends into) the next chunk of input
1470 text, the next chunk will be read in.  Normally the current chunk will be
1471 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1472 then the current chunk will not be discarded.
1473
1474 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1475 is encountered, an exception is generated.
1476
1477 =cut
1478 */
1479
1480 I32
1481 Perl_lex_read_unichar(pTHX_ U32 flags)
1482 {
1483     I32 c;
1484     if (flags & ~(LEX_KEEP_PREVIOUS))
1485         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1486     c = lex_peek_unichar(flags);
1487     if (c != -1) {
1488         if (c == '\n')
1489             COPLINE_INC_WITH_HERELINES;
1490         if (UTF)
1491             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1492         else
1493             ++(PL_parser->bufptr);
1494     }
1495     return c;
1496 }
1497
1498 /*
1499 =for apidoc Amx|void|lex_read_space|U32 flags
1500
1501 Reads optional spaces, in Perl style, in the text currently being
1502 lexed.  The spaces may include ordinary whitespace characters and
1503 Perl-style comments.  C<#line> directives are processed if encountered.
1504 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1505 at a non-space character (or the end of the input text).
1506
1507 If spaces extend into the next chunk of input text, the next chunk will
1508 be read in.  Normally the current chunk will be discarded at the same
1509 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1510 chunk will not be discarded.
1511
1512 =cut
1513 */
1514
1515 #define LEX_NO_INCLINE    0x40000000
1516 #define LEX_NO_NEXT_CHUNK 0x80000000
1517
1518 void
1519 Perl_lex_read_space(pTHX_ U32 flags)
1520 {
1521     char *s, *bufend;
1522     const bool can_incline = !(flags & LEX_NO_INCLINE);
1523     bool need_incline = 0;
1524     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1525         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1526 #ifdef PERL_MAD
1527     if (PL_skipwhite) {
1528         sv_free(PL_skipwhite);
1529         PL_skipwhite = NULL;
1530     }
1531     if (PL_madskills)
1532         PL_skipwhite = newSVpvs("");
1533 #endif /* PERL_MAD */
1534     s = PL_parser->bufptr;
1535     bufend = PL_parser->bufend;
1536     while (1) {
1537         char c = *s;
1538         if (c == '#') {
1539             do {
1540                 c = *++s;
1541             } while (!(c == '\n' || (c == 0 && s == bufend)));
1542         } else if (c == '\n') {
1543             s++;
1544             if (can_incline) {
1545                 PL_parser->linestart = s;
1546                 if (s == bufend)
1547                     need_incline = 1;
1548                 else
1549                     incline(s);
1550             }
1551         } else if (isSPACE(c)) {
1552             s++;
1553         } else if (c == 0 && s == bufend) {
1554             bool got_more;
1555 #ifdef PERL_MAD
1556             if (PL_madskills)
1557                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1558 #endif /* PERL_MAD */
1559             if (flags & LEX_NO_NEXT_CHUNK)
1560                 break;
1561             PL_parser->bufptr = s;
1562             if (can_incline) COPLINE_INC_WITH_HERELINES;
1563             got_more = lex_next_chunk(flags);
1564             if (can_incline) CopLINE_dec(PL_curcop);
1565             s = PL_parser->bufptr;
1566             bufend = PL_parser->bufend;
1567             if (!got_more)
1568                 break;
1569             if (can_incline && need_incline && PL_parser->rsfp) {
1570                 incline(s);
1571                 need_incline = 0;
1572             }
1573         } else {
1574             break;
1575         }
1576     }
1577 #ifdef PERL_MAD
1578     if (PL_madskills)
1579         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1580 #endif /* PERL_MAD */
1581     PL_parser->bufptr = s;
1582 }
1583
1584 /*
1585
1586 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1587
1588 This function performs syntax checking on a prototype, C<proto>.
1589 If C<warn> is true, any illegal characters or mismatched brackets
1590 will trigger illegalproto warnings, declaring that they were
1591 detected in the prototype for C<name>.
1592
1593 The return value is C<true> if this is a valid prototype, and
1594 C<false> if it is not, regardless of whether C<warn> was C<true> or
1595 C<false>.
1596
1597 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1598
1599 =cut
1600
1601  */
1602
1603 bool
1604 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1605 {
1606     STRLEN len, origlen;
1607     char *p = proto ? SvPV(proto, len) : NULL;
1608     bool bad_proto = FALSE;
1609     bool in_brackets = FALSE;
1610     bool after_slash = FALSE;
1611     char greedy_proto = ' ';
1612     bool proto_after_greedy_proto = FALSE;
1613     bool must_be_last = FALSE;
1614     bool underscore = FALSE;
1615     bool bad_proto_after_underscore = FALSE;
1616
1617     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1618
1619     if (!proto)
1620         return TRUE;
1621
1622     origlen = len;
1623     for (; len--; p++) {
1624         if (!isSPACE(*p)) {
1625             if (must_be_last)
1626                 proto_after_greedy_proto = TRUE;
1627             if (underscore) {
1628                 if (!strchr(";@%", *p))
1629                     bad_proto_after_underscore = TRUE;
1630                 underscore = FALSE;
1631             }
1632             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1633                 bad_proto = TRUE;
1634             }
1635             else {
1636                 if (*p == '[')
1637                     in_brackets = TRUE;
1638                 else if (*p == ']')
1639                     in_brackets = FALSE;
1640                 else if ((*p == '@' || *p == '%') &&
1641                     !after_slash &&
1642                     !in_brackets ) {
1643                     must_be_last = TRUE;
1644                     greedy_proto = *p;
1645                 }
1646                 else if (*p == '_')
1647                     underscore = TRUE;
1648             }
1649             if (*p == '\\')
1650                 after_slash = TRUE;
1651             else
1652                 after_slash = FALSE;
1653         }
1654     }
1655
1656     if (warn) {
1657         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1658         p -= origlen;
1659         p = SvUTF8(proto)
1660             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1661                              origlen, UNI_DISPLAY_ISPRINT)
1662             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1663
1664         if (proto_after_greedy_proto)
1665             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1666                         "Prototype after '%c' for %"SVf" : %s",
1667                         greedy_proto, SVfARG(name), p);
1668         if (in_brackets)
1669             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1670                         "Missing ']' in prototype for %"SVf" : %s",
1671                         SVfARG(name), p);
1672         if (bad_proto)
1673             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1674                         "Illegal character in prototype for %"SVf" : %s",
1675                         SVfARG(name), p);
1676         if (bad_proto_after_underscore)
1677             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1678                         "Illegal character after '_' in prototype for %"SVf" : %s",
1679                         SVfARG(name), p);
1680     }
1681
1682     return (! (proto_after_greedy_proto || bad_proto) );
1683 }
1684
1685 /*
1686  * S_incline
1687  * This subroutine has nothing to do with tilting, whether at windmills
1688  * or pinball tables.  Its name is short for "increment line".  It
1689  * increments the current line number in CopLINE(PL_curcop) and checks
1690  * to see whether the line starts with a comment of the form
1691  *    # line 500 "foo.pm"
1692  * If so, it sets the current line number and file to the values in the comment.
1693  */
1694
1695 STATIC void
1696 S_incline(pTHX_ const char *s)
1697 {
1698     dVAR;
1699     const char *t;
1700     const char *n;
1701     const char *e;
1702     line_t line_num;
1703
1704     PERL_ARGS_ASSERT_INCLINE;
1705
1706     COPLINE_INC_WITH_HERELINES;
1707     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1708      && s+1 == PL_bufend && *s == ';') {
1709         /* fake newline in string eval */
1710         CopLINE_dec(PL_curcop);
1711         return;
1712     }
1713     if (*s++ != '#')
1714         return;
1715     while (SPACE_OR_TAB(*s))
1716         s++;
1717     if (strnEQ(s, "line", 4))
1718         s += 4;
1719     else
1720         return;
1721     if (SPACE_OR_TAB(*s))
1722         s++;
1723     else
1724         return;
1725     while (SPACE_OR_TAB(*s))
1726         s++;
1727     if (!isDIGIT(*s))
1728         return;
1729
1730     n = s;
1731     while (isDIGIT(*s))
1732         s++;
1733     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1734         return;
1735     while (SPACE_OR_TAB(*s))
1736         s++;
1737     if (*s == '"' && (t = strchr(s+1, '"'))) {
1738         s++;
1739         e = t + 1;
1740     }
1741     else {
1742         t = s;
1743         while (!isSPACE(*t))
1744             t++;
1745         e = t;
1746     }
1747     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1748         e++;
1749     if (*e != '\n' && *e != '\0')
1750         return;         /* false alarm */
1751
1752     line_num = atoi(n)-1;
1753
1754     if (t - s > 0) {
1755         const STRLEN len = t - s;
1756
1757         if (!PL_rsfp && !PL_parser->filtered) {
1758             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1759              * to *{"::_<newfilename"} */
1760             /* However, the long form of evals is only turned on by the
1761                debugger - usually they're "(eval %lu)" */
1762             GV * const cfgv = CopFILEGV(PL_curcop);
1763             if (cfgv) {
1764                 char smallbuf[128];
1765                 STRLEN tmplen2 = len;
1766                 char *tmpbuf2;
1767                 GV *gv2;
1768
1769                 if (tmplen2 + 2 <= sizeof smallbuf)
1770                     tmpbuf2 = smallbuf;
1771                 else
1772                     Newx(tmpbuf2, tmplen2 + 2, char);
1773
1774                 tmpbuf2[0] = '_';
1775                 tmpbuf2[1] = '<';
1776
1777                 memcpy(tmpbuf2 + 2, s, tmplen2);
1778                 tmplen2 += 2;
1779
1780                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1781                 if (!isGV(gv2)) {
1782                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1783                     /* adjust ${"::_<newfilename"} to store the new file name */
1784                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1785                     /* The line number may differ. If that is the case,
1786                        alias the saved lines that are in the array.
1787                        Otherwise alias the whole array. */
1788                     if (CopLINE(PL_curcop) == line_num) {
1789                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1790                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1791                     }
1792                     else if (GvAV(cfgv)) {
1793                         AV * const av = GvAV(cfgv);
1794                         const I32 start = CopLINE(PL_curcop)+1;
1795                         I32 items = AvFILLp(av) - start;
1796                         if (items > 0) {
1797                             AV * const av2 = GvAVn(gv2);
1798                             SV **svp = AvARRAY(av) + start;
1799                             I32 l = (I32)line_num+1;
1800                             while (items--)
1801                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1802                         }
1803                     }
1804                 }
1805
1806                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1807             }
1808         }
1809         CopFILE_free(PL_curcop);
1810         CopFILE_setn(PL_curcop, s, len);
1811     }
1812     CopLINE_set(PL_curcop, line_num);
1813 }
1814
1815 #define skipspace(s) skipspace_flags(s, 0)
1816
1817 #ifdef PERL_MAD
1818 /* skip space before PL_thistoken */
1819
1820 STATIC char *
1821 S_skipspace0(pTHX_ char *s)
1822 {
1823     PERL_ARGS_ASSERT_SKIPSPACE0;
1824
1825     s = skipspace(s);
1826     if (!PL_madskills)
1827         return s;
1828     if (PL_skipwhite) {
1829         if (!PL_thiswhite)
1830             PL_thiswhite = newSVpvs("");
1831         sv_catsv(PL_thiswhite, PL_skipwhite);
1832         sv_free(PL_skipwhite);
1833         PL_skipwhite = 0;
1834     }
1835     PL_realtokenstart = s - SvPVX(PL_linestr);
1836     return s;
1837 }
1838
1839 /* skip space after PL_thistoken */
1840
1841 STATIC char *
1842 S_skipspace1(pTHX_ char *s)
1843 {
1844     const char *start = s;
1845     I32 startoff = start - SvPVX(PL_linestr);
1846
1847     PERL_ARGS_ASSERT_SKIPSPACE1;
1848
1849     s = skipspace(s);
1850     if (!PL_madskills)
1851         return s;
1852     start = SvPVX(PL_linestr) + startoff;
1853     if (!PL_thistoken && PL_realtokenstart >= 0) {
1854         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1855         PL_thistoken = newSVpvn(tstart, start - tstart);
1856     }
1857     PL_realtokenstart = -1;
1858     if (PL_skipwhite) {
1859         if (!PL_nextwhite)
1860             PL_nextwhite = newSVpvs("");
1861         sv_catsv(PL_nextwhite, PL_skipwhite);
1862         sv_free(PL_skipwhite);
1863         PL_skipwhite = 0;
1864     }
1865     return s;
1866 }
1867
1868 STATIC char *
1869 S_skipspace2(pTHX_ char *s, SV **svp)
1870 {
1871     char *start;
1872     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1873     const I32 startoff = s - SvPVX(PL_linestr);
1874
1875     PERL_ARGS_ASSERT_SKIPSPACE2;
1876
1877     s = skipspace(s);
1878     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1879     if (!PL_madskills || !svp)
1880         return s;
1881     start = SvPVX(PL_linestr) + startoff;
1882     if (!PL_thistoken && PL_realtokenstart >= 0) {
1883         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1884         PL_thistoken = newSVpvn(tstart, start - tstart);
1885         PL_realtokenstart = -1;
1886     }
1887     if (PL_skipwhite) {
1888         if (!*svp)
1889             *svp = newSVpvs("");
1890         sv_setsv(*svp, PL_skipwhite);
1891         sv_free(PL_skipwhite);
1892         PL_skipwhite = 0;
1893     }
1894
1895     return s;
1896 }
1897 #endif
1898
1899 STATIC void
1900 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1901 {
1902     AV *av = CopFILEAVx(PL_curcop);
1903     if (av) {
1904         SV * const sv = newSV_type(SVt_PVMG);
1905         if (orig_sv)
1906             sv_setsv_flags(sv, orig_sv, 0); /* no cow */
1907         else
1908             sv_setpvn(sv, buf, len);
1909         (void)SvIOK_on(sv);
1910         SvIV_set(sv, 0);
1911         av_store(av, (I32)CopLINE(PL_curcop), sv);
1912     }
1913 }
1914
1915 /*
1916  * S_skipspace
1917  * Called to gobble the appropriate amount and type of whitespace.
1918  * Skips comments as well.
1919  */
1920
1921 STATIC char *
1922 S_skipspace_flags(pTHX_ char *s, U32 flags)
1923 {
1924 #ifdef PERL_MAD
1925     char *start = s;
1926 #endif /* PERL_MAD */
1927     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1928 #ifdef PERL_MAD
1929     if (PL_skipwhite) {
1930         sv_free(PL_skipwhite);
1931         PL_skipwhite = NULL;
1932     }
1933 #endif /* PERL_MAD */
1934     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1935         while (s < PL_bufend && SPACE_OR_TAB(*s))
1936             s++;
1937     } else {
1938         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1939         PL_bufptr = s;
1940         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1941                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1942                     LEX_NO_NEXT_CHUNK : 0));
1943         s = PL_bufptr;
1944         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1945         if (PL_linestart > PL_bufptr)
1946             PL_bufptr = PL_linestart;
1947         return s;
1948     }
1949 #ifdef PERL_MAD
1950     if (PL_madskills)
1951         PL_skipwhite = newSVpvn(start, s-start);
1952 #endif /* PERL_MAD */
1953     return s;
1954 }
1955
1956 /*
1957  * S_check_uni
1958  * Check the unary operators to ensure there's no ambiguity in how they're
1959  * used.  An ambiguous piece of code would be:
1960  *     rand + 5
1961  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1962  * the +5 is its argument.
1963  */
1964
1965 STATIC void
1966 S_check_uni(pTHX)
1967 {
1968     dVAR;
1969     const char *s;
1970     const char *t;
1971
1972     if (PL_oldoldbufptr != PL_last_uni)
1973         return;
1974     while (isSPACE(*PL_last_uni))
1975         PL_last_uni++;
1976     s = PL_last_uni;
1977     while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1978         s++;
1979     if ((t = strchr(s, '(')) && t < PL_bufptr)
1980         return;
1981
1982     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1983                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1984                      (int)(s - PL_last_uni), PL_last_uni);
1985 }
1986
1987 /*
1988  * LOP : macro to build a list operator.  Its behaviour has been replaced
1989  * with a subroutine, S_lop() for which LOP is just another name.
1990  */
1991
1992 #define LOP(f,x) return lop(f,x,s)
1993
1994 /*
1995  * S_lop
1996  * Build a list operator (or something that might be one).  The rules:
1997  *  - if we have a next token, then it's a list operator [why?]
1998  *  - if the next thing is an opening paren, then it's a function
1999  *  - else it's a list operator
2000  */
2001
2002 STATIC I32
2003 S_lop(pTHX_ I32 f, int x, char *s)
2004 {
2005     dVAR;
2006
2007     PERL_ARGS_ASSERT_LOP;
2008
2009     pl_yylval.ival = f;
2010     CLINE;
2011     PL_expect = x;
2012     PL_bufptr = s;
2013     PL_last_lop = PL_oldbufptr;
2014     PL_last_lop_op = (OPCODE)f;
2015 #ifdef PERL_MAD
2016     if (PL_lasttoke)
2017         goto lstop;
2018 #else
2019     if (PL_nexttoke)
2020         goto lstop;
2021 #endif
2022     if (*s == '(')
2023         return REPORT(FUNC);
2024     s = PEEKSPACE(s);
2025     if (*s == '(')
2026         return REPORT(FUNC);
2027     else {
2028         lstop:
2029         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2030             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2031         return REPORT(LSTOP);
2032     }
2033 }
2034
2035 #ifdef PERL_MAD
2036  /*
2037  * S_start_force
2038  * Sets up for an eventual force_next().  start_force(0) basically does
2039  * an unshift, while start_force(-1) does a push.  yylex removes items
2040  * on the "pop" end.
2041  */
2042
2043 STATIC void
2044 S_start_force(pTHX_ int where)
2045 {
2046     int i;
2047
2048     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
2049         where = PL_lasttoke;
2050     assert(PL_curforce < 0 || PL_curforce == where);
2051     if (PL_curforce != where) {
2052         for (i = PL_lasttoke; i > where; --i) {
2053             PL_nexttoke[i] = PL_nexttoke[i-1];
2054         }
2055         PL_lasttoke++;
2056     }
2057     if (PL_curforce < 0)        /* in case of duplicate start_force() */
2058         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2059     PL_curforce = where;
2060     if (PL_nextwhite) {
2061         if (PL_madskills)
2062             curmad('^', newSVpvs(""));
2063         CURMAD('_', PL_nextwhite);
2064     }
2065 }
2066
2067 STATIC void
2068 S_curmad(pTHX_ char slot, SV *sv)
2069 {
2070     MADPROP **where;
2071
2072     if (!sv)
2073         return;
2074     if (PL_curforce < 0)
2075         where = &PL_thismad;
2076     else
2077         where = &PL_nexttoke[PL_curforce].next_mad;
2078
2079     if (PL_faketokens)
2080         sv_setpvs(sv, "");
2081     else {
2082         if (!IN_BYTES) {
2083             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2084                 SvUTF8_on(sv);
2085             else if (PL_encoding) {
2086                 sv_recode_to_utf8(sv, PL_encoding);
2087             }
2088         }
2089     }
2090
2091     /* keep a slot open for the head of the list? */
2092     if (slot != '_' && *where && (*where)->mad_key == '^') {
2093         (*where)->mad_key = slot;
2094         sv_free(MUTABLE_SV(((*where)->mad_val)));
2095         (*where)->mad_val = (void*)sv;
2096     }
2097     else
2098         addmad(newMADsv(slot, sv), where, 0);
2099 }
2100 #else
2101 #  define start_force(where)    NOOP
2102 #  define curmad(slot, sv)      NOOP
2103 #endif
2104
2105 /*
2106  * S_force_next
2107  * When the lexer realizes it knows the next token (for instance,
2108  * it is reordering tokens for the parser) then it can call S_force_next
2109  * to know what token to return the next time the lexer is called.  Caller
2110  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2111  * and possibly PL_expect to ensure the lexer handles the token correctly.
2112  */
2113
2114 STATIC void
2115 S_force_next(pTHX_ I32 type)
2116 {
2117     dVAR;
2118 #ifdef DEBUGGING
2119     if (DEBUG_T_TEST) {
2120         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2121         tokereport(type, &NEXTVAL_NEXTTOKE);
2122     }
2123 #endif
2124 #ifdef PERL_MAD
2125     if (PL_curforce < 0)
2126         start_force(PL_lasttoke);
2127     PL_nexttoke[PL_curforce].next_type = type;
2128     if (PL_lex_state != LEX_KNOWNEXT)
2129         PL_lex_defer = PL_lex_state;
2130     PL_lex_state = LEX_KNOWNEXT;
2131     PL_lex_expect = PL_expect;
2132     PL_curforce = -1;
2133 #else
2134     PL_nexttype[PL_nexttoke] = type;
2135     PL_nexttoke++;
2136     if (PL_lex_state != LEX_KNOWNEXT) {
2137         PL_lex_defer = PL_lex_state;
2138         PL_lex_expect = PL_expect;
2139         PL_lex_state = LEX_KNOWNEXT;
2140     }
2141 #endif
2142 }
2143
2144 void
2145 Perl_yyunlex(pTHX)
2146 {
2147     int yyc = PL_parser->yychar;
2148     if (yyc != YYEMPTY) {
2149         if (yyc) {
2150             start_force(-1);
2151             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2152             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2153                 PL_lex_allbrackets--;
2154                 PL_lex_brackets--;
2155                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2156             } else if (yyc == '('/*)*/) {
2157                 PL_lex_allbrackets--;
2158                 yyc |= (2<<24);
2159             }
2160             force_next(yyc);
2161         }
2162         PL_parser->yychar = YYEMPTY;
2163     }
2164 }
2165
2166 STATIC SV *
2167 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2168 {
2169     dVAR;
2170     SV * const sv = newSVpvn_utf8(start, len,
2171                                   !IN_BYTES
2172                                   && UTF
2173                                   && !is_ascii_string((const U8*)start, len)
2174                                   && is_utf8_string((const U8*)start, len));
2175     return sv;
2176 }
2177
2178 /*
2179  * S_force_word
2180  * When the lexer knows the next thing is a word (for instance, it has
2181  * just seen -> and it knows that the next char is a word char, then
2182  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2183  * lookahead.
2184  *
2185  * Arguments:
2186  *   char *start : buffer position (must be within PL_linestr)
2187  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2188  *   int check_keyword : if true, Perl checks to make sure the word isn't
2189  *       a keyword (do this if the word is a label, e.g. goto FOO)
2190  *   int allow_pack : if true, : characters will also be allowed (require,
2191  *       use, etc. do this)
2192  *   int allow_initial_tick : used by the "sub" lexer only.
2193  */
2194
2195 STATIC char *
2196 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2197 {
2198     dVAR;
2199     char *s;
2200     STRLEN len;
2201
2202     PERL_ARGS_ASSERT_FORCE_WORD;
2203
2204     start = SKIPSPACE1(start);
2205     s = start;
2206     if (isIDFIRST_lazy_if(s,UTF) ||
2207         (allow_pack && *s == ':') )
2208     {
2209         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2210         if (check_keyword) {
2211           char *s2 = PL_tokenbuf;
2212           if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2213             s2 += 6, len -= 6;
2214           if (keyword(s2, len, 0))
2215             return start;
2216         }
2217         start_force(PL_curforce);
2218         if (PL_madskills)
2219             curmad('X', newSVpvn(start,s-start));
2220         if (token == METHOD) {
2221             s = SKIPSPACE1(s);
2222             if (*s == '(')
2223                 PL_expect = XTERM;
2224             else {
2225                 PL_expect = XOPERATOR;
2226             }
2227         }
2228         if (PL_madskills)
2229             curmad('g', newSVpvs( "forced" ));
2230         NEXTVAL_NEXTTOKE.opval
2231             = (OP*)newSVOP(OP_CONST,0,
2232                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2233         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2234         force_next(token);
2235     }
2236     return s;
2237 }
2238
2239 /*
2240  * S_force_ident
2241  * Called when the lexer wants $foo *foo &foo etc, but the program
2242  * text only contains the "foo" portion.  The first argument is a pointer
2243  * to the "foo", and the second argument is the type symbol to prefix.
2244  * Forces the next token to be a "WORD".
2245  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2246  */
2247
2248 STATIC void
2249 S_force_ident(pTHX_ const char *s, int kind)
2250 {
2251     dVAR;
2252
2253     PERL_ARGS_ASSERT_FORCE_IDENT;
2254
2255     if (s[0]) {
2256         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2257         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2258                                                                 UTF ? SVf_UTF8 : 0));
2259         start_force(PL_curforce);
2260         NEXTVAL_NEXTTOKE.opval = o;
2261         force_next(WORD);
2262         if (kind) {
2263             o->op_private = OPpCONST_ENTERED;
2264             /* XXX see note in pp_entereval() for why we forgo typo
2265                warnings if the symbol must be introduced in an eval.
2266                GSAR 96-10-12 */
2267             gv_fetchpvn_flags(s, len,
2268                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2269                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2270                               kind == '$' ? SVt_PV :
2271                               kind == '@' ? SVt_PVAV :
2272                               kind == '%' ? SVt_PVHV :
2273                               SVt_PVGV
2274                               );
2275         }
2276     }
2277 }
2278
2279 static void
2280 S_force_ident_maybe_lex(pTHX_ char pit)
2281 {
2282     start_force(PL_curforce);
2283     NEXTVAL_NEXTTOKE.ival = pit;
2284     force_next('p');
2285 }
2286
2287 NV
2288 Perl_str_to_version(pTHX_ SV *sv)
2289 {
2290     NV retval = 0.0;
2291     NV nshift = 1.0;
2292     STRLEN len;
2293     const char *start = SvPV_const(sv,len);
2294     const char * const end = start + len;
2295     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2296
2297     PERL_ARGS_ASSERT_STR_TO_VERSION;
2298
2299     while (start < end) {
2300         STRLEN skip;
2301         UV n;
2302         if (utf)
2303             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2304         else {
2305             n = *(U8*)start;
2306             skip = 1;
2307         }
2308         retval += ((NV)n)/nshift;
2309         start += skip;
2310         nshift *= 1000;
2311     }
2312     return retval;
2313 }
2314
2315 /*
2316  * S_force_version
2317  * Forces the next token to be a version number.
2318  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2319  * and if "guessing" is TRUE, then no new token is created (and the caller
2320  * must use an alternative parsing method).
2321  */
2322
2323 STATIC char *
2324 S_force_version(pTHX_ char *s, int guessing)
2325 {
2326     dVAR;
2327     OP *version = NULL;
2328     char *d;
2329 #ifdef PERL_MAD
2330     I32 startoff = s - SvPVX(PL_linestr);
2331 #endif
2332
2333     PERL_ARGS_ASSERT_FORCE_VERSION;
2334
2335     s = SKIPSPACE1(s);
2336
2337     d = s;
2338     if (*d == 'v')
2339         d++;
2340     if (isDIGIT(*d)) {
2341         while (isDIGIT(*d) || *d == '_' || *d == '.')
2342             d++;
2343 #ifdef PERL_MAD
2344         if (PL_madskills) {
2345             start_force(PL_curforce);
2346             curmad('X', newSVpvn(s,d-s));
2347         }
2348 #endif
2349         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2350             SV *ver;
2351 #ifdef USE_LOCALE_NUMERIC
2352             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2353             setlocale(LC_NUMERIC, "C");
2354 #endif
2355             s = scan_num(s, &pl_yylval);
2356 #ifdef USE_LOCALE_NUMERIC
2357             setlocale(LC_NUMERIC, loc);
2358             Safefree(loc);
2359 #endif
2360             version = pl_yylval.opval;
2361             ver = cSVOPx(version)->op_sv;
2362             if (SvPOK(ver) && !SvNIOK(ver)) {
2363                 SvUPGRADE(ver, SVt_PVNV);
2364                 SvNV_set(ver, str_to_version(ver));
2365                 SvNOK_on(ver);          /* hint that it is a version */
2366             }
2367         }
2368         else if (guessing) {
2369 #ifdef PERL_MAD
2370             if (PL_madskills) {
2371                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2372                 PL_nextwhite = 0;
2373                 s = SvPVX(PL_linestr) + startoff;
2374             }
2375 #endif
2376             return s;
2377         }
2378     }
2379
2380 #ifdef PERL_MAD
2381     if (PL_madskills && !version) {
2382         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2383         PL_nextwhite = 0;
2384         s = SvPVX(PL_linestr) + startoff;
2385     }
2386 #endif
2387     /* NOTE: The parser sees the package name and the VERSION swapped */
2388     start_force(PL_curforce);
2389     NEXTVAL_NEXTTOKE.opval = version;
2390     force_next(WORD);
2391
2392     return s;
2393 }
2394
2395 /*
2396  * S_force_strict_version
2397  * Forces the next token to be a version number using strict syntax rules.
2398  */
2399
2400 STATIC char *
2401 S_force_strict_version(pTHX_ char *s)
2402 {
2403     dVAR;
2404     OP *version = NULL;
2405 #ifdef PERL_MAD
2406     I32 startoff = s - SvPVX(PL_linestr);
2407 #endif
2408     const char *errstr = NULL;
2409
2410     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2411
2412     while (isSPACE(*s)) /* leading whitespace */
2413         s++;
2414
2415     if (is_STRICT_VERSION(s,&errstr)) {
2416         SV *ver = newSV(0);
2417         s = (char *)scan_version(s, ver, 0);
2418         version = newSVOP(OP_CONST, 0, ver);
2419     }
2420     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2421             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2422     {
2423         PL_bufptr = s;
2424         if (errstr)
2425             yyerror(errstr); /* version required */
2426         return s;
2427     }
2428
2429 #ifdef PERL_MAD
2430     if (PL_madskills && !version) {
2431         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2432         PL_nextwhite = 0;
2433         s = SvPVX(PL_linestr) + startoff;
2434     }
2435 #endif
2436     /* NOTE: The parser sees the package name and the VERSION swapped */
2437     start_force(PL_curforce);
2438     NEXTVAL_NEXTTOKE.opval = version;
2439     force_next(WORD);
2440
2441     return s;
2442 }
2443
2444 /*
2445  * S_tokeq
2446  * Tokenize a quoted string passed in as an SV.  It finds the next
2447  * chunk, up to end of string or a backslash.  It may make a new
2448  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2449  * turns \\ into \.
2450  */
2451
2452 STATIC SV *
2453 S_tokeq(pTHX_ SV *sv)
2454 {
2455     dVAR;
2456     char *s;
2457     char *send;
2458     char *d;
2459     STRLEN len = 0;
2460     SV *pv = sv;
2461
2462     PERL_ARGS_ASSERT_TOKEQ;
2463
2464     if (!SvLEN(sv))
2465         goto finish;
2466
2467     s = SvPV_force(sv, len);
2468     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2469         goto finish;
2470     send = s + len;
2471     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2472     while (s < send && !(*s == '\\' && s[1] == '\\'))
2473         s++;
2474     if (s == send)
2475         goto finish;
2476     d = s;
2477     if ( PL_hints & HINT_NEW_STRING ) {
2478         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2479     }
2480     while (s < send) {
2481         if (*s == '\\') {
2482             if (s + 1 < send && (s[1] == '\\'))
2483                 s++;            /* all that, just for this */
2484         }
2485         *d++ = *s++;
2486     }
2487     *d = '\0';
2488     SvCUR_set(sv, d - SvPVX_const(sv));
2489   finish:
2490     if ( PL_hints & HINT_NEW_STRING )
2491        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2492     return sv;
2493 }
2494
2495 /*
2496  * Now come three functions related to double-quote context,
2497  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2498  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2499  * interact with PL_lex_state, and create fake ( ... ) argument lists
2500  * to handle functions and concatenation.
2501  * For example,
2502  *   "foo\lbar"
2503  * is tokenised as
2504  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2505  */
2506
2507 /*
2508  * S_sublex_start
2509  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2510  *
2511  * Pattern matching will set PL_lex_op to the pattern-matching op to
2512  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2513  *
2514  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2515  *
2516  * Everything else becomes a FUNC.
2517  *
2518  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2519  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2520  * call to S_sublex_push().
2521  */
2522
2523 STATIC I32
2524 S_sublex_start(pTHX)
2525 {
2526     dVAR;
2527     const I32 op_type = pl_yylval.ival;
2528
2529     if (op_type == OP_NULL) {
2530         pl_yylval.opval = PL_lex_op;
2531         PL_lex_op = NULL;
2532         return THING;
2533     }
2534     if (op_type == OP_CONST || op_type == OP_READLINE) {
2535         SV *sv = tokeq(PL_lex_stuff);
2536
2537         if (SvTYPE(sv) == SVt_PVIV) {
2538             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2539             STRLEN len;
2540             const char * const p = SvPV_const(sv, len);
2541             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2542             SvREFCNT_dec(sv);
2543             sv = nsv;
2544         }
2545         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2546         PL_lex_stuff = NULL;
2547         /* Allow <FH> // "foo" */
2548         if (op_type == OP_READLINE)
2549             PL_expect = XTERMORDORDOR;
2550         return THING;
2551     }
2552     else if (op_type == OP_BACKTICK && PL_lex_op) {
2553         /* readpipe() was overridden */
2554         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2555         pl_yylval.opval = PL_lex_op;
2556         PL_lex_op = NULL;
2557         PL_lex_stuff = NULL;
2558         return THING;
2559     }
2560
2561     PL_sublex_info.super_state = PL_lex_state;
2562     PL_sublex_info.sub_inwhat = (U16)op_type;
2563     PL_sublex_info.sub_op = PL_lex_op;
2564     PL_lex_state = LEX_INTERPPUSH;
2565
2566     PL_expect = XTERM;
2567     if (PL_lex_op) {
2568         pl_yylval.opval = PL_lex_op;
2569         PL_lex_op = NULL;
2570         return PMFUNC;
2571     }
2572     else
2573         return FUNC;
2574 }
2575
2576 /*
2577  * S_sublex_push
2578  * Create a new scope to save the lexing state.  The scope will be
2579  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2580  * to the uc, lc, etc. found before.
2581  * Sets PL_lex_state to LEX_INTERPCONCAT.
2582  */
2583
2584 STATIC I32
2585 S_sublex_push(pTHX)
2586 {
2587     dVAR;
2588     LEXSHARED *shared;
2589     ENTER;
2590
2591     PL_lex_state = PL_sublex_info.super_state;
2592     SAVEBOOL(PL_lex_dojoin);
2593     SAVEI32(PL_lex_brackets);
2594     SAVEI32(PL_lex_allbrackets);
2595     SAVEI32(PL_lex_formbrack);
2596     SAVEI8(PL_lex_fakeeof);
2597     SAVEI32(PL_lex_casemods);
2598     SAVEI32(PL_lex_starts);
2599     SAVEI8(PL_lex_state);
2600     SAVESPTR(PL_lex_repl);
2601     SAVEVPTR(PL_lex_inpat);
2602     SAVEI16(PL_lex_inwhat);
2603     SAVECOPLINE(PL_curcop);
2604     SAVEPPTR(PL_bufptr);
2605     SAVEPPTR(PL_bufend);
2606     SAVEPPTR(PL_oldbufptr);
2607     SAVEPPTR(PL_oldoldbufptr);
2608     SAVEPPTR(PL_last_lop);
2609     SAVEPPTR(PL_last_uni);
2610     SAVEPPTR(PL_linestart);
2611     SAVESPTR(PL_linestr);
2612     SAVEGENERICPV(PL_lex_brackstack);
2613     SAVEGENERICPV(PL_lex_casestack);
2614     SAVEGENERICPV(PL_parser->lex_shared);
2615     SAVEBOOL(PL_parser->lex_re_reparsing);
2616
2617     /* The here-doc parser needs to be able to peek into outer lexing
2618        scopes to find the body of the here-doc.  So we put PL_linestr and
2619        PL_bufptr into lex_shared, to ‘share’ those values.
2620      */
2621     PL_parser->lex_shared->ls_linestr = PL_linestr;
2622     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2623
2624     PL_linestr = PL_lex_stuff;
2625     PL_lex_repl = PL_sublex_info.repl;
2626     PL_lex_stuff = NULL;
2627     PL_sublex_info.repl = NULL;
2628
2629     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2630         = SvPVX(PL_linestr);
2631     PL_bufend += SvCUR(PL_linestr);
2632     PL_last_lop = PL_last_uni = NULL;
2633     SAVEFREESV(PL_linestr);
2634     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2635
2636     PL_lex_dojoin = FALSE;
2637     PL_lex_brackets = PL_lex_formbrack = 0;
2638     PL_lex_allbrackets = 0;
2639     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2640     Newx(PL_lex_brackstack, 120, char);
2641     Newx(PL_lex_casestack, 12, char);
2642     PL_lex_casemods = 0;
2643     *PL_lex_casestack = '\0';
2644     PL_lex_starts = 0;
2645     PL_lex_state = LEX_INTERPCONCAT;
2646     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2647
2648     Newxz(shared, 1, LEXSHARED);
2649     shared->ls_prev = PL_parser->lex_shared;
2650     PL_parser->lex_shared = shared;
2651
2652     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2653     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2654     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2655         PL_lex_inpat = PL_sublex_info.sub_op;
2656     else
2657         PL_lex_inpat = NULL;
2658
2659     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2660     PL_in_eval &= ~EVAL_RE_REPARSING;
2661
2662     return '(';
2663 }
2664
2665 /*
2666  * S_sublex_done
2667  * Restores lexer state after a S_sublex_push.
2668  */
2669
2670 STATIC I32
2671 S_sublex_done(pTHX)
2672 {
2673     dVAR;
2674     if (!PL_lex_starts++) {
2675         SV * const sv = newSVpvs("");
2676         if (SvUTF8(PL_linestr))
2677             SvUTF8_on(sv);
2678         PL_expect = XOPERATOR;
2679         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2680         return THING;
2681     }
2682
2683     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2684         PL_lex_state = LEX_INTERPCASEMOD;
2685         return yylex();
2686     }
2687
2688     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2689     assert(PL_lex_inwhat != OP_TRANSR);
2690     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2691         PL_linestr = PL_lex_repl;
2692         PL_lex_inpat = 0;
2693         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2694         PL_bufend += SvCUR(PL_linestr);
2695         PL_last_lop = PL_last_uni = NULL;
2696         PL_lex_dojoin = FALSE;
2697         PL_lex_brackets = 0;
2698         PL_lex_allbrackets = 0;
2699         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2700         PL_lex_casemods = 0;
2701         *PL_lex_casestack = '\0';
2702         PL_lex_starts = 0;
2703         if (SvEVALED(PL_lex_repl)) {
2704             PL_lex_state = LEX_INTERPNORMAL;
2705             PL_lex_starts++;
2706             /*  we don't clear PL_lex_repl here, so that we can check later
2707                 whether this is an evalled subst; that means we rely on the
2708                 logic to ensure sublex_done() is called again only via the
2709                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2710         }
2711         else {
2712             PL_lex_state = LEX_INTERPCONCAT;
2713             PL_lex_repl = NULL;
2714         }
2715         return ',';
2716     }
2717     else {
2718 #ifdef PERL_MAD
2719         if (PL_madskills) {
2720             if (PL_thiswhite) {
2721                 if (!PL_endwhite)
2722                     PL_endwhite = newSVpvs("");
2723                 sv_catsv(PL_endwhite, PL_thiswhite);
2724                 PL_thiswhite = 0;
2725             }
2726             if (PL_thistoken)
2727                 sv_setpvs(PL_thistoken,"");
2728             else
2729                 PL_realtokenstart = -1;
2730         }
2731 #endif
2732         LEAVE;
2733         PL_bufend = SvPVX(PL_linestr);
2734         PL_bufend += SvCUR(PL_linestr);
2735         PL_expect = XOPERATOR;
2736         PL_sublex_info.sub_inwhat = 0;
2737         return ')';
2738     }
2739 }
2740
2741 PERL_STATIC_INLINE SV*
2742 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2743 {
2744     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2745      * interior, hence to the "}".  Finds what the name resolves to, returning
2746      * an SV* containing it; NULL if no valid one found */
2747
2748     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2749
2750     HV * table;
2751     SV **cvp;
2752     SV *cv;
2753     SV *rv;
2754     HV *stash;
2755     const U8* first_bad_char_loc;
2756     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2757
2758     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2759
2760     if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2761                                      e - backslash_ptr,
2762                                      &first_bad_char_loc))
2763     {
2764         /* If warnings are on, this will print a more detailed analysis of what
2765          * is wrong than the error message below */
2766         utf8n_to_uvuni(first_bad_char_loc,
2767                        e - ((char *) first_bad_char_loc),
2768                        NULL, 0);
2769
2770         /* We deliberately don't try to print the malformed character, which
2771          * might not print very well; it also may be just the first of many
2772          * malformations, so don't print what comes after it */
2773         yyerror(Perl_form(aTHX_
2774             "Malformed UTF-8 character immediately after '%.*s'",
2775             (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2776         return NULL;
2777     }
2778
2779     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2780                         /* include the <}> */
2781                         e - backslash_ptr + 1);
2782     if (! SvPOK(res)) {
2783         SvREFCNT_dec_NN(res);
2784         return NULL;
2785     }
2786
2787     /* See if the charnames handler is the Perl core's, and if so, we can skip
2788      * the validation needed for a user-supplied one, as Perl's does its own
2789      * validation. */
2790     table = GvHV(PL_hintgv);             /* ^H */
2791     cvp = hv_fetchs(table, "charnames", FALSE);
2792     if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2793         && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2794     {
2795         const char * const name = HvNAME(stash);
2796         if strEQ(name, "_charnames") {
2797            return res;
2798        }
2799     }
2800
2801     /* Here, it isn't Perl's charname handler.  We can't rely on a
2802      * user-supplied handler to validate the input name.  For non-ut8 input,
2803      * look to see that the first character is legal.  Then loop through the
2804      * rest checking that each is a continuation */
2805
2806     /* This code needs to be sync'ed with a regex in _charnames.pm which does
2807      * the same thing */
2808
2809     if (! UTF) {
2810         if (! isALPHAU(*s)) {
2811             goto bad_charname;
2812         }
2813         s++;
2814         while (s < e) {
2815             if (! isCHARNAME_CONT(*s)) {
2816                 goto bad_charname;
2817             }
2818             if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2819                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2820                            "A sequence of multiple spaces in a charnames "
2821                            "alias definition is deprecated");
2822             }
2823             s++;
2824         }
2825         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2826             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2827                         "Trailing white-space in a charnames alias "
2828                         "definition is deprecated");
2829         }
2830     }
2831     else {
2832         /* Similarly for utf8.  For invariants can check directly; for other
2833          * Latin1, can calculate their code point and check; otherwise  use a
2834          * swash */
2835         if (UTF8_IS_INVARIANT(*s)) {
2836             if (! isALPHAU(*s)) {
2837                 goto bad_charname;
2838             }
2839             s++;
2840         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2841             if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
2842                 goto bad_charname;
2843             }
2844             s += 2;
2845         }
2846         else {
2847             if (! PL_utf8_charname_begin) {
2848                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2849                 PL_utf8_charname_begin = _core_swash_init("utf8",
2850                                                         "_Perl_Charname_Begin",
2851                                                         &PL_sv_undef,
2852                                                         1, 0, NULL, &flags);
2853             }
2854             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2855                 goto bad_charname;
2856             }
2857             s += UTF8SKIP(s);
2858         }
2859
2860         while (s < e) {
2861             if (UTF8_IS_INVARIANT(*s)) {
2862                 if (! isCHARNAME_CONT(*s)) {
2863                     goto bad_charname;
2864                 }
2865                 if (*s == ' ' && *(s-1) == ' '
2866                  && ckWARN_d(WARN_DEPRECATED)) {
2867                     Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2868                                "A sequence of multiple spaces in a charnam"
2869                                "es alias definition is deprecated");
2870                 }
2871                 s++;
2872             }
2873             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2874                 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2875                                                                     *(s+1)))))
2876                 {
2877                     goto bad_charname;
2878                 }
2879                 s += 2;
2880             }
2881             else {
2882                 if (! PL_utf8_charname_continue) {
2883                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2884                     PL_utf8_charname_continue = _core_swash_init("utf8",
2885                                                 "_Perl_Charname_Continue",
2886                                                 &PL_sv_undef,
2887                                                 1, 0, NULL, &flags);
2888                 }
2889                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2890                     goto bad_charname;
2891                 }
2892                 s += UTF8SKIP(s);
2893             }
2894         }
2895         if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2896             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2897                        "Trailing white-space in a charnames alias "
2898                        "definition is deprecated");
2899         }
2900     }
2901
2902     if (SvUTF8(res)) { /* Don't accept malformed input */
2903         const U8* first_bad_char_loc;
2904         STRLEN len;
2905         const char* const str = SvPV_const(res, len);
2906         if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2907             /* If warnings are on, this will print a more detailed analysis of
2908              * what is wrong than the error message below */
2909             utf8n_to_uvuni(first_bad_char_loc,
2910                            (char *) first_bad_char_loc - str,
2911                            NULL, 0);
2912
2913             /* We deliberately don't try to print the malformed character,
2914              * which might not print very well; it also may be just the first
2915              * of many malformations, so don't print what comes after it */
2916             yyerror_pv(
2917               Perl_form(aTHX_
2918                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2919                  (int) (e - backslash_ptr + 1), backslash_ptr,
2920                  (int) ((char *) first_bad_char_loc - str), str
2921               ),
2922               SVf_UTF8);
2923             return NULL;
2924         }
2925     }
2926
2927     return res;
2928
2929   bad_charname: {
2930         int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2931
2932         /* The final %.*s makes sure that should the trailing NUL be missing
2933          * that this print won't run off the end of the string */
2934         yyerror_pv(
2935           Perl_form(aTHX_
2936             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2937             (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2938             (int)(e - s + bad_char_size), s + bad_char_size
2939           ),
2940           UTF ? SVf_UTF8 : 0);
2941         return NULL;
2942     }
2943 }
2944
2945 /*
2946   scan_const
2947
2948   Extracts the next constant part of a pattern, double-quoted string,
2949   or transliteration.  This is terrifying code.
2950
2951   For example, in parsing the double-quoted string "ab\x63$d", it would
2952   stop at the '$' and return an OP_CONST containing 'abc'.
2953
2954   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2955   processing a pattern (PL_lex_inpat is true), a transliteration
2956   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2957
2958   Returns a pointer to the character scanned up to. If this is
2959   advanced from the start pointer supplied (i.e. if anything was
2960   successfully parsed), will leave an OP_CONST for the substring scanned
2961   in pl_yylval. Caller must intuit reason for not parsing further
2962   by looking at the next characters herself.
2963
2964   In patterns:
2965     expand:
2966       \N{FOO}  => \N{U+hex_for_character_FOO}
2967       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2968
2969     pass through:
2970         all other \-char, including \N and \N{ apart from \N{ABC}
2971
2972     stops on:
2973         @ and $ where it appears to be a var, but not for $ as tail anchor
2974         \l \L \u \U \Q \E
2975         (?{  or  (??{
2976
2977
2978   In transliterations:
2979     characters are VERY literal, except for - not at the start or end
2980     of the string, which indicates a range. If the range is in bytes,
2981     scan_const expands the range to the full set of intermediate
2982     characters. If the range is in utf8, the hyphen is replaced with
2983     a certain range mark which will be handled by pmtrans() in op.c.
2984
2985   In double-quoted strings:
2986     backslashes:
2987       double-quoted style: \r and \n
2988       constants: \x31, etc.
2989       deprecated backrefs: \1 (in substitution replacements)
2990       case and quoting: \U \Q \E
2991     stops on @ and $
2992
2993   scan_const does *not* construct ops to handle interpolated strings.
2994   It stops processing as soon as it finds an embedded $ or @ variable
2995   and leaves it to the caller to work out what's going on.
2996
2997   embedded arrays (whether in pattern or not) could be:
2998       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2999
3000   $ in double-quoted strings must be the symbol of an embedded scalar.
3001
3002   $ in pattern could be $foo or could be tail anchor.  Assumption:
3003   it's a tail anchor if $ is the last thing in the string, or if it's
3004   followed by one of "()| \r\n\t"
3005
3006   \1 (backreferences) are turned into $1 in substitutions
3007
3008   The structure of the code is
3009       while (there's a character to process) {
3010           handle transliteration ranges
3011           skip regexp comments /(?#comment)/ and codes /(?{code})/
3012           skip #-initiated comments in //x patterns
3013           check for embedded arrays
3014           check for embedded scalars
3015           if (backslash) {
3016               deprecate \1 in substitution replacements
3017               handle string-changing backslashes \l \U \Q \E, etc.
3018               switch (what was escaped) {
3019                   handle \- in a transliteration (becomes a literal -)
3020                   if a pattern and not \N{, go treat as regular character
3021                   handle \132 (octal characters)
3022                   handle \x15 and \x{1234} (hex characters)
3023                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3024                   handle \cV (control characters)
3025                   handle printf-style backslashes (\f, \r, \n, etc)
3026               } (end switch)
3027               continue
3028           } (end if backslash)
3029           handle regular character
3030     } (end while character to read)
3031
3032 */
3033
3034 STATIC char *
3035 S_scan_const(pTHX_ char *start)
3036 {
3037     dVAR;
3038     char *send = PL_bufend;             /* end of the constant */
3039     SV *sv = newSV(send - start);               /* sv for the constant.  See
3040                                                    note below on sizing. */
3041     char *s = start;                    /* start of the constant */
3042     char *d = SvPVX(sv);                /* destination for copies */
3043     bool dorange = FALSE;                       /* are we in a translit range? */
3044     bool didrange = FALSE;                      /* did we just finish a range? */
3045     bool in_charclass = FALSE;                  /* within /[...]/ */
3046     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
3047     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
3048                                                    to be UTF8?  But, this can
3049                                                    show as true when the source
3050                                                    isn't utf8, as for example
3051                                                    when it is entirely composed
3052                                                    of hex constants */
3053     SV *res;                            /* result from charnames */
3054
3055     /* Note on sizing:  The scanned constant is placed into sv, which is
3056      * initialized by newSV() assuming one byte of output for every byte of
3057      * input.  This routine expects newSV() to allocate an extra byte for a
3058      * trailing NUL, which this routine will append if it gets to the end of
3059      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3060      * CAPITAL LETTER A}), or more output than input if the constant ends up
3061      * recoded to utf8, but each time a construct is found that might increase
3062      * the needed size, SvGROW() is called.  Its size parameter each time is
3063      * based on the best guess estimate at the time, namely the length used so
3064      * far, plus the length the current construct will occupy, plus room for
3065      * the trailing NUL, plus one byte for every input byte still unscanned */
3066
3067     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3068                        before set */
3069 #ifdef EBCDIC
3070     UV literal_endpoint = 0;
3071     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3072 #endif
3073
3074     PERL_ARGS_ASSERT_SCAN_CONST;
3075
3076     assert(PL_lex_inwhat != OP_TRANSR);
3077     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3078         /* If we are doing a trans and we know we want UTF8 set expectation */
3079         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3080         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3081     }
3082
3083     /* Protect sv from errors and fatal warnings. */
3084     ENTER_with_name("scan_const");
3085     SAVEFREESV(sv);
3086
3087     while (s < send || dorange) {
3088
3089         /* get transliterations out of the way (they're most literal) */
3090         if (PL_lex_inwhat == OP_TRANS) {
3091             /* expand a range A-Z to the full set of characters.  AIE! */
3092             if (dorange) {
3093                 I32 i;                          /* current expanded character */
3094                 I32 min;                        /* first character in range */
3095                 I32 max;                        /* last character in range */
3096
3097 #ifdef EBCDIC
3098                 UV uvmax = 0;
3099 #endif
3100
3101                 if (has_utf8
3102 #ifdef EBCDIC
3103                     && !native_range
3104 #endif
3105                 ) {
3106                     char * const c = (char*)utf8_hop((U8*)d, -1);
3107                     char *e = d++;
3108                     while (e-- > c)
3109                         *(e + 1) = *e;
3110                     *c = (char)UTF_TO_NATIVE(0xff);
3111                     /* mark the range as done, and continue */
3112                     dorange = FALSE;
3113                     didrange = TRUE;
3114                     continue;
3115                 }
3116
3117                 i = d - SvPVX_const(sv);                /* remember current offset */
3118 #ifdef EBCDIC
3119                 SvGROW(sv,
3120                        SvLEN(sv) + (has_utf8 ?
3121                                     (512 - UTF_CONTINUATION_MARK +
3122                                      UNISKIP(0x100))
3123                                     : 256));
3124                 /* How many two-byte within 0..255: 128 in UTF-8,
3125                  * 96 in UTF-8-mod. */
3126 #else
3127                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
3128 #endif
3129                 d = SvPVX(sv) + i;              /* refresh d after realloc */
3130 #ifdef EBCDIC
3131                 if (has_utf8) {
3132                     int j;
3133                     for (j = 0; j <= 1; j++) {
3134                         char * const c = (char*)utf8_hop((U8*)d, -1);
3135                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3136                         if (j)
3137                             min = (U8)uv;
3138                         else if (uv < 256)
3139                             max = (U8)uv;
3140                         else {
3141                             max = (U8)0xff; /* only to \xff */
3142                             uvmax = uv; /* \x{100} to uvmax */
3143                         }
3144                         d = c; /* eat endpoint chars */
3145                      }
3146                 }
3147                else {
3148 #endif
3149                    d -= 2;              /* eat the first char and the - */
3150                    min = (U8)*d;        /* first char in range */
3151                    max = (U8)d[1];      /* last char in range  */
3152 #ifdef EBCDIC
3153                }
3154 #endif
3155
3156                 if (min > max) {
3157                     Perl_croak(aTHX_
3158                                "Invalid range \"%c-%c\" in transliteration operator",
3159                                (char)min, (char)max);
3160                 }
3161
3162 #ifdef EBCDIC
3163                 if (literal_endpoint == 2 &&
3164                     ((isLOWER(min) && isLOWER(max)) ||
3165                      (isUPPER(min) && isUPPER(max)))) {
3166                     if (isLOWER(min)) {
3167                         for (i = min; i <= max; i++)
3168                             if (isLOWER(i))
3169                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
3170                     } else {
3171                         for (i = min; i <= max; i++)
3172                             if (isUPPER(i))
3173                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
3174                     }
3175                 }
3176                 else
3177 #endif
3178                     for (i = min; i <= max; i++)
3179 #ifdef EBCDIC
3180                         if (has_utf8) {
3181                             const U8 ch = (U8)NATIVE_TO_UTF(i);
3182                             if (UNI_IS_INVARIANT(ch))
3183                                 *d++ = (U8)i;
3184                             else {
3185                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3186                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3187                             }
3188                         }
3189                         else
3190 #endif
3191                             *d++ = (char)i;
3192
3193 #ifdef EBCDIC
3194                 if (uvmax) {
3195                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3196                     if (uvmax > 0x101)
3197                         *d++ = (char)UTF_TO_NATIVE(0xff);
3198                     if (uvmax > 0x100)
3199                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3200                 }
3201 #endif
3202
3203                 /* mark the range as done, and continue */
3204                 dorange = FALSE;
3205                 didrange = TRUE;
3206 #ifdef EBCDIC
3207                 literal_endpoint = 0;
3208 #endif
3209                 continue;
3210             }
3211
3212             /* range begins (ignore - as first or last char) */
3213             else if (*s == '-' && s+1 < send  && s != start) {
3214                 if (didrange) {
3215                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3216                 }
3217                 if (has_utf8
3218 #ifdef EBCDIC
3219                     && !native_range
3220 #endif
3221                     ) {
3222                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
3223                     s++;
3224                     continue;
3225                 }
3226                 dorange = TRUE;
3227                 s++;
3228             }
3229             else {
3230                 didrange = FALSE;
3231 #ifdef EBCDIC
3232                 literal_endpoint = 0;
3233                 native_range = TRUE;
3234 #endif
3235             }
3236         }
3237
3238         /* if we get here, we're not doing a transliteration */
3239
3240         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3241             char *s1 = s-1;
3242             int esc = 0;
3243             while (s1 >= start && *s1-- == '\\')
3244                 esc = !esc;
3245             if (!esc)
3246                 in_charclass = TRUE;
3247         }
3248
3249         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
3250             char *s1 = s-1;
3251             int esc = 0;
3252             while (s1 >= start && *s1-- == '\\')
3253                 esc = !esc;
3254             if (!esc)
3255                 in_charclass = FALSE;
3256         }
3257
3258         /* skip for regexp comments /(?#comment)/, except for the last
3259          * char, which will be done separately.
3260          * Stop on (?{..}) and friends */
3261
3262         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3263             if (s[2] == '#') {
3264                 while (s+1 < send && *s != ')')
3265                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3266             }
3267             else if (!PL_lex_casemods &&
3268                      (    s[2] == '{' /* This should match regcomp.c */
3269                       || (s[2] == '?' && s[3] == '{')))
3270             {
3271                 break;
3272             }
3273         }
3274
3275         /* likewise skip #-initiated comments in //x patterns */
3276         else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3277           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3278             while (s+1 < send && *s != '\n')
3279                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3280         }
3281
3282         /* no further processing of single-quoted regex */
3283         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3284             goto default_action;
3285
3286         /* check for embedded arrays
3287            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3288            */
3289         else if (*s == '@' && s[1]) {
3290             if (isWORDCHAR_lazy_if(s+1,UTF))
3291                 break;
3292             if (strchr(":'{$", s[1]))
3293                 break;
3294             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3295                 break; /* in regexp, neither @+ nor @- are interpolated */
3296         }
3297
3298         /* check for embedded scalars.  only stop if we're sure it's a
3299            variable.
3300         */
3301         else if (*s == '$') {
3302             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3303                 break;
3304             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3305                 if (s[1] == '\\') {
3306                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3307                                    "Possible unintended interpolation of $\\ in regex");
3308                 }
3309                 break;          /* in regexp, $ might be tail anchor */
3310             }
3311         }
3312
3313         /* End of else if chain - OP_TRANS rejoin rest */
3314
3315         /* backslashes */
3316         if (*s == '\\' && s+1 < send) {
3317             char* e;    /* Can be used for ending '}', etc. */
3318
3319             s++;
3320
3321             /* warn on \1 - \9 in substitution replacements, but note that \11
3322              * is an octal; and \19 is \1 followed by '9' */
3323             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3324                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3325             {
3326                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3327                 *--s = '$';
3328                 break;
3329             }
3330
3331             /* string-change backslash escapes */
3332             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3333                 --s;
3334                 break;
3335             }
3336             /* In a pattern, process \N, but skip any other backslash escapes.
3337              * This is because we don't want to translate an escape sequence
3338              * into a meta symbol and have the regex compiler use the meta
3339              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3340              * in spite of this, we do have to process \N here while the proper
3341              * charnames handler is in scope.  See bugs #56444 and #62056.
3342              * There is a complication because \N in a pattern may also stand
3343              * for 'match a non-nl', and not mean a charname, in which case its
3344              * processing should be deferred to the regex compiler.  To be a
3345              * charname it must be followed immediately by a '{', and not look
3346              * like \N followed by a curly quantifier, i.e., not something like
3347              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3348              * quantifier */
3349             else if (PL_lex_inpat
3350                     && (*s != 'N'
3351                         || s[1] != '{'
3352                         || regcurly(s + 1, FALSE)))
3353             {
3354                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3355                 goto default_action;
3356             }
3357
3358             switch (*s) {
3359
3360             /* quoted - in transliterations */
3361             case '-':
3362                 if (PL_lex_inwhat == OP_TRANS) {
3363                     *d++ = *s++;
3364                     continue;
3365                 }
3366                 /* FALL THROUGH */
3367             default:
3368                 {
3369                     if ((isALPHANUMERIC(*s)))
3370                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3371                                        "Unrecognized escape \\%c passed through",
3372                                        *s);
3373                     /* default action is to copy the quoted character */
3374                     goto default_action;
3375                 }
3376
3377             /* eg. \132 indicates the octal constant 0132 */
3378             case '0': case '1': case '2': case '3':
3379             case '4': case '5': case '6': case '7':
3380                 {
3381                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3382                     STRLEN len = 3;
3383                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3384                     s += len;
3385                     if (len < 3 && s < send && isDIGIT(*s)
3386                         && ckWARN(WARN_MISC))
3387                     {
3388                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3389                                     "%s", form_short_octal_warning(s, len));
3390                     }
3391                 }
3392                 goto NUM_ESCAPE_INSERT;
3393
3394             /* eg. \o{24} indicates the octal constant \024 */
3395             case 'o':
3396                 {
3397                     const char* error;
3398
3399                     bool valid = grok_bslash_o(&s, &uv, &error,
3400                                                TRUE, /* Output warning */
3401                                                FALSE, /* Not strict */
3402                                                TRUE, /* Output warnings for
3403                                                          non-portables */
3404                                                UTF);
3405                     if (! valid) {
3406                         yyerror(error);
3407                         continue;
3408                     }
3409                     goto NUM_ESCAPE_INSERT;
3410                 }
3411
3412             /* eg. \x24 indicates the hex constant 0x24 */
3413             case 'x':
3414                 {
3415                     const char* error;
3416
3417                     bool valid = grok_bslash_x(&s, &uv, &error,
3418                                                TRUE, /* Output warning */
3419                                                FALSE, /* Not strict */
3420                                                TRUE,  /* Output warnings for
3421                                                          non-portables */
3422                                                UTF);
3423                     if (! valid) {
3424                         yyerror(error);
3425                         continue;
3426                     }
3427                 }
3428
3429               NUM_ESCAPE_INSERT:
3430                 /* Insert oct or hex escaped character.  There will always be
3431                  * enough room in sv since such escapes will be longer than any
3432                  * UTF-8 sequence they can end up as, except if they force us
3433                  * to recode the rest of the string into utf8 */
3434
3435                 /* Here uv is the ordinal of the next character being added in
3436                  * unicode (converted from native). */
3437                 if (!UNI_IS_INVARIANT(uv)) {
3438                     if (!has_utf8 && uv > 255) {
3439                         /* Might need to recode whatever we have accumulated so
3440                          * far if it contains any chars variant in utf8 or
3441                          * utf-ebcdic. */
3442
3443                         SvCUR_set(sv, d - SvPVX_const(sv));
3444                         SvPOK_on(sv);
3445                         *d = '\0';
3446                         /* See Note on sizing above.  */
3447                         sv_utf8_upgrade_flags_grow(sv,
3448                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3449                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3450                         d = SvPVX(sv) + SvCUR(sv);
3451                         has_utf8 = TRUE;
3452                     }
3453
3454                     if (has_utf8) {
3455                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3456                         if (PL_lex_inwhat == OP_TRANS &&
3457                             PL_sublex_info.sub_op) {
3458                             PL_sublex_info.sub_op->op_private |=
3459                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3460                                              : OPpTRANS_TO_UTF);
3461                         }
3462 #ifdef EBCDIC
3463                         if (uv > 255 && !dorange)
3464                             native_range = FALSE;
3465 #endif
3466                     }
3467                     else {
3468                         *d++ = (char)uv;
3469                     }
3470                 }
3471                 else {
3472                     *d++ = (char) uv;
3473                 }
3474                 continue;
3475
3476             case 'N':
3477                 /* In a non-pattern \N must be a named character, like \N{LATIN
3478                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3479                  * mean to match a non-newline.  For non-patterns, named
3480                  * characters are converted to their string equivalents. In
3481                  * patterns, named characters are not converted to their
3482                  * ultimate forms for the same reasons that other escapes
3483                  * aren't.  Instead, they are converted to the \N{U+...} form
3484                  * to get the value from the charnames that is in effect right
3485                  * now, while preserving the fact that it was a named character
3486                  * so that the regex compiler knows this */
3487
3488                 /* This section of code doesn't generally use the
3489                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3490                  * a close examination of this macro and determined it is a
3491                  * no-op except on utfebcdic variant characters.  Every
3492                  * character generated by this that would normally need to be
3493                  * enclosed by this macro is invariant, so the macro is not
3494                  * needed, and would complicate use of copy().  XXX There are
3495                  * other parts of this file where the macro is used
3496                  * inconsistently, but are saved by it being a no-op */
3497
3498                 /* The structure of this section of code (besides checking for
3499                  * errors and upgrading to utf8) is:
3500                  *  Further disambiguate between the two meanings of \N, and if
3501                  *      not a charname, go process it elsewhere
3502                  *  If of form \N{U+...}, pass it through if a pattern;
3503                  *      otherwise convert to utf8
3504                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3505                  *  pattern; otherwise convert to utf8 */
3506
3507                 /* Here, s points to the 'N'; the test below is guaranteed to
3508                  * succeed if we are being called on a pattern as we already
3509                  * know from a test above that the next character is a '{'.
3510                  * On a non-pattern \N must mean 'named sequence, which
3511                  * requires braces */
3512                 s++;
3513                 if (*s != '{') {
3514                     yyerror("Missing braces on \\N{}");
3515                     continue;
3516                 }
3517                 s++;
3518
3519                 /* If there is no matching '}', it is an error. */
3520                 if (! (e = strchr(s, '}'))) {
3521                     if (! PL_lex_inpat) {
3522                         yyerror("Missing right brace on \\N{}");
3523                     } else {
3524                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3525                     }
3526                     continue;
3527                 }
3528
3529                 /* Here it looks like a named character */
3530
3531                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3532                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3533                                 | PERL_SCAN_DISALLOW_PREFIX;
3534                     STRLEN len;
3535
3536                     /* For \N{U+...}, the '...' is a unicode value even on
3537                      * EBCDIC machines */
3538                     s += 2;         /* Skip to next char after the 'U+' */
3539                     len = e - s;
3540                     uv = grok_hex(s, &len, &flags, NULL);
3541                     if (len == 0 || len != (STRLEN)(e - s)) {
3542                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3543                         s = e + 1;
3544                         continue;
3545                     }
3546
3547                     if (PL_lex_inpat) {
3548
3549                         /* On non-EBCDIC platforms, pass through to the regex
3550                          * compiler unchanged.  The reason we evaluated the
3551                          * number above is to make sure there wasn't a syntax
3552                          * error.  But on EBCDIC we convert to native so
3553                          * downstream code can continue to assume it's native
3554                          */
3555                         s -= 5;     /* Include the '\N{U+' */
3556 #ifdef EBCDIC
3557                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3558                                                                and the \0 */
3559                                     "\\N{U+%X}",
3560                                     (unsigned int) UNI_TO_NATIVE(uv));
3561 #else
3562                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3563                         d += e - s + 1;
3564 #endif
3565                     }
3566                     else {  /* Not a pattern: convert the hex to string */
3567
3568                          /* If destination is not in utf8, unconditionally
3569                           * recode it to be so.  This is because \N{} implies
3570                           * Unicode semantics, and scalars have to be in utf8
3571                           * to guarantee those semantics */
3572                         if (! has_utf8) {
3573                             SvCUR_set(sv, d - SvPVX_const(sv));
3574                             SvPOK_on(sv);
3575                             *d = '\0';
3576                             /* See Note on sizing above.  */
3577                             sv_utf8_upgrade_flags_grow(
3578                                         sv,
3579                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3580                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3581                             d = SvPVX(sv) + SvCUR(sv);
3582                             has_utf8 = TRUE;
3583                         }
3584
3585                         /* Add the string to the output */
3586                         if (UNI_IS_INVARIANT(uv)) {
3587                             *d++ = (char) uv;
3588                         }
3589                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3590                     }
3591                 }
3592                 else /* Here is \N{NAME} but not \N{U+...}. */
3593                      if ((res = get_and_check_backslash_N_name(s, e)))
3594                 {
3595                     STRLEN len;
3596                     const char *str = SvPV_const(res, len);
3597                     if (PL_lex_inpat) {
3598
3599                         if (! len) { /* The name resolved to an empty string */
3600                             Copy("\\N{}", d, 4, char);
3601                             d += 4;
3602                         }
3603                         else {
3604                             /* In order to not lose information for the regex
3605                             * compiler, pass the result in the specially made
3606                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3607                             * the code points in hex of each character
3608                             * returned by charnames */
3609
3610                             const char *str_end = str + len;
3611                             const STRLEN off = d - SvPVX_const(sv);
3612
3613                             if (! SvUTF8(res)) {
3614                                 /* For the non-UTF-8 case, we can determine the
3615                                  * exact length needed without having to parse
3616                                  * through the string.  Each character takes up
3617                                  * 2 hex digits plus either a trailing dot or
3618                                  * the "}" */
3619                                 d = off + SvGROW(sv, off
3620                                                     + 3 * len
3621                                                     + 6 /* For the "\N{U+", and
3622                                                            trailing NUL */
3623                                                     + (STRLEN)(send - e));
3624                                 Copy("\\N{U+", d, 5, char);
3625                                 d += 5;
3626                                 while (str < str_end) {
3627                                     char hex_string[4];
3628                                     my_snprintf(hex_string, sizeof(hex_string),
3629                                                 "%02X.", (U8) *str);
3630                                     Copy(hex_string, d, 3, char);
3631                                     d += 3;
3632                                     str++;
3633                                 }
3634                                 d--;    /* We will overwrite below the final
3635                                            dot with a right brace */
3636                             }
3637                             else {
3638                                 STRLEN char_length; /* cur char's byte length */
3639
3640                                 /* and the number of bytes after this is
3641                                  * translated into hex digits */
3642                                 STRLEN output_length;
3643
3644                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3645                                  * for max('U+', '.'); and 1 for NUL */
3646                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3647
3648                                 /* Get the first character of the result. */
3649                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3650                                                         len,
3651                                                         &char_length,
3652                                                         UTF8_ALLOW_ANYUV);
3653                                 /* Convert first code point to hex, including
3654                                  * the boiler plate before it.  For all these,
3655                                  * we convert to native format so that
3656                                  * downstream code can continue to assume the
3657                                  * input is native */
3658                                 output_length =
3659                                     my_snprintf(hex_string, sizeof(hex_string),
3660                                             "\\N{U+%X",
3661                                             (unsigned int) UNI_TO_NATIVE(uv));
3662
3663                                 /* Make sure there is enough space to hold it */
3664                                 d = off + SvGROW(sv, off
3665                                                     + output_length
3666                                                     + (STRLEN)(send - e)
3667                                                     + 2);       /* '}' + NUL */
3668                                 /* And output it */
3669                                 Copy(hex_string, d, output_length, char);
3670                                 d += output_length;
3671
3672                                 /* For each subsequent character, append dot and
3673                                 * its ordinal in hex */
3674                                 while ((str += char_length) < str_end) {
3675                                     const STRLEN off = d - SvPVX_const(sv);
3676                                     U32 uv = utf8n_to_uvuni((U8 *) str,
3677                                                             str_end - str,
3678                                                             &char_length,
3679                                                             UTF8_ALLOW_ANYUV);
3680                                     output_length =
3681                                         my_snprintf(hex_string,
3682                                             sizeof(hex_string),
3683                                             ".%X",
3684                                             (unsigned int) UNI_TO_NATIVE(uv));
3685
3686                                     d = off + SvGROW(sv, off
3687                                                         + output_length
3688                                                         + (STRLEN)(send - e)
3689                                                         + 2);   /* '}' +  NUL */
3690                                     Copy(hex_string, d, output_length, char);
3691                                     d += output_length;
3692                                 }
3693                             }
3694
3695                             *d++ = '}'; /* Done.  Add the trailing brace */
3696                         }
3697                     }
3698                     else { /* Here, not in a pattern.  Convert the name to a
3699                             * string. */
3700
3701                          /* If destination is not in utf8, unconditionally
3702                           * recode it to be so.  This is because \N{} implies
3703                           * Unicode semantics, and scalars have to be in utf8
3704                           * to guarantee those semantics */
3705                         if (! has_utf8) {
3706                             SvCUR_set(sv, d - SvPVX_const(sv));
3707                             SvPOK_on(sv);
3708                             *d = '\0';
3709                             /* See Note on sizing above.  */
3710                             sv_utf8_upgrade_flags_grow(sv,
3711                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3712                                                 len + (STRLEN)(send - s) + 1);
3713                             d = SvPVX(sv) + SvCUR(sv);
3714                             has_utf8 = TRUE;
3715                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3716
3717                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3718                              * set correctly here). */
3719                             const STRLEN off = d - SvPVX_const(sv);
3720                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3721                         }
3722                         Copy(str, d, len, char);
3723                         d += len;
3724                     }
3725
3726                     SvREFCNT_dec(res);
3727
3728                 } /* End \N{NAME} */
3729 #ifdef EBCDIC
3730                 if (!dorange)
3731                     native_range = FALSE; /* \N{} is defined to be Unicode */
3732 #endif
3733                 s = e + 1;  /* Point to just after the '}' */
3734                 continue;
3735
3736             /* \c is a control character */
3737             case 'c':
3738                 s++;
3739                 if (s < send) {
3740                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3741                 }
3742                 else {
3743                     yyerror("Missing control char name in \\c");
3744                 }
3745                 continue;
3746
3747             /* printf-style backslashes, formfeeds, newlines, etc */
3748             case 'b':
3749                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3750                 break;
3751             case 'n':
3752                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3753                 break;
3754             case 'r':
3755                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3756                 break;
3757             case 'f':
3758                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3759                 break;
3760             case 't':
3761                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3762                 break;
3763             case 'e':
3764                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3765                 break;
3766             case 'a':
3767                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3768                 break;
3769             } /* end switch */
3770
3771             s++;
3772             continue;
3773         } /* end if (backslash) */
3774 #ifdef EBCDIC
3775         else
3776             literal_endpoint++;
3777 #endif
3778
3779     default_action:
3780         /* If we started with encoded form, or already know we want it,
3781            then encode the next character */
3782         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3783             STRLEN len  = 1;
3784
3785
3786             /* One might think that it is wasted effort in the case of the
3787              * source being utf8 (this_utf8 == TRUE) to take the next character
3788              * in the source, convert it to an unsigned value, and then convert
3789              * it back again.  But the source has not been validated here.  The
3790              * routine that does the conversion checks for errors like
3791              * malformed utf8 */
3792
3793             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3794             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3795             if (!has_utf8) {
3796                 SvCUR_set(sv, d - SvPVX_const(sv));
3797                 SvPOK_on(sv);
3798                 *d = '\0';
3799                 /* See Note on sizing above.  */
3800                 sv_utf8_upgrade_flags_grow(sv,
3801                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3802                                         need + (STRLEN)(send - s) + 1);
3803                 d = SvPVX(sv) + SvCUR(sv);
3804                 has_utf8 = TRUE;
3805             } else if (need > len) {
3806                 /* encoded value larger than old, may need extra space (NOTE:
3807                  * SvCUR() is not set correctly here).   See Note on sizing
3808                  * above.  */
3809                 const STRLEN off = d - SvPVX_const(sv);
3810                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3811             }
3812             s += len;
3813
3814             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3815 #ifdef EBCDIC
3816             if (uv > 255 && !dorange)
3817                 native_range = FALSE;
3818 #endif
3819         }
3820         else {
3821             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3822         }
3823     } /* while loop to process each character */
3824
3825     /* terminate the string and set up the sv */
3826     *d = '\0';
3827     SvCUR_set(sv, d - SvPVX_const(sv));
3828     if (SvCUR(sv) >= SvLEN(sv))
3829         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3830                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3831
3832     SvPOK_on(sv);
3833     if (PL_encoding && !has_utf8) {
3834         sv_recode_to_utf8(sv, PL_encoding);
3835         if (SvUTF8(sv))
3836             has_utf8 = TRUE;
3837     }
3838     if (has_utf8) {
3839         SvUTF8_on(sv);
3840         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3841             PL_sublex_info.sub_op->op_private |=
3842                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3843         }
3844     }
3845
3846     /* shrink the sv if we allocated more than we used */
3847     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3848         SvPV_shrink_to_cur(sv);
3849     }
3850
3851     /* return the substring (via pl_yylval) only if we parsed anything */
3852     if (s > PL_bufptr) {
3853         SvREFCNT_inc_simple_void_NN(sv);
3854         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3855             && ! PL_parser->lex_re_reparsing)
3856         {
3857             const char *const key = PL_lex_inpat ? "qr" : "q";
3858             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3859             const char *type;
3860             STRLEN typelen;
3861
3862             if (PL_lex_inwhat == OP_TRANS) {
3863                 type = "tr";
3864                 typelen = 2;
3865             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3866                 type = "s";
3867                 typelen = 1;
3868             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3869                 type = "q";
3870                 typelen = 1;
3871             } else  {
3872                 type = "qq";
3873                 typelen = 2;
3874             }
3875
3876             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3877                                 type, typelen);
3878         }
3879         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3880     }
3881     LEAVE_with_name("scan_const");
3882     return s;
3883 }
3884
3885 /* S_intuit_more
3886  * Returns TRUE if there's more to the expression (e.g., a subscript),
3887  * FALSE otherwise.
3888  *
3889  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3890  *
3891  * ->[ and ->{ return TRUE
3892  * { and [ outside a pattern are always subscripts, so return TRUE
3893  * if we're outside a pattern and it's not { or [, then return FALSE
3894  * if we're in a pattern and the first char is a {
3895  *   {4,5} (any digits around the comma) returns FALSE
3896  * if we're in a pattern and the first char is a [
3897  *   [] returns FALSE
3898  *   [SOMETHING] has a funky algorithm to decide whether it's a
3899  *      character class or not.  It has to deal with things like
3900  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3901  * anything else returns TRUE
3902  */
3903
3904 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3905
3906 STATIC int
3907 S_intuit_more(pTHX_ char *s)
3908 {
3909     dVAR;
3910
3911     PERL_ARGS_ASSERT_INTUIT_MORE;
3912
3913     if (PL_lex_brackets)
3914         return TRUE;
3915     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3916         return TRUE;
3917     if (*s != '{' && *s != '[')
3918         return FALSE;
3919     if (!PL_lex_inpat)
3920         return TRUE;
3921
3922     /* In a pattern, so maybe we have {n,m}. */
3923     if (*s == '{') {
3924         if (regcurly(s, FALSE)) {
3925             return FALSE;
3926         }
3927         return TRUE;
3928     }
3929
3930     /* On the other hand, maybe we have a character class */
3931
3932     s++;
3933     if (*s == ']' || *s == '^')
3934         return FALSE;
3935     else {
3936         /* this is terrifying, and it works */
3937         int weight;
3938         char seen[256];
3939         const char * const send = strchr(s,']');
3940         unsigned char un_char, last_un_char;
3941         char tmpbuf[sizeof PL_tokenbuf * 4];
3942
3943         if (!send)              /* has to be an expression */
3944             return TRUE;
3945         weight = 2;             /* let's weigh the evidence */
3946
3947         if (*s == '$')
3948             weight -= 3;
3949         else if (isDIGIT(*s)) {
3950             if (s[1] != ']') {
3951                 if (isDIGIT(s[1]) && s[2] == ']')
3952                     weight -= 10;
3953             }
3954             else
3955                 weight -= 100;
3956         }
3957         Zero(seen,256,char);
3958         un_char = 255;
3959         for (; s < send; s++) {
3960             last_un_char = un_char;
3961             un_char = (unsigned char)*s;
3962             switch (*s) {
3963             case '@':
3964             case '&':
3965             case '$':
3966                 weight -= seen[un_char] * 10;
3967                 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3968                     int len;
3969                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3970                     len = (int)strlen(tmpbuf);
3971                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3972                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3973                         weight -= 100;
3974                     else
3975                         weight -= 10;
3976                 }
3977                 else if (*s == '$' && s[1] &&
3978                   strchr("[#!%*<>()-=",s[1])) {
3979                     if (/*{*/ strchr("])} =",s[2]))
3980                         weight -= 10;
3981                     else
3982                         weight -= 1;
3983                 }
3984                 break;
3985             case '\\':
3986                 un_char = 254;
3987                 if (s[1]) {
3988                     if (strchr("wds]",s[1]))
3989                         weight += 100;
3990                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3991                         weight += 1;
3992                     else if (strchr("rnftbxcav",s[1]))
3993                         weight += 40;
3994                     else if (isDIGIT(s[1])) {
3995                         weight += 40;
3996                         while (s[1] && isDIGIT(s[1]))
3997                             s++;
3998                     }
3999                 }
4000                 else
4001                     weight += 100;
4002                 break;
4003             case '-':
4004                 if (s[1] == '\\')
4005                     weight += 50;
4006                 if (strchr("aA01! ",last_un_char))
4007                     weight += 30;
4008                 if (strchr("zZ79~",s[1]))
4009                     weight += 30;
4010                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4011                     weight -= 5;        /* cope with negative subscript */
4012                 break;
4013             default:
4014                 if (!isWORDCHAR(last_un_char)
4015                     && !(last_un_char == '$' || last_un_char == '@'
4016                          || last_un_char == '&')
4017                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4018                     char *d = tmpbuf;
4019                     while (isALPHA(*s))
4020                         *d++ = *s++;
4021                     *d = '\0';
4022                     if (keyword(tmpbuf, d - tmpbuf, 0))
4023                         weight -= 150;
4024                 }
4025                 if (un_char == last_un_char + 1)
4026                     weight += 5;
4027                 weight -= seen[un_char];
4028                 break;
4029             }
4030             seen[un_char]++;
4031         }
4032         if (weight >= 0)        /* probably a character class */
4033             return FALSE;
4034     }
4035
4036     return TRUE;
4037 }
4038
4039 /*
4040  * S_intuit_method
4041  *
4042  * Does all the checking to disambiguate
4043  *   foo bar
4044  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4045  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4046  *
4047  * First argument is the stuff after the first token, e.g. "bar".
4048  *
4049  * Not a method if foo is a filehandle.
4050  * Not a method if foo is a subroutine prototyped to take a filehandle.
4051  * Not a method if it's really "Foo $bar"
4052  * Method if it's "foo $bar"
4053  * Not a method if it's really "print foo $bar"
4054  * Method if it's really "foo package::" (interpreted as package->foo)
4055  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4056  * Not a method if bar is a filehandle or package, but is quoted with
4057  *   =>
4058  */
4059
4060 STATIC int
4061 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4062 {
4063     dVAR;
4064     char *s = start + (*start == '$');
4065     char tmpbuf[sizeof PL_tokenbuf];
4066     STRLEN len;
4067     GV* indirgv;
4068 #ifdef PERL_MAD
4069     int soff;
4070 #endif
4071
4072     PERL_ARGS_ASSERT_INTUIT_METHOD;
4073
4074     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4075             return 0;
4076     if (cv && SvPOK(cv)) {
4077         const char *proto = CvPROTO(cv);
4078         if (proto) {
4079             while (*proto && (isSPACE(*proto) || *proto == ';'))
4080                 proto++;
4081             if (*proto == '*')
4082                 return 0;
4083         }
4084     }
4085
4086     if (*start == '$') {
4087         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4088                 isUPPER(*PL_tokenbuf))
4089             return 0;
4090 #ifdef PERL_MAD
4091         len = start - SvPVX(PL_linestr);
4092 #endif
4093         s = PEEKSPACE(s);
4094 #ifdef PERL_MAD
4095         start = SvPVX(PL_linestr) + len;
4096 #endif
4097         PL_bufptr = start;
4098         PL_expect = XREF;
4099         return *s == '(' ? FUNCMETH : METHOD;
4100     }
4101
4102     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4103     /* start is the beginning of the possible filehandle/object,
4104      * and s is the end of it
4105      * tmpbuf is a copy of it (but with single quotes as double colons)
4106      */
4107
4108     if (!keyword(tmpbuf, len, 0)) {
4109         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4110             len -= 2;
4111             tmpbuf[len] = '\0';
4112 #ifdef PERL_MAD
4113             soff = s - SvPVX(PL_linestr);
4114 #endif
4115             goto bare_package;
4116         }
4117         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4118         if (indirgv && GvCVu(indirgv))
4119             return 0;
4120         /* filehandle or package name makes it a method */
4121         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4122 #ifdef PERL_MAD
4123             soff = s - SvPVX(PL_linestr);
4124 #endif
4125             s = PEEKSPACE(s);
4126             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4127                 return 0;       /* no assumptions -- "=>" quotes bareword */
4128       bare_package:
4129             start_force(PL_curforce);
4130             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4131                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4132             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4133             if (PL_madskills)
4134                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4135                                                             ( UTF ? SVf_UTF8 : 0 )));
4136             PL_expect = XTERM;
4137             force_next(WORD);
4138             PL_bufptr = s;
4139 #ifdef PERL_MAD
4140             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4141 #endif
4142             return *s == '(' ? FUNCMETH : METHOD;
4143         }
4144     }
4145     return 0;
4146 }
4147
4148 /* Encoded script support. filter_add() effectively inserts a
4149  * 'pre-processing' function into the current source input stream.
4150  * Note that the filter function only applies to the current source file
4151  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4152  *
4153  * The datasv parameter (which may be NULL) can be used to pass
4154  * private data to this instance of the filter. The filter function
4155  * can recover the SV using the FILTER_DATA macro and use it to
4156  * store private buffers and state information.
4157  *
4158  * The supplied datasv parameter is upgraded to a PVIO type
4159  * and the IoDIRP/IoANY field is used to store the function pointer,
4160  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4161  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4162  * private use must be set using malloc'd pointers.
4163  */
4164
4165 SV *
4166 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4167 {
4168     dVAR;
4169     if (!funcp)
4170         return NULL;
4171
4172     if (!PL_parser)
4173         return NULL;
4174
4175     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4176         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4177
4178     if (!PL_rsfp_filters)
4179         PL_rsfp_filters = newAV();
4180     if (!datasv)
4181         datasv = newSV(0);
4182     SvUPGRADE(datasv, SVt_PVIO);
4183     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4184     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4185     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4186                           FPTR2DPTR(void *, IoANY(datasv)),
4187                           SvPV_nolen(datasv)));
4188     av_unshift(PL_rsfp_filters, 1);
4189     av_store(PL_rsfp_filters, 0, datasv) ;
4190     if (
4191         !PL_parser->filtered
4192      && PL_parser->lex_flags & LEX_EVALBYTES
4193      && PL_bufptr < PL_bufend
4194     ) {
4195         const char *s = PL_bufptr;
4196         while (s < PL_bufend) {
4197             if (*s == '\n') {
4198                 SV *linestr = PL_parser->linestr;
4199                 char *buf = SvPVX(linestr);
4200                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4201                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4202                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4203                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4204                 STRLEN const last_uni_pos =
4205                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4206                 STRLEN const last_lop_pos =
4207                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4208                 av_push(PL_rsfp_filters, linestr);
4209                 PL_parser->linestr =
4210                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4211                 buf = SvPVX(PL_parser->linestr);
4212                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4213                 PL_parser->bufptr = buf + bufptr_pos;
4214                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4215                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4216                 PL_parser->linestart = buf + linestart_pos;
4217                 if (PL_parser->last_uni)
4218                     PL_parser->last_uni = buf + last_uni_pos;
4219                 if (PL_parser->last_lop)
4220                     PL_parser->last_lop = buf + last_lop_pos;
4221                 SvLEN(linestr) = SvCUR(linestr);
4222                 SvCUR(linestr) = s-SvPVX(linestr);
4223                 PL_parser->filtered = 1;
4224                 break;
4225             }
4226             s++;
4227         }
4228     }
4229     return(datasv);
4230 }
4231
4232
4233 /* Delete most recently added instance of this filter function. */
4234 void
4235 Perl_filter_del(pTHX_ filter_t funcp)
4236 {
4237     dVAR;
4238     SV *datasv;
4239
4240     PERL_ARGS_ASSERT_FILTER_DEL;
4241
4242 #ifdef DEBUGGING
4243     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4244                           FPTR2DPTR(void*, funcp)));
4245 #endif
4246     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4247         return;
4248     /* if filter is on top of stack (usual case) just pop it off */
4249     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4250     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4251         sv_free(av_pop(PL_rsfp_filters));
4252
4253         return;
4254     }
4255     /* we need to search for the correct entry and clear it     */
4256     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4257 }
4258
4259
4260 /* Invoke the idxth filter function for the current rsfp.        */
4261 /* maxlen 0 = read one text line */
4262 I32
4263 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4264 {
4265     dVAR;
4266     filter_t funcp;
4267     SV *datasv = NULL;
4268     /* This API is bad. It should have been using unsigned int for maxlen.
4269        Not sure if we want to change the API, but if not we should sanity
4270        check the value here.  */
4271     unsigned int correct_length
4272         = maxlen < 0 ?
4273 #ifdef PERL_MICRO
4274         0x7FFFFFFF
4275 #else
4276         INT_MAX
4277 #endif
4278         : maxlen;
4279
4280     PERL_ARGS_ASSERT_FILTER_READ;
4281
4282     if (!PL_parser || !PL_rsfp_filters)
4283         return -1;
4284     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4285         /* Provide a default input filter to make life easy.    */
4286         /* Note that we append to the line. This is handy.      */
4287         DEBUG_P(PerlIO_printf(Perl_debug_log,
4288                               "filter_read %d: from rsfp\n", idx));
4289         if (correct_length) {
4290             /* Want a block */
4291             int len ;
4292             const int old_len = SvCUR(buf_sv);
4293
4294             /* ensure buf_sv is large enough */
4295             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4296             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4297                                    correct_length)) <= 0) {
4298                 if (PerlIO_error(PL_rsfp))
4299                     return -1;          /* error */
4300                 else
4301                     return 0 ;          /* end of file */
4302             }
4303             SvCUR_set(buf_sv, old_len + len) ;
4304             SvPVX(buf_sv)[old_len + len] = '\0';
4305         } else {
4306             /* Want a line */
4307             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4308                 if (PerlIO_error(PL_rsfp))
4309                     return -1;          /* error */
4310                 else
4311                     return 0 ;          /* end of file */
4312             }
4313         }
4314         return SvCUR(buf_sv);
4315     }
4316     /* Skip this filter slot if filter has been deleted */
4317     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4318         DEBUG_P(PerlIO_printf(Perl_debug_log,
4319                               "filter_read %d: skipped (filter deleted)\n",
4320                               idx));
4321         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4322     }
4323     if (SvTYPE(datasv) != SVt_PVIO) {
4324         if (correct_length) {
4325             /* Want a block */
4326             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4327             if (!remainder) return 0; /* eof */
4328             if (correct_length > remainder) correct_length = remainder;
4329             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4330             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4331         } else {
4332             /* Want a line */
4333             const char *s = SvEND(datasv);
4334             const char *send = SvPVX(datasv) + SvLEN(datasv);
4335             while (s < send) {
4336                 if (*s == '\n') {
4337                     s++;
4338                     break;
4339                 }
4340                 s++;
4341             }
4342             if (s == send) return 0; /* eof */
4343             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4344             SvCUR_set(datasv, s-SvPVX(datasv));
4345         }
4346         return SvCUR(buf_sv);
4347     }
4348     /* Get function pointer hidden within datasv        */
4349     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4350     DEBUG_P(PerlIO_printf(Perl_debug_log,
4351                           "filter_read %d: via function %p (%s)\n",
4352                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4353     /* Call function. The function is expected to       */
4354     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4355     /* Return: <0:error, =0:eof, >0:not eof             */
4356     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4357 }
4358
4359 STATIC char *
4360 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4361 {
4362     dVAR;
4363
4364     PERL_ARGS_ASSERT_FILTER_GETS;
4365
4366 #ifdef PERL_CR_FILTER
4367     if (!PL_rsfp_filters) {
4368         filter_add(S_cr_textfilter,NULL);
4369     }
4370 #endif
4371     if (PL_rsfp_filters) {
4372         if (!append)
4373             SvCUR_set(sv, 0);   /* start with empty line        */
4374         if (FILTER_READ(0, sv, 0) > 0)
4375             return ( SvPVX(sv) ) ;
4376         else
4377             return NULL ;
4378     }
4379     else
4380         return (sv_gets(sv, PL_rsfp, append));
4381 }
4382
4383 STATIC HV *
4384 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4385 {
4386     dVAR;
4387     GV *gv;
4388
4389     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4390
4391     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4392         return PL_curstash;
4393
4394     if (len > 2 &&
4395         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4396         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4397     {
4398         return GvHV(gv);                        /* Foo:: */
4399     }
4400
4401     /* use constant CLASS => 'MyClass' */
4402     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4403     if (gv && GvCV(gv)) {
4404         SV * const sv = cv_const_sv(GvCV(gv));
4405         if (sv)
4406             pkgname = SvPV_const(sv, len);
4407     }
4408
4409     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4410 }
4411
4412 /*
4413  * S_readpipe_override
4414  * Check whether readpipe() is overridden, and generates the appropriate
4415  * optree, provided sublex_start() is called afterwards.
4416  */
4417 STATIC void
4418 S_readpipe_override(pTHX)
4419 {
4420     GV **gvp;
4421     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4422     pl_yylval.ival = OP_BACKTICK;
4423     if ((gv_readpipe
4424                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4425             ||
4426             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4427              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4428              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4429     {
4430         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4431             op_append_elem(OP_LIST,
4432                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4433                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4434     }
4435 }
4436
4437 #ifdef PERL_MAD
4438  /*
4439  * Perl_madlex
4440  * The intent of this yylex wrapper is to minimize the changes to the
4441  * tokener when we aren't interested in collecting madprops.  It remains
4442  * to be seen how successful this strategy will be...
4443  */
4444
4445 int
4446 Perl_madlex(pTHX)
4447 {
4448     int optype;
4449     char *s = PL_bufptr;
4450
4451     /* make sure PL_thiswhite is initialized */
4452     PL_thiswhite = 0;
4453     PL_thismad = 0;
4454
4455     /* previous token ate up our whitespace? */
4456     if (!PL_lasttoke && PL_nextwhite) {
4457         PL_thiswhite = PL_nextwhite;
4458         PL_nextwhite = 0;
4459     }
4460
4461     /* isolate the token, and figure out where it is without whitespace */
4462     PL_realtokenstart = -1;
4463     PL_thistoken = 0;
4464     optype = yylex();
4465     s = PL_bufptr;
4466     assert(PL_curforce < 0);
4467
4468     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4469         if (!PL_thistoken) {
4470             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4471                 PL_thistoken = newSVpvs("");
4472             else {
4473                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4474                 PL_thistoken = newSVpvn(tstart, s - tstart);
4475             }
4476         }
4477         if (PL_thismad) /* install head */
4478             CURMAD('X', PL_thistoken);
4479     }
4480
4481     /* last whitespace of a sublex? */
4482     if (optype == ')' && PL_endwhite) {
4483         CURMAD('X', PL_endwhite);
4484     }
4485
4486     if (!PL_thismad) {
4487
4488         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4489         if (!PL_thiswhite && !PL_endwhite && !optype) {
4490             sv_free(PL_thistoken);
4491             PL_thistoken = 0;
4492             return 0;
4493         }
4494
4495         /* put off final whitespace till peg */
4496         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4497             PL_nextwhite = PL_thiswhite;
4498             PL_thiswhite = 0;
4499         }
4500         else if (PL_thisopen) {
4501             CURMAD('q', PL_thisopen);
4502             if (PL_thistoken)
4503                 sv_free(PL_thistoken);
4504             PL_thistoken = 0;
4505         }
4506         else {
4507             /* Store actual token text as madprop X */
4508             CURMAD('X', PL_thistoken);
4509         }
4510
4511         if (PL_thiswhite) {
4512             /* add preceding whitespace as madprop _ */
4513             CURMAD('_', PL_thiswhite);
4514         }
4515
4516         if (PL_thisstuff) {
4517             /* add quoted material as madprop = */
4518             CURMAD('=', PL_thisstuff);
4519         }
4520
4521         if (PL_thisclose) {
4522             /* add terminating quote as madprop Q */
4523             CURMAD('Q', PL_thisclose);
4524         }
4525     }
4526
4527     /* special processing based on optype */
4528
4529     switch (optype) {
4530
4531     /* opval doesn't need a TOKEN since it can already store mp */
4532     case WORD:
4533     case METHOD:
4534     case FUNCMETH:
4535     case THING:
4536     case PMFUNC:
4537     case PRIVATEREF:
4538     case FUNC0SUB:
4539     case UNIOPSUB:
4540     case LSTOPSUB:
4541         if (pl_yylval.opval)
4542             append_madprops(PL_thismad, pl_yylval.opval, 0);
4543         PL_thismad = 0;
4544         return optype;
4545
4546     /* fake EOF */
4547     case 0:
4548         optype = PEG;
4549         if (PL_endwhite) {
4550             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4551             PL_endwhite = 0;
4552         }
4553         break;
4554
4555     /* pval */
4556     case LABEL:
4557         break;
4558
4559     case ']':
4560     case '}':
4561         if (PL_faketokens)
4562             break;
4563         /* remember any fake bracket that lexer is about to discard */
4564         if (PL_lex_brackets == 1 &&
4565             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4566         {
4567             s = PL_bufptr;
4568             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4569                 s++;
4570             if (*s == '}') {
4571                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4572                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4573                 PL_thiswhite = 0;
4574                 PL_bufptr = s - 1;
4575                 break;  /* don't bother looking for trailing comment */
4576             }
4577             else
4578                 s = PL_bufptr;
4579         }
4580         if (optype == ']')
4581             break;
4582         /* FALLTHROUGH */
4583
4584     /* attach a trailing comment to its statement instead of next token */
4585     case ';':
4586         if (PL_faketokens)
4587             break;
4588         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4589             s = PL_bufptr;
4590             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4591                 s++;
4592             if (*s == '\n' || *s == '#') {
4593                 while (s < PL_bufend && *s != '\n')
4594                     s++;
4595                 if (s < PL_bufend)
4596                     s++;
4597                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4598                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4599                 PL_thiswhite = 0;
4600                 PL_bufptr = s;
4601             }
4602         }
4603         break;
4604
4605     /* ival */
4606     default:
4607         break;
4608
4609     }
4610
4611     /* Create new token struct.  Note: opvals return early above. */
4612     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4613     PL_thismad = 0;
4614     return optype;
4615 }
4616 #endif
4617
4618 STATIC char *
4619 S_tokenize_use(pTHX_ int is_use, char *s) {
4620     dVAR;
4621
4622     PERL_ARGS_ASSERT_TOKENIZE_USE;
4623
4624     if (PL_expect != XSTATE)
4625         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4626                     is_use ? "use" : "no"));
4627     PL_expect = XTERM;
4628     s = SKIPSPACE1(s);
4629     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4630         s = force_version(s, TRUE);
4631         if (*s == ';' || *s == '}'
4632                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4633             start_force(PL_curforce);
4634             NEXTVAL_NEXTTOKE.opval = NULL;
4635             force_next(WORD);
4636         }
4637         else if (*s == 'v') {
4638             s = force_word(s,WORD,FALSE,TRUE);
4639             s = force_version(s, FALSE);
4640         }
4641     }
4642     else {
4643         s = force_word(s,WORD,FALSE,TRUE);
4644         s = force_version(s, FALSE);
4645     }
4646     pl_yylval.ival = is_use;
4647     return s;
4648 }
4649 #ifdef DEBUGGING
4650     static const char* const exp_name[] =
4651         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4652           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4653         };
4654 #endif
4655
4656 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4657 STATIC bool
4658 S_word_takes_any_delimeter(char *p, STRLEN len)
4659 {
4660     return (len == 1 && strchr("msyq", p[0])) ||
4661            (len == 2 && (
4662             (p[0] == 't' && p[1] == 'r') ||
4663             (p[0] == 'q' && strchr("qwxr", p[1]))));
4664 }
4665
4666 /*
4667   yylex
4668
4669   Works out what to call the token just pulled out of the input
4670   stream.  The yacc parser takes care of taking the ops we return and
4671   stitching them into a tree.
4672
4673   Returns:
4674     The type of the next token
4675
4676   Structure:
4677       Switch based on the current state:
4678           - if we already built the token before, use it
4679           - if we have a case modifier in a string, deal with that
4680           - handle other cases of interpolation inside a string
4681           - scan the next line if we are inside a format
4682       In the normal state switch on the next character:
4683           - default:
4684             if alphabetic, go to key lookup
4685             unrecoginized character - croak
4686           - 0/4/26: handle end-of-line or EOF
4687           - cases for whitespace
4688           - \n and #: handle comments and line numbers
4689           - various operators, brackets and sigils
4690           - numbers
4691           - quotes
4692           - 'v': vstrings (or go to key lookup)
4693           - 'x' repetition operator (or go to key lookup)
4694           - other ASCII alphanumerics (key lookup begins here):
4695               word before => ?
4696               keyword plugin
4697               scan built-in keyword (but do nothing with it yet)
4698               check for statement label
4699               check for lexical subs
4700                   goto just_a_word if there is one
4701               see whether built-in keyword is overridden
4702               switch on keyword number:
4703                   - default: just_a_word:
4704                       not a built-in keyword; handle bareword lookup
4705                       disambiguate between method and sub call
4706                       fall back to bareword
4707                   - cases for built-in keywords
4708 */
4709
4710
4711 #ifdef __SC__
4712 #pragma segment Perl_yylex
4713 #endif
4714 int
4715 Perl_yylex(pTHX)
4716 {
4717     dVAR;
4718     char *s = PL_bufptr;
4719     char *d;
4720     STRLEN len;
4721     bool bof = FALSE;
4722     const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
4723     U8 formbrack = 0;
4724     U32 fake_eof = 0;
4725
4726     /* orig_keyword, gvp, and gv are initialized here because
4727      * jump to the label just_a_word_zero can bypass their
4728      * initialization later. */
4729     I32 orig_keyword = 0;
4730     GV *gv = NULL;
4731     GV **gvp = NULL;
4732
4733     DEBUG_T( {
4734         SV* tmp = newSVpvs("");
4735         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4736             (IV)CopLINE(PL_curcop),
4737             lex_state_names[PL_lex_state],
4738             exp_name[PL_expect],
4739             pv_display(tmp, s, strlen(s), 0, 60));
4740         SvREFCNT_dec(tmp);
4741     } );
4742
4743     switch (PL_lex_state) {
4744 #ifdef COMMENTARY
4745     case LEX_NORMAL:            /* Some compilers will produce faster */
4746     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4747         break;
4748 #endif
4749
4750     /* when we've already built the next token, just pull it out of the queue */
4751     case LEX_KNOWNEXT:
4752 #ifdef PERL_MAD
4753         PL_lasttoke--;
4754         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4755         if (PL_madskills) {
4756             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4757             PL_nexttoke[PL_lasttoke].next_mad = 0;
4758             if (PL_thismad && PL_thismad->mad_key == '_') {
4759                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4760                 PL_thismad->mad_val = 0;
4761                 mad_free(PL_thismad);
4762                 PL_thismad = 0;
4763             }
4764         }
4765         if (!PL_lasttoke) {
4766             PL_lex_state = PL_lex_defer;
4767             PL_expect = PL_lex_expect;
4768             PL_lex_defer = LEX_NORMAL;
4769             if (!PL_nexttoke[PL_lasttoke].next_type)
4770                 return yylex();
4771         }
4772 #else
4773         PL_nexttoke--;
4774         pl_yylval = PL_nextval[PL_nexttoke];
4775         if (!PL_nexttoke) {
4776             PL_lex_state = PL_lex_defer;
4777             PL_expect = PL_lex_expect;
4778             PL_lex_defer = LEX_NORMAL;
4779         }
4780 #endif
4781         {
4782             I32 next_type;
4783 #ifdef PERL_MAD
4784             next_type = PL_nexttoke[PL_lasttoke].next_type;
4785 #else
4786             next_type = PL_nexttype[PL_nexttoke];
4787 #endif
4788             if (next_type & (7<<24)) {
4789                 if (next_type & (1<<24)) {
4790                     if (PL_lex_brackets > 100)
4791                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4792                     PL_lex_brackstack[PL_lex_brackets++] =
4793                         (char) ((next_type >> 16) & 0xff);
4794                 }
4795                 if (next_type & (2<<24))
4796                     PL_lex_allbrackets++;
4797                 if (next_type & (4<<24))
4798                     PL_lex_allbrackets--;
4799                 next_type &= 0xffff;
4800             }
4801             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4802         }
4803
4804     /* interpolated case modifiers like \L \U, including \Q and \E.
4805        when we get here, PL_bufptr is at the \
4806     */
4807     case LEX_INTERPCASEMOD:
4808 #ifdef DEBUGGING
4809         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4810             Perl_croak(aTHX_
4811                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4812                        PL_bufptr, PL_bufend, *PL_bufptr);
4813 #endif
4814         /* handle \E or end of string */
4815         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4816             /* if at a \E */
4817             if (PL_lex_casemods) {
4818                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4819                 PL_lex_casestack[PL_lex_casemods] = '\0';
4820
4821                 if (PL_bufptr != PL_bufend
4822                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4823                         || oldmod == 'F')) {
4824                     PL_bufptr += 2;
4825                     PL_lex_state = LEX_INTERPCONCAT;
4826 #ifdef PERL_MAD
4827                     if (PL_madskills)
4828                         PL_thistoken = newSVpvs("\\E");
4829 #endif
4830                 }
4831                 PL_lex_allbrackets--;
4832                 return REPORT(')');
4833             }
4834             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4835                /* Got an unpaired \E */
4836                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4837                         "Useless use of \\E");
4838             }
4839 #ifdef PERL_MAD
4840             while (PL_bufptr != PL_bufend &&
4841               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4842                 if (PL_madskills) {
4843                   if (!PL_thiswhite)
4844                     PL_thiswhite = newSVpvs("");
4845                   sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4846                 }
4847                 PL_bufptr += 2;
4848             }
4849 #else
4850             if (PL_bufptr != PL_bufend)
4851                 PL_bufptr += 2;
4852 #endif
4853             PL_lex_state = LEX_INTERPCONCAT;
4854             return yylex();
4855         }
4856         else {
4857             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4858               "### Saw case modifier\n"); });
4859             s = PL_bufptr + 1;
4860             if (s[1] == '\\' && s[2] == 'E') {
4861 #ifdef PERL_MAD
4862                 if (PL_madskills) {
4863                   if (!PL_thiswhite)
4864                     PL_thiswhite = newSVpvs("");
4865                   sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4866                 }
4867 #endif
4868                 PL_bufptr = s + 3;
4869                 PL_lex_state = LEX_INTERPCONCAT;
4870                 return yylex();
4871             }
4872             else {
4873                 I32 tmp;
4874                 if (!PL_madskills) /* when just compiling don't need correct */
4875                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4876                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4877                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4878                     (strchr(PL_lex_casestack, 'L')
4879                         || strchr(PL_lex_casestack, 'U')
4880                         || strchr(PL_lex_casestack, 'F'))) {
4881                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4882                     PL_lex_allbrackets--;
4883                     return REPORT(')');
4884                 }
4885                 if (PL_lex_casemods > 10)
4886                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4887                 PL_lex_casestack[PL_lex_casemods++] = *s;
4888                 PL_lex_casestack[PL_lex_casemods] = '\0';
4889                 PL_lex_state = LEX_INTERPCONCAT;
4890                 start_force(PL_curforce);
4891                 NEXTVAL_NEXTTOKE.ival = 0;
4892                 force_next((2<<24)|'(');
4893                 start_force(PL_curforce);
4894                 if (*s == 'l')
4895                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4896                 else if (*s == 'u')
4897                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4898                 else if (*s == 'L')
4899                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4900                 else if (*s == 'U')
4901                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4902                 else if (*s == 'Q')
4903                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4904                 else if (*s == 'F')
4905                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4906                 else
4907                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4908                 if (PL_madskills) {
4909                     SV* const tmpsv = newSVpvs("\\ ");
4910                     /* replace the space with the character we want to escape
4911                      */
4912                     SvPVX(tmpsv)[1] = *s;
4913                     curmad('_', tmpsv);
4914                 }
4915                 PL_bufptr = s + 1;
4916             }
4917             force_next(FUNC);
4918             if (PL_lex_starts) {
4919                 s = PL_bufptr;
4920                 PL_lex_starts = 0;
4921 #ifdef PERL_MAD
4922                 if (PL_madskills) {
4923                     if (PL_thistoken)
4924                         sv_free(PL_thistoken);
4925                     PL_thistoken = newSVpvs("");
4926                 }
4927 #endif
4928                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4929                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4930                     OPERATOR(',');
4931                 else
4932                     Aop(OP_CONCAT);
4933             }
4934             else
4935                 return yylex();
4936         }
4937
4938     case LEX_INTERPPUSH:
4939         return REPORT(sublex_push());
4940
4941     case LEX_INTERPSTART:
4942         if (PL_bufptr == PL_bufend)
4943             return REPORT(sublex_done());
4944         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4945               "### Interpolated variable\n"); });
4946         PL_expect = XTERM;
4947         /* for /@a/, we leave the joining for the regex engine to do
4948          * (unless we're within \Q etc) */
4949         PL_lex_dojoin = (*PL_bufptr == '@'
4950                             && (!PL_lex_inpat || PL_lex_casemods));
4951         PL_lex_state = LEX_INTERPNORMAL;
4952         if (PL_lex_dojoin) {
4953             start_force(PL_curforce);
4954             NEXTVAL_NEXTTOKE.ival = 0;
4955             force_next(',');
4956             start_force(PL_curforce);
4957             force_ident("\"", '$');
4958             start_force(PL_curforce);
4959             NEXTVAL_NEXTTOKE.ival = 0;
4960             force_next('$');
4961             start_force(PL_curforce);
4962             NEXTVAL_NEXTTOKE.ival = 0;
4963             force_next((2<<24)|'(');
4964             start_force(PL_curforce);
4965             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4966             force_next(FUNC);
4967         }
4968         /* Convert (?{...}) and friends to 'do {...}' */
4969         if (PL_lex_inpat && *PL_bufptr == '(') {
4970             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4971             PL_bufptr += 2;
4972             if (*PL_bufptr != '{')
4973                 PL_bufptr++;
4974             start_force(PL_curforce);
4975             /* XXX probably need a CURMAD(something) here */
4976             PL_expect = XTERMBLOCK;
4977             force_next(DO);
4978         }
4979
4980         if (PL_lex_starts++) {
4981             s = PL_bufptr;
4982 #ifdef PERL_MAD
4983             if (PL_madskills) {
4984                 if (PL_thistoken)
4985                     sv_free(PL_thistoken);
4986                 PL_thistoken = newSVpvs("");
4987             }
4988 #endif
4989             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4990             if (!PL_lex_casemods && PL_lex_inpat)
4991                 OPERATOR(',');
4992             else
4993                 Aop(OP_CONCAT);
4994         }
4995         return yylex();
4996
4997     case LEX_INTERPENDMAYBE:
4998         if (intuit_more(PL_bufptr)) {
4999             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
5000             break;
5001         }
5002         /* FALL THROUGH */
5003
5004     case LEX_INTERPEND:
5005         if (PL_lex_dojoin) {
5006             PL_lex_dojoin = FALSE;
5007             PL_lex_state = LEX_INTERPCONCAT;
5008 #ifdef PERL_MAD
5009             if (PL_madskills) {
5010                 if (PL_thistoken)
5011                     sv_free(PL_thistoken);
5012                 PL_thistoken = newSVpvs("");
5013             }
5014 #endif
5015             PL_lex_allbrackets--;
5016             return REPORT(')');
5017         }
5018         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5019             && SvEVALED(PL_lex_repl))
5020         {
5021             if (PL_bufptr != PL_bufend)
5022                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5023             PL_lex_repl = NULL;
5024         }
5025         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5026            re_eval_str.  If the here-doc body’s length equals the previous
5027            value of re_eval_start, re_eval_start will now be null.  So
5028            check re_eval_str as well. */
5029         if (PL_parser->lex_shared->re_eval_start
5030          || PL_parser->lex_shared->re_eval_str) {
5031             SV *sv;
5032             if (*PL_bufptr != ')')
5033                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5034             PL_bufptr++;
5035             /* having compiled a (?{..}) expression, return the original
5036              * text too, as a const */
5037             if (PL_parser->lex_shared->re_eval_str) {
5038                 sv = PL_parser->lex_shared->re_eval_str;
5039                 PL_parser->lex_shared->re_eval_str = NULL;
5040                 SvCUR_set(sv,
5041                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5042                 SvPV_shrink_to_cur(sv);
5043             }
5044             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5045                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5046             start_force(PL_curforce);
5047             /* XXX probably need a CURMAD(something) here */
5048             NEXTVAL_NEXTTOKE.opval =
5049                     (OP*)newSVOP(OP_CONST, 0,
5050                                  sv);
5051             force_next(THING);
5052             PL_parser->lex_shared->re_eval_start = NULL;
5053             PL_expect = XTERM;
5054             return REPORT(',');
5055         }
5056
5057         /* FALLTHROUGH */
5058     case LEX_INTERPCONCAT:
5059 #ifdef DEBUGGING
5060         if (PL_lex_brackets)
5061             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5062                        (long) PL_lex_brackets);
5063 #endif
5064         if (PL_bufptr == PL_bufend)
5065             return REPORT(sublex_done());
5066
5067         /* m'foo' still needs to be parsed for possible (?{...}) */
5068         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5069             SV *sv = newSVsv(PL_linestr);
5070             sv = tokeq(sv);
5071             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5072             s = PL_bufend;
5073         }
5074         else {
5075             s = scan_const(PL_bufptr);
5076             if (*s == '\\')
5077                 PL_lex_state = LEX_INTERPCASEMOD;
5078             else
5079                 PL_lex_state = LEX_INTERPSTART;
5080         }
5081
5082         if (s != PL_bufptr) {
5083             start_force(PL_curforce);
5084             if (PL_madskills) {
5085                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5086             }
5087             NEXTVAL_NEXTTOKE = pl_yylval;
5088             PL_expect = XTERM;
5089             force_next(THING);
5090             if (PL_lex_starts++) {
5091 #ifdef PERL_MAD
5092                 if (PL_madskills) {
5093                     if (PL_thistoken)
5094                         sv_free(PL_thistoken);
5095                     PL_thistoken = newSVpvs("");
5096                 }
5097 #endif
5098                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5099                 if (!PL_lex_casemods && PL_lex_inpat)
5100                     OPERATOR(',');
5101                 else
5102                     Aop(OP_CONCAT);
5103             }
5104             else {
5105                 PL_bufptr = s;
5106                 return yylex();
5107             }
5108         }
5109
5110         return yylex();
5111     case LEX_FORMLINE:
5112         s = scan_formline(PL_bufptr);
5113         if (!PL_lex_formbrack)
5114         {
5115             formbrack = 1;
5116             goto rightbracket;
5117         }
5118         PL_bufptr = s;
5119         return yylex();
5120     }
5121
5122     /* We really do *not* want PL_linestr ever becoming a COW. */
5123     assert (!SvIsCOW(PL_linestr));
5124     s = PL_bufptr;
5125     PL_oldoldbufptr = PL_oldbufptr;
5126     PL_oldbufptr = s;
5127     PL_parser->saw_infix_sigil = 0;
5128
5129   retry:
5130 #ifdef PERL_MAD
5131     if (PL_thistoken) {
5132         sv_free(PL_thistoken);
5133         PL_thistoken = 0;
5134     }
5135     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
5136 #endif
5137     switch (*s) {
5138     default:
5139         if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5140             goto keylookup;
5141         {
5142         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5143         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
5144                                                     UTF8SKIP(s),
5145                                                     SVs_TEMP | SVf_UTF8),
5146                                             10, UNI_DISPLAY_ISPRINT))
5147                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5148         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5149         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5150             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5151         } else {
5152             d = PL_linestart;
5153         }
5154         *s = '\0';
5155         sv_setpv(dsv, d);
5156         if (UTF)
5157             SvUTF8_on(dsv);
5158         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
5159     }
5160     case 4:
5161     case 26:
5162         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5163     case 0:
5164 #ifdef PERL_MAD
5165         if (PL_madskills)
5166             PL_faketokens = 0;
5167 #endif
5168         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5169             PL_last_uni = 0;
5170             PL_last_lop = 0;
5171             if (PL_lex_brackets &&
5172                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5173                 yyerror((const char *)
5174                         (PL_lex_formbrack
5175                          ? "Format not terminated"
5176                          : "Missing right curly or square bracket"));
5177             }
5178             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5179                         "### Tokener got EOF\n");
5180             } );
5181             TOKEN(0);
5182         }
5183         if (s++ < PL_bufend)
5184             goto retry;                 /* ignore stray nulls */
5185         PL_last_uni = 0;
5186         PL_last_lop = 0;
5187         if (!PL_in_eval && !PL_preambled) {
5188             PL_preambled = TRUE;
5189 #ifdef PERL_MAD
5190             if (PL_madskills)
5191                 PL_faketokens = 1;
5192 #endif
5193             if (PL_perldb) {
5194                 /* Generate a string of Perl code to load the debugger.
5195                  * If PERL5DB is set, it will return the contents of that,
5196                  * otherwise a compile-time require of perl5db.pl.  */
5197
5198                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5199
5200                 if (pdb) {
5201                     sv_setpv(PL_linestr, pdb);
5202                     sv_catpvs(PL_linestr,";");
5203                 } else {
5204                     SETERRNO(0,SS_NORMAL);
5205                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5206                 }
5207             } else
5208                 sv_setpvs(PL_linestr,"");
5209             if (PL_preambleav) {
5210                 SV **svp = AvARRAY(PL_preambleav);
5211                 SV **const end = svp + AvFILLp(PL_preambleav);
5212                 while(svp <= end) {
5213                     sv_catsv(PL_linestr, *svp);
5214                     ++svp;
5215                     sv_catpvs(PL_linestr, ";");
5216                 }
5217                 sv_free(MUTABLE_SV(PL_preambleav));
5218                 PL_preambleav = NULL;
5219             }
5220             if (PL_minus_E)
5221                 sv_catpvs(PL_linestr,
5222                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5223             if (PL_minus_n || PL_minus_p) {
5224                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5225                 if (PL_minus_l)
5226                     sv_catpvs(PL_linestr,"chomp;");
5227                 if (PL_minus_a) {
5228                     if (PL_minus_F) {
5229                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5230                              || *PL_splitstr == '"')
5231                               && strchr(PL_splitstr + 1, *PL_splitstr))
5232                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5233                         else {
5234                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5235                                bytes can be used as quoting characters.  :-) */
5236                             const char *splits = PL_splitstr;
5237                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5238                             do {
5239                                 /* Need to \ \s  */
5240                                 if (*splits == '\\')
5241                                     sv_catpvn(PL_linestr, splits, 1);
5242                                 sv_catpvn(PL_linestr, splits, 1);
5243                             } while (*splits++);
5244                             /* This loop will embed the trailing NUL of
5245                                PL_linestr as the last thing it does before
5246                                terminating.  */
5247                             sv_catpvs(PL_linestr, ");");
5248                         }
5249                     }
5250                     else
5251                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5252                 }
5253             }
5254             sv_catpvs(PL_linestr, "\n");
5255             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5256             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5257             PL_last_lop = PL_last_uni = NULL;
5258             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5259                 update_debugger_info(PL_linestr, NULL, 0);
5260             goto retry;
5261         }
5262         do {
5263             fake_eof = 0;
5264             bof = PL_rsfp ? TRUE : FALSE;
5265             if (0) {
5266               fake_eof:
5267                 fake_eof = LEX_FAKE_EOF;
5268             }
5269             PL_bufptr = PL_bufend;
5270             COPLINE_INC_WITH_HERELINES;
5271             if (!lex_next_chunk(fake_eof)) {
5272                 CopLINE_dec(PL_curcop);
5273                 s = PL_bufptr;
5274                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5275             }
5276             CopLINE_dec(PL_curcop);
5277 #ifdef PERL_MAD
5278             if (!PL_rsfp)
5279                 PL_realtokenstart = -1;
5280 #endif
5281             s = PL_bufptr;
5282             /* If it looks like the start of a BOM or raw UTF-16,
5283              * check if it in fact is. */
5284             if (bof && PL_rsfp &&
5285                      (*s == 0 ||
5286                       *(U8*)s == 0xEF ||
5287                       *(U8*)s >= 0xFE ||
5288                       s[1] == 0)) {
5289                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5290                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5291 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5292                 /* offset may include swallowed CR */
5293                 if (!bof)
5294                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5295 #endif
5296                 if (bof) {
5297                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5298                     s = swallow_bom((U8*)s);
5299                 }
5300             }
5301             if (PL_parser->in_pod) {
5302                 /* Incest with pod. */
5303 #ifdef PERL_MAD
5304                 if (PL_madskills)
5305                     sv_catsv(PL_thiswhite, PL_linestr);
5306 #endif
5307                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5308                     sv_setpvs(PL_linestr, "");
5309                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5310                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5311                     PL_last_lop = PL_last_uni = NULL;
5312                     PL_parser->in_pod = 0;
5313                 }
5314             }
5315             if (PL_rsfp || PL_parser->filtered)
5316                 incline(s);
5317         } while (PL_parser->in_pod);
5318         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5319         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5320         PL_last_lop = PL_last_uni = NULL;
5321         if (CopLINE(PL_curcop) == 1) {
5322             while (s < PL_bufend && isSPACE(*s))
5323                 s++;
5324             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5325                 s++;
5326 #ifdef PERL_MAD
5327             if (PL_madskills)
5328                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5329 #endif
5330             d = NULL;
5331             if (!PL_in_eval) {
5332                 if (*s == '#' && *(s+1) == '!')
5333                     d = s + 2;
5334 #ifdef ALTERNATE_SHEBANG
5335                 else {
5336                     static char const as[] = ALTERNATE_SHEBANG;
5337                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5338                         d = s + (sizeof(as) - 1);
5339                 }
5340 #endif /* ALTERNATE_SHEBANG */
5341             }
5342             if (d) {
5343                 char *ipath;
5344                 char *ipathend;
5345
5346                 while (isSPACE(*d))
5347                     d++;
5348                 ipath = d;
5349                 while (*d && !isSPACE(*d))
5350                     d++;
5351                 ipathend = d;
5352
5353 #ifdef ARG_ZERO_IS_SCRIPT
5354                 if (ipathend > ipath) {
5355                     /*
5356                      * HP-UX (at least) sets argv[0] to the script name,
5357                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5358                      * at least, set argv[0] to the basename of the Perl
5359                      * interpreter. So, having found "#!", we'll set it right.
5360                      */
5361                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5362                                                     SVt_PV)); /* $^X */
5363                     assert(SvPOK(x) || SvGMAGICAL(x));
5364                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5365                         sv_setpvn(x, ipath, ipathend - ipath);
5366                         SvSETMAGIC(x);
5367                     }
5368                     else {
5369                         STRLEN blen;
5370                         STRLEN llen;
5371                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5372                         const char * const lstart = SvPV_const(x,llen);
5373                         if (llen < blen) {
5374                             bstart += blen - llen;
5375                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5376                                 sv_setpvn(x, ipath, ipathend - ipath);
5377                                 SvSETMAGIC(x);
5378                             }
5379                         }
5380                     }
5381                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5382                 }
5383 #endif /* ARG_ZERO_IS_SCRIPT */
5384
5385                 /*
5386                  * Look for options.
5387                  */
5388                 d = instr(s,"perl -");
5389                 if (!d) {
5390                     d = instr(s,"perl");
5391 #if defined(DOSISH)
5392                     /* avoid getting into infinite loops when shebang
5393                      * line contains "Perl" rather than "perl" */
5394                     if (!d) {
5395                         for (d = ipathend-4; d >= ipath; --d) {
5396                             if ((*d == 'p' || *d == 'P')
5397                                 && !ibcmp(d, "perl", 4))
5398                             {
5399                                 break;
5400                             }
5401                         }
5402                         if (d < ipath)
5403                             d = NULL;
5404                     }
5405 #endif
5406                 }
5407 #ifdef ALTERNATE_SHEBANG
5408                 /*
5409                  * If the ALTERNATE_SHEBANG on this system starts with a
5410                  * character that can be part of a Perl expression, then if
5411                  * we see it but not "perl", we're probably looking at the
5412                  * start of Perl code, not a request to hand off to some
5413                  * other interpreter.  Similarly, if "perl" is there, but
5414                  * not in the first 'word' of the line, we assume the line
5415                  * contains the start of the Perl program.
5416                  */
5417                 if (d && *s != '#') {
5418                     const char *c = ipath;
5419                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5420                         c++;
5421                     if (c < d)
5422                         d = NULL;       /* "perl" not in first word; ignore */
5423                     else
5424                         *s = '#';       /* Don't try to parse shebang line */
5425                 }
5426 #endif /* ALTERNATE_SHEBANG */
5427                 if (!d &&
5428                     *s == '#' &&
5429                     ipathend > ipath &&
5430                     !PL_minus_c &&
5431                     !instr(s,"indir") &&
5432                     instr(PL_origargv[0],"perl"))
5433                 {
5434                     dVAR;
5435                     char **newargv;
5436
5437                     *ipathend = '\0';
5438                     s = ipathend + 1;
5439                     while (s < PL_bufend && isSPACE(*s))
5440                         s++;
5441                     if (s < PL_bufend) {
5442                         Newx(newargv,PL_origargc+3,char*);
5443                         newargv[1] = s;
5444                         while (s < PL_bufend && !isSPACE(*s))
5445                             s++;
5446                         *s = '\0';
5447                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5448                     }
5449                     else
5450                         newargv = PL_origargv;
5451                     newargv[0] = ipath;
5452                     PERL_FPU_PRE_EXEC
5453                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5454                     PERL_FPU_POST_EXEC
5455                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5456                 }
5457                 if (d) {
5458                     while (*d && !isSPACE(*d))
5459                         d++;
5460                     while (SPACE_OR_TAB(*d))
5461                         d++;
5462
5463                     if (*d++ == '-') {
5464                         const bool switches_done = PL_doswitches;
5465                         const U32 oldpdb = PL_perldb;
5466                         const bool oldn = PL_minus_n;
5467                         const bool oldp = PL_minus_p;
5468                         const char *d1 = d;
5469
5470                         do {
5471                             bool baduni = FALSE;
5472                             if (*d1 == 'C') {
5473                                 const char *d2 = d1 + 1;
5474                                 if (parse_unicode_opts((const char **)&d2)
5475                                     != PL_unicode)
5476                                     baduni = TRUE;
5477                             }
5478                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5479                                 const char * const m = d1;
5480                                 while (*d1 && !isSPACE(*d1))
5481                                     d1++;
5482                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5483                                       (int)(d1 - m), m);
5484                             }
5485                             d1 = moreswitches(d1);
5486                         } while (d1);
5487                         if (PL_doswitches && !switches_done) {
5488                             int argc = PL_origargc;
5489                             char **argv = PL_origargv;
5490                             do {
5491                                 argc--,argv++;
5492                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5493                             init_argv_symbols(argc,argv);
5494                         }
5495                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5496                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5497                               /* if we have already added "LINE: while (<>) {",
5498                                  we must not do it again */
5499                         {
5500                             sv_setpvs(PL_linestr, "");
5501                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5502                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5503                             PL_last_lop = PL_last_uni = NULL;
5504                             PL_preambled = FALSE;
5505                             if (PERLDB_LINE || PERLDB_SAVESRC)
5506                                 (void)gv_fetchfile(PL_origfilename);
5507                             goto retry;
5508                         }
5509                     }
5510                 }
5511             }
5512         }
5513         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5514             PL_lex_state = LEX_FORMLINE;
5515             start_force(PL_curforce);
5516             NEXTVAL_NEXTTOKE.ival = 0;
5517             force_next(FORMRBRACK);
5518             TOKEN(';');
5519         }
5520         goto retry;
5521     case '\r':
5522 #ifdef PERL_STRICT_CR
5523         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5524         Perl_croak(aTHX_
5525       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5526 #endif
5527     case ' ': case '\t': case '\f': case 013:
5528 #ifdef PERL_MAD
5529         PL_realtokenstart = -1;
5530         if (PL_madskills) {
5531           if (!PL_thiswhite)
5532             PL_thiswhite = newSVpvs("");
5533           sv_catpvn(PL_thiswhite, s, 1);
5534         }
5535 #endif
5536         s++;
5537         goto retry;
5538     case '#':
5539     case '\n':
5540 #ifdef PERL_MAD
5541         PL_realtokenstart = -1;
5542         if (PL_madskills)
5543             PL_faketokens = 0;
5544 #endif
5545         if (PL_lex_state != LEX_NORMAL ||
5546              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5547             if (*s == '#' && s == PL_linestart && PL_in_eval
5548              && !PL_rsfp && !PL_parser->filtered) {
5549                 /* handle eval qq[#line 1 "foo"\n ...] */
5550                 CopLINE_dec(PL_curcop);
5551                 incline(s);
5552             }
5553             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5554                 s = SKIPSPACE0(s);
5555                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5556                     incline(s);
5557             }
5558             else {
5559                 const bool in_comment = *s == '#';
5560                 d = s;
5561                 while (d < PL_bufend && *d != '\n')
5562                     d++;
5563                 if (d < PL_bufend)
5564                     d++;
5565                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5566                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5567                                d, PL_bufend);
5568 #ifdef PERL_MAD
5569                 if (PL_madskills)
5570                     PL_thiswhite = newSVpvn(s, d - s);
5571 #endif
5572                 s = d;
5573                 if (in_comment && d == PL_bufend
5574                  && PL_lex_state == LEX_INTERPNORMAL
5575                  && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5576                  && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5577                 else incline(s);
5578             }
5579             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5580                 PL_lex_state = LEX_FORMLINE;
5581                 start_force(PL_curforce);
5582                 NEXTVAL_NEXTTOKE.ival = 0;
5583                 force_next(FORMRBRACK);
5584                 TOKEN(';');
5585             }
5586         }
5587         else {
5588 #ifdef PERL_MAD
5589             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5590                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5591                     PL_faketokens = 0;
5592                     s = SKIPSPACE0(s);
5593                     TOKEN(PEG); /* make sure any #! line is accessible */
5594                 }
5595                 s = SKIPSPACE0(s);
5596             }
5597             else {
5598 /*              if (PL_madskills && PL_lex_formbrack) { */
5599                     d = s;
5600                     while (d < PL_bufend && *d != '\n')
5601                         d++;
5602                     if (d < PL_bufend)
5603                         d++;
5604                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5605                       Perl_croak(aTHX_ "panic: input overflow");
5606                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5607                         if (!PL_thiswhite)
5608                             PL_thiswhite = newSVpvs("");
5609                         if (CopLINE(PL_curcop) == 1) {
5610                             sv_setpvs(PL_thiswhite, "");
5611                             PL_faketokens = 0;
5612                         }
5613                         sv_catpvn(PL_thiswhite, s, d - s);
5614                     }
5615                     s = d;
5616 /*              }
5617                 *s = '\0';
5618                 PL_bufend = s; */
5619             }
5620 #else
5621             while (s < PL_bufend && *s != '\n')
5622                 s++;
5623             if (s < PL_bufend)
5624                 s++;
5625             else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5626               Perl_croak(aTHX_ "panic: input overflow");
5627 #endif
5628         }
5629         goto retry;
5630     case '-':
5631         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5632             I32 ftst = 0;
5633             char tmp;
5634
5635             s++;
5636             PL_bufptr = s;
5637             tmp = *s++;
5638
5639             while (s < PL_bufend && SPACE_OR_TAB(*s))
5640                 s++;
5641
5642             if (strnEQ(s,"=>",2)) {
5643                 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5644                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5645                 OPERATOR('-');          /* unary minus */
5646             }
5647             PL_last_uni = PL_oldbufptr;
5648             switch (tmp) {
5649             case 'r': ftst = OP_FTEREAD;        break;
5650             case 'w': ftst = OP_FTEWRITE;       break;
5651             case 'x': ftst = OP_FTEEXEC;        break;
5652             case 'o': ftst = OP_FTEOWNED;       break;
5653             case 'R': ftst = OP_FTRREAD;        break;
5654             case 'W': ftst = OP_FTRWRITE;       break;
5655             case 'X': ftst = OP_FTREXEC;        break;
5656             case 'O': ftst = OP_FTROWNED;       break;
5657             case 'e': ftst = OP_FTIS;           break;
5658             case 'z': ftst = OP_FTZERO;         break;
5659             case 's': ftst = OP_FTSIZE;         break;
5660             case 'f': ftst = OP_FTFILE;         break;
5661             case 'd': ftst = OP_FTDIR;          break;
5662             case 'l': ftst = OP_FTLINK;         break;
5663             case 'p': ftst = OP_FTPIPE;         break;
5664             case 'S': ftst = OP_FTSOCK;         break;
5665             case 'u': ftst = OP_FTSUID;         break;
5666             case 'g': ftst = OP_FTSGID;         break;
5667             case 'k': ftst = OP_FTSVTX;         break;
5668             case 'b': ftst = OP_FTBLK;          break;
5669             case 'c': ftst = OP_FTCHR;          break;
5670             case 't': ftst = OP_FTTTY;          break;
5671             case 'T': ftst = OP_FTTEXT;         break;
5672             case 'B': ftst = OP_FTBINARY;       break;
5673             case 'M': case 'A': case 'C':
5674                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5675                 switch (tmp) {
5676                 case 'M': ftst = OP_FTMTIME;    break;
5677                 case 'A': ftst = OP_FTATIME;    break;
5678                 case 'C': ftst = OP_FTCTIME;    break;
5679                 default:                        break;
5680                 }
5681                 break;
5682             default:
5683                 break;
5684             }
5685             if (ftst) {
5686                 PL_last_lop_op = (OPCODE)ftst;
5687                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5688                         "### Saw file test %c\n", (int)tmp);
5689                 } );
5690                 FTST(ftst);
5691             }
5692             else {
5693                 /* Assume it was a minus followed by a one-letter named
5694                  * subroutine call (or a -bareword), then. */
5695                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5696                         "### '-%c' looked like a file test but was not\n",
5697                         (int) tmp);
5698                 } );
5699                 s = --PL_bufptr;
5700             }
5701         }
5702         {
5703             const char tmp = *s++;
5704             if (*s == tmp) {
5705                 s++;
5706                 if (PL_expect == XOPERATOR)
5707                     TERM(POSTDEC);
5708                 else
5709                     OPERATOR(PREDEC);
5710             }
5711             else if (*s == '>') {
5712                 s++;
5713                 s = SKIPSPACE1(s);
5714                 if (isIDFIRST_lazy_if(s,UTF)) {
5715                     s = force_word(s,METHOD,FALSE,TRUE);
5716                     TOKEN(ARROW);
5717                 }
5718                 else if (*s == '$')
5719                     OPERATOR(ARROW);
5720                 else
5721                     TERM(ARROW);
5722             }
5723             if (PL_expect == XOPERATOR) {
5724                 if (*s == '=' && !PL_lex_allbrackets &&
5725                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5726                     s--;
5727                     TOKEN(0);
5728                 }
5729                 Aop(OP_SUBTRACT);
5730             }
5731             else {
5732                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5733                     check_uni();
5734                 OPERATOR('-');          /* unary minus */
5735             }
5736         }
5737
5738     case '+':
5739         {
5740             const char tmp = *s++;
5741             if (*s == tmp) {
5742                 s++;
5743                 if (PL_expect == XOPERATOR)
5744                     TERM(POSTINC);
5745                 else
5746                     OPERATOR(PREINC);
5747             }
5748             if (PL_expect == XOPERATOR) {
5749                 if (*s == '=' && !PL_lex_allbrackets &&
5750                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5751                     s--;
5752                     TOKEN(0);
5753                 }
5754                 Aop(OP_ADD);
5755             }
5756             else {
5757                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5758                     check_uni();
5759                 OPERATOR('+');
5760             }
5761         }
5762
5763     case '*':
5764         if (PL_expect != XOPERATOR) {
5765             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5766             PL_expect = XOPERATOR;
5767             force_ident(PL_tokenbuf, '*');
5768             if (!*PL_tokenbuf)
5769                 PREREF('*');
5770             TERM('*');
5771         }
5772         s++;
5773         if (*s == '*') {
5774             s++;
5775             if (*s == '=' && !PL_lex_allbrackets &&
5776                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5777                 s -= 2;
5778                 TOKEN(0);
5779             }
5780             PWop(OP_POW);
5781         }
5782         if (*s == '=' && !PL_lex_allbrackets &&
5783                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5784             s--;
5785             TOKEN(0);
5786         }
5787         PL_parser->saw_infix_sigil = 1;
5788         Mop(OP_MULTIPLY);
5789
5790     case '%':
5791         if (PL_expect == XOPERATOR) {
5792             if (s[1] == '=' && !PL_lex_allbrackets &&
5793                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5794                 TOKEN(0);
5795             ++s;
5796             PL_parser->saw_infix_sigil = 1;
5797             Mop(OP_MODULO);
5798         }
5799         PL_tokenbuf[0] = '%';
5800         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5801                 sizeof PL_tokenbuf - 1, FALSE);
5802         if (!PL_tokenbuf[1]) {
5803             PREREF('%');
5804         }
5805         PL_expect = XOPERATOR;
5806         force_ident_maybe_lex('%');
5807         TERM('%');
5808
5809     case '^':
5810         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5811                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5812             TOKEN(0);
5813         s++;
5814         BOop(OP_BIT_XOR);
5815     case '[':
5816         if (PL_lex_brackets > 100)
5817             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5818         PL_lex_brackstack[PL_lex_brackets++] = 0;
5819         PL_lex_allbrackets++;
5820         {
5821             const char tmp = *s++;
5822             OPERATOR(tmp);
5823         }
5824     case '~':
5825         if (s[1] == '~'
5826             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5827         {
5828             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5829                 TOKEN(0);
5830             s += 2;
5831             Perl_ck_warner_d(aTHX_
5832                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5833                 "Smartmatch is experimental");
5834             Eop(OP_SMARTMATCH);
5835         }
5836         s++;
5837         OPERATOR('~');
5838     case ',':
5839         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5840             TOKEN(0);
5841         s++;
5842         OPERATOR(',');
5843     case ':':
5844         if (s[1] == ':') {
5845             len = 0;
5846             goto just_a_word_zero_gv;
5847         }
5848         s++;
5849         switch (PL_expect) {
5850             OP *attrs;
5851 #ifdef PERL_MAD
5852             I32 stuffstart;
5853 #endif
5854         case XOPERATOR:
5855             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5856                 break;
5857             PL_bufptr = s;      /* update in case we back off */
5858             if (*s == '=') {
5859                 Perl_croak(aTHX_
5860                            "Use of := for an empty attribute list is not allowed");
5861             }
5862             goto grabattrs;
5863         case XATTRBLOCK:
5864             PL_expect = XBLOCK;
5865             goto grabattrs;
5866         case XATTRTERM:
5867             PL_expect = XTERMBLOCK;
5868          grabattrs:
5869 #ifdef PERL_MAD
5870             stuffstart = s - SvPVX(PL_linestr) - 1;
5871 #endif
5872             s = PEEKSPACE(s);
5873             attrs = NULL;
5874             while (isIDFIRST_lazy_if(s,UTF)) {
5875                 I32 tmp;
5876                 SV *sv;
5877                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5878                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5879                     if (tmp < 0) tmp = -tmp;
5880                     switch (tmp) {
5881                     case KEY_or:
5882                     case KEY_and:
5883                     case KEY_for:
5884                     case KEY_foreach:
5885                     case KEY_unless:
5886                     case KEY_if:
5887                     case KEY_while:
5888                     case KEY_until:
5889                         goto got_attrs;
5890                     default:
5891                         break;
5892                     }
5893                 }
5894                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5895                 if (*d == '(') {
5896                     d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5897                     if (!d) {
5898                         /* MUST advance bufptr here to avoid bogus
5899                            "at end of line" context messages from yyerror().
5900                          */
5901                         PL_bufptr = s + len;
5902                         yyerror("Unterminated attribute parameter in attribute list");
5903                         if (attrs)
5904                             op_free(attrs);
5905                         sv_free(sv);
5906                         return REPORT(0);       /* EOF indicator */
5907                     }
5908                 }
5909                 if (PL_lex_stuff) {
5910                     sv_catsv(sv, PL_lex_stuff);
5911                     attrs = op_append_elem(OP_LIST, attrs,
5912                                         newSVOP(OP_CONST, 0, sv));
5913                     SvREFCNT_dec(PL_lex_stuff);
5914                     PL_lex_stuff = NULL;
5915                 }
5916                 else {
5917                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5918                         sv_free(sv);
5919                         if (PL_in_my == KEY_our) {
5920                             deprecate(":unique");
5921                         }
5922                         else
5923                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5924                     }
5925
5926                     /* NOTE: any CV attrs applied here need to be part of
5927                        the CVf_BUILTIN_ATTRS define in cv.h! */
5928                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5929                         sv_free(sv);
5930                         CvLVALUE_on(PL_compcv);
5931                     }
5932                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5933                         sv_free(sv);
5934                         deprecate(":locked");
5935                     }
5936                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5937                         sv_free(sv);
5938                         CvMETHOD_on(PL_compcv);
5939                     }
5940                     /* After we've set the flags, it could be argued that
5941                        we don't need to do the attributes.pm-based setting
5942                        process, and shouldn't bother appending recognized
5943                        flags.  To experiment with that, uncomment the
5944                        following "else".  (Note that's already been
5945                        uncommented.  That keeps the above-applied built-in
5946                        attributes from being intercepted (and possibly
5947                        rejected) by a package's attribute routines, but is
5948                        justified by the performance win for the common case
5949                        of applying only built-in attributes.) */
5950                     else
5951                         attrs = op_append_elem(OP_LIST, attrs,
5952                                             newSVOP(OP_CONST, 0,
5953                                                     sv));
5954                 }
5955                 s = PEEKSPACE(d);
5956                 if (*s == ':' && s[1] != ':')
5957                     s = PEEKSPACE(s+1);
5958                 else if (s == d)
5959                     break;      /* require real whitespace or :'s */
5960                 /* XXX losing whitespace on sequential attributes here */
5961             }
5962             {
5963                 const char tmp
5964                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5965                 if (*s != ';' && *s != '}' && *s != tmp
5966                     && (tmp != '=' || *s != ')')) {
5967                     const char q = ((*s == '\'') ? '"' : '\'');
5968                     /* If here for an expression, and parsed no attrs, back
5969                        off. */
5970                     if (tmp == '=' && !attrs) {
5971                         s = PL_bufptr;
5972                         break;
5973                     }
5974                     /* MUST advance bufptr here to avoid bogus "at end of line"
5975                        context messages from yyerror().
5976                     */
5977                     PL_bufptr = s;
5978                     yyerror( (const char *)
5979                              (*s
5980                               ? Perl_form(aTHX_ "Invalid separator character "
5981                                           "%c%c%c in attribute list", q, *s, q)
5982                               : "Unterminated attribute list" ) );
5983                     if (attrs)
5984                         op_free(attrs);
5985                     OPERATOR(':');
5986                 }
5987             }
5988         got_attrs:
5989             if (attrs) {
5990                 start_force(PL_curforce);
5991                 NEXTVAL_NEXTTOKE.opval = attrs;
5992                 CURMAD('_', PL_nextwhite);
5993                 force_next(THING);
5994             }
5995 #ifdef PERL_MAD
5996             if (PL_madskills) {
5997                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5998                                      (s - SvPVX(PL_linestr)) - stuffstart);
5999             }
6000 #endif
6001             TOKEN(COLONATTR);
6002         }
6003         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6004             s--;
6005             TOKEN(0);
6006         }
6007         PL_lex_allbrackets--;
6008         OPERATOR(':');
6009     case '(':
6010         s++;
6011         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6012             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6013         else
6014             PL_expect = XTERM;
6015         s = SKIPSPACE1(s);
6016         PL_lex_allbrackets++;
6017         TOKEN('(');
6018     case ';':
6019         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6020             TOKEN(0);
6021         CLINE;
6022         s++;
6023         OPERATOR(';');
6024     case ')':
6025         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6026             TOKEN(0);
6027         s++;
6028         PL_lex_allbrackets--;
6029         s = SKIPSPACE1(s);
6030         if (*s == '{')
6031             PREBLOCK(')');
6032         TERM(')');
6033     case ']':
6034         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6035             TOKEN(0);
6036         s++;
6037         if (PL_lex_brackets <= 0)
6038             yyerror("Unmatched right square bracket");
6039         else
6040             --PL_lex_brackets;
6041         PL_lex_allbrackets--;
6042         if (PL_lex_state == LEX_INTERPNORMAL) {
6043             if (PL_lex_brackets == 0) {
6044                 if (*s == '-' && s[1] == '>')
6045                     PL_lex_state = LEX_INTERPENDMAYBE;
6046                 else if (*s != '[' && *s != '{')
6047                     PL_lex_state = LEX_INTERPEND;
6048             }
6049         }
6050         TERM(']');
6051     case '{':
6052         s++;
6053       leftbracket:
6054         if (PL_lex_brackets > 100) {
6055             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6056         }
6057         switch (PL_expect) {
6058         case XTERM:
6059             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6060             PL_lex_allbrackets++;
6061             OPERATOR(HASHBRACK);
6062         case XOPERATOR:
6063             while (s < PL_bufend && SPACE_OR_TAB(*s))
6064                 s++;
6065             d = s;
6066             PL_tokenbuf[0] = '\0';
6067             if (d < PL_bufend && *d == '-') {
6068                 PL_tokenbuf[0] = '-';
6069                 d++;
6070                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6071                     d++;
6072             }
6073             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6074                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6075                               FALSE, &len);
6076                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6077                     d++;
6078                 if (*d == '}') {
6079                     const char minus = (PL_tokenbuf[0] == '-');
6080                     s = force_word(s + minus, WORD, FALSE, TRUE);
6081                     if (minus)
6082                         force_next('-');
6083                 }
6084             }
6085             /* FALL THROUGH */
6086         case XATTRBLOCK:
6087         case XBLOCK:
6088             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6089             PL_lex_allbrackets++;
6090             PL_expect = XSTATE;
6091             break;
6092         case XATTRTERM:
6093         case XTERMBLOCK:
6094             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6095             PL_lex_allbrackets++;
6096             PL_expect = XSTATE;
6097             break;
6098         default: {
6099                 const char *t;
6100                 if (PL_oldoldbufptr == PL_last_lop)
6101                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6102                 else
6103                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6104                 PL_lex_allbrackets++;
6105                 s = SKIPSPACE1(s);
6106                 if (*s == '}') {
6107                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6108                         PL_expect = XTERM;
6109                         /* This hack is to get the ${} in the message. */
6110                         PL_bufptr = s+1;
6111                         yyerror("syntax error");
6112                         break;
6113                     }
6114                     OPERATOR(HASHBRACK);
6115                 }
6116                 /* This hack serves to disambiguate a pair of curlies
6117                  * as being a block or an anon hash.  Normally, expectation
6118                  * determines that, but in cases where we're not in a
6119                  * position to expect anything in particular (like inside
6120                  * eval"") we have to resolve the ambiguity.  This code
6121                  * covers the case where the first term in the curlies is a
6122                  * quoted string.  Most other cases need to be explicitly
6123                  * disambiguated by prepending a "+" before the opening
6124                  * curly in order to force resolution as an anon hash.
6125                  *
6126                  * XXX should probably propagate the outer expectation
6127                  * into eval"" to rely less on this hack, but that could
6128                  * potentially break current behavior of eval"".
6129                  * GSAR 97-07-21
6130                  */
6131                 t = s;
6132                 if (*s == '\'' || *s == '"' || *s == '`') {
6133                     /* common case: get past first string, handling escapes */
6134                     for (t++; t < PL_bufend && *t != *s;)
6135                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
6136                             t++;
6137                     t++;
6138                 }
6139                 else if (*s == 'q') {
6140                     if (++t < PL_bufend
6141                         && (!isWORDCHAR(*t)
6142                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6143                                 && !isWORDCHAR(*t))))
6144                     {
6145                         /* skip q//-like construct */
6146                         const char *tmps;
6147                         char open, close, term;
6148                         I32 brackets = 1;
6149
6150                         while (t < PL_bufend && isSPACE(*t))
6151                             t++;
6152                         /* check for q => */
6153                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6154                             OPERATOR(HASHBRACK);
6155                         }
6156                         term = *t;
6157                         open = term;
6158                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6159                             term = tmps[5];
6160                         close = term;
6161                         if (open == close)
6162                             for (t++; t < PL_bufend; t++) {
6163                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6164                                     t++;
6165                                 else if (*t == open)
6166                                     break;
6167                             }
6168                         else {
6169                             for (t++; t < PL_bufend; t++) {
6170                                 if (*t == '\\' && t+1 < PL_bufend)
6171                                     t++;
6172                                 else if (*t == close && --brackets <= 0)
6173                                     break;
6174                                 else if (*t == open)
6175                                     brackets++;
6176                             }
6177                         }
6178                         t++;
6179                     }
6180                     else
6181                         /* skip plain q word */
6182                         while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6183                              t += UTF8SKIP(t);
6184                 }
6185                 else if (isWORDCHAR_lazy_if(t,UTF)) {
6186                     t += UTF8SKIP(t);
6187                     while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6188                          t += UTF8SKIP(t);
6189                 }
6190                 while (t < PL_bufend && isSPACE(*t))
6191                     t++;
6192                 /* if comma follows first term, call it an anon hash */
6193                 /* XXX it could be a comma expression with loop modifiers */
6194                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6195                                    || (*t == '=' && t[1] == '>')))
6196                     OPERATOR(HASHBRACK);
6197                 if (PL_expect == XREF)
6198                     PL_expect = XTERM;
6199                 else {
6200                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6201                     PL_expect = XSTATE;
6202                 }
6203             }
6204             break;
6205         }
6206         pl_yylval.ival = CopLINE(PL_curcop);
6207         if (isSPACE(*s) || *s == '#')
6208             PL_copline = NOLINE;   /* invalidate current command line number */
6209         TOKEN(formbrack ? '=' : '{');
6210     case '}':
6211         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6212             TOKEN(0);
6213       rightbracket:
6214         s++;
6215         if (PL_lex_brackets <= 0)
6216             yyerror("Unmatched right curly bracket");
6217         else
6218             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6219         PL_lex_allbrackets--;
6220         if (PL_lex_state == LEX_INTERPNORMAL) {
6221             if (PL_lex_brackets == 0) {
6222                 if (PL_expect & XFAKEBRACK) {
6223                     PL_expect &= XENUMMASK;
6224                     PL_lex_state = LEX_INTERPEND;
6225                     PL_bufptr = s;
6226 #if 0
6227                     if (PL_madskills) {
6228                         if (!PL_thiswhite)
6229                             PL_thiswhite = newSVpvs("");
6230                         sv_catpvs(PL_thiswhite,"}");
6231                     }
6232 #endif
6233                     return yylex();     /* ignore fake brackets */
6234                 }
6235                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6236                  && SvEVALED(PL_lex_repl))
6237                     PL_lex_state = LEX_INTERPEND;
6238                 else if (*s == '-' && s[1] == '>')
6239                     PL_lex_state = LEX_INTERPENDMAYBE;
6240                 else if (*s != '[' && *s != '{')
6241                     PL_lex_state = LEX_INTERPEND;
6242             }
6243         }
6244         if (PL_expect & XFAKEBRACK) {
6245             PL_expect &= XENUMMASK;
6246             PL_bufptr = s;
6247             return yylex();             /* ignore fake brackets */
6248         }
6249         start_force(PL_curforce);
6250         if (PL_madskills) {
6251             curmad('X', newSVpvn(s-1,1));
6252             CURMAD('_', PL_thiswhite);
6253         }
6254         force_next(formbrack ? '.' : '}');
6255         if (formbrack) LEAVE;
6256 #ifdef PERL_MAD
6257         if (PL_madskills && !PL_thistoken)
6258             PL_thistoken = newSVpvs("");
6259 #endif
6260         if (formbrack == 2) { /* means . where arguments were expected */
6261             start_force(PL_curforce);
6262             force_next(';');
6263             TOKEN(FORMRBRACK);
6264         }
6265         TOKEN(';');
6266     case '&':
6267         s++;
6268         if (*s++ == '&') {
6269             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6270                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6271                 s -= 2;
6272                 TOKEN(0);
6273             }
6274             AOPERATOR(ANDAND);
6275         }
6276         s--;
6277         if (PL_expect == XOPERATOR) {
6278             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6279                 && isIDFIRST_lazy_if(s,UTF))
6280             {
6281                 CopLINE_dec(PL_curcop);
6282                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6283                 CopLINE_inc(PL_curcop);
6284             }
6285             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6286                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6287                 s--;
6288                 TOKEN(0);
6289             }
6290             PL_parser->saw_infix_sigil = 1;
6291             BAop(OP_BIT_AND);
6292         }
6293
6294         PL_tokenbuf[0] = '&';
6295         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6296                        sizeof PL_tokenbuf - 1, TRUE);
6297         if (PL_tokenbuf[1]) {
6298             PL_expect = XOPERATOR;
6299             force_ident_maybe_lex('&');
6300         }
6301         else
6302             PREREF('&');
6303         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6304         TERM('&');
6305
6306     case '|':
6307         s++;
6308         if (*s++ == '|') {
6309             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6310                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6311                 s -= 2;
6312                 TOKEN(0);
6313             }
6314             AOPERATOR(OROR);
6315         }
6316         s--;
6317         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6318                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6319             s--;
6320             TOKEN(0);
6321         }
6322         BOop(OP_BIT_OR);
6323     case '=':
6324         s++;
6325         {
6326             const char tmp = *s++;
6327             if (tmp == '=') {
6328                 if (!PL_lex_allbrackets &&
6329                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6330                     s -= 2;
6331                     TOKEN(0);
6332                 }
6333                 Eop(OP_EQ);
6334             }
6335             if (tmp == '>') {
6336                 if (!PL_lex_allbrackets &&
6337                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6338                     s -= 2;
6339                     TOKEN(0);
6340                 }
6341                 OPERATOR(',');
6342             }
6343             if (tmp == '~')
6344                 PMop(OP_MATCH);
6345             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6346                 && strchr("+-*/%.^&|<",tmp))
6347                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6348                             "Reversed %c= operator",(int)tmp);
6349             s--;
6350             if (PL_expect == XSTATE && isALPHA(tmp) &&
6351                 (s == PL_linestart+1 || s[-2] == '\n') )
6352                 {
6353                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6354                         || PL_lex_state != LEX_NORMAL) {
6355                         d = PL_bufend;
6356                         while (s < d) {
6357                             if (*s++ == '\n') {
6358                                 incline(s);
6359                                 if (strnEQ(s,"=cut",4)) {
6360                                     s = strchr(s,'\n');
6361                                     if (s)
6362                                         s++;
6363                                     else
6364                                         s = d;
6365                                     incline(s);
6366                                     goto retry;
6367                                 }
6368                             }
6369                         }
6370                         goto retry;
6371                     }
6372 #ifdef PERL_MAD
6373                     if (PL_madskills) {
6374                         if (!PL_thiswhite)
6375                             PL_thiswhite = newSVpvs("");
6376                         sv_catpvn(PL_thiswhite, PL_linestart,
6377                                   PL_bufend - PL_linestart);
6378                     }
6379 #endif
6380                     s = PL_bufend;
6381                     PL_parser->in_pod = 1;
6382                     goto retry;
6383                 }
6384         }
6385         if (PL_expect == XBLOCK) {
6386             const char *t = s;
6387 #ifdef PERL_STRICT_CR
6388             while (SPACE_OR_TAB(*t))
6389 #else
6390             while (SPACE_OR_TAB(*t) || *t == '\r')
6391 #endif
6392                 t++;
6393             if (*t == '\n' || *t == '#') {
6394                 formbrack = 1;
6395                 ENTER;
6396                 SAVEI8(PL_parser->form_lex_state);
6397                 SAVEI32(PL_lex_formbrack);
6398                 PL_parser->form_lex_state = PL_lex_state;
6399                 PL_lex_formbrack = PL_lex_brackets + 1;
6400                 goto leftbracket;
6401             }
6402         }
6403         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6404             s--;
6405             TOKEN(0);
6406         }
6407         pl_yylval.ival = 0;
6408         OPERATOR(ASSIGNOP);
6409     case '!':
6410         s++;
6411         {
6412             const char tmp = *s++;
6413             if (tmp == '=') {
6414                 /* was this !=~ where !~ was meant?
6415                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6416
6417                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6418                     const char *t = s+1;
6419
6420                     while (t < PL_bufend && isSPACE(*t))
6421                         ++t;
6422
6423                     if (*t == '/' || *t == '?' ||
6424                         ((*t == 'm' || *t == 's' || *t == 'y')
6425                          && !isWORDCHAR(t[1])) ||
6426                         (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6427                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6428                                     "!=~ should be !~");
6429                 }
6430                 if (!PL_lex_allbrackets &&
6431                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6432                     s -= 2;
6433                     TOKEN(0);
6434                 }
6435                 Eop(OP_NE);
6436             }
6437             if (tmp == '~')
6438                 PMop(OP_NOT);
6439         }
6440         s--;
6441         OPERATOR('!');
6442     case '<':
6443         if (PL_expect != XOPERATOR) {
6444             if (s[1] != '<' && !strchr(s,'>'))
6445                 check_uni();
6446             if (s[1] == '<')
6447                 s = scan_heredoc(s);
6448             else
6449                 s = scan_inputsymbol(s);
6450             PL_expect = XOPERATOR;
6451             TOKEN(sublex_start());
6452         }
6453         s++;
6454         {
6455             char tmp = *s++;
6456             if (tmp == '<') {
6457                 if (*s == '=' && !PL_lex_allbrackets &&
6458                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6459                     s -= 2;
6460                     TOKEN(0);
6461                 }
6462                 SHop(OP_LEFT_SHIFT);
6463             }
6464             if (tmp == '=') {
6465                 tmp = *s++;
6466                 if (tmp == '>') {
6467                     if (!PL_lex_allbrackets &&
6468                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6469                         s -= 3;
6470                         TOKEN(0);
6471                     }
6472                     Eop(OP_NCMP);
6473                 }
6474                 s--;
6475                 if (!PL_lex_allbrackets &&
6476                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6477                     s -= 2;
6478                     TOKEN(0);
6479                 }
6480                 Rop(OP_LE);
6481             }
6482         }
6483         s--;
6484         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6485             s--;
6486             TOKEN(0);
6487         }
6488         Rop(OP_LT);
6489     case '>':
6490         s++;
6491         {
6492             const char tmp = *s++;
6493             if (tmp == '>') {
6494                 if (*s == '=' && !PL_lex_allbrackets &&
6495                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6496                     s -= 2;
6497                     TOKEN(0);
6498                 }
6499                 SHop(OP_RIGHT_SHIFT);
6500             }
6501             else if (tmp == '=') {
6502                 if (!PL_lex_allbrackets &&
6503                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6504                     s -= 2;
6505                     TOKEN(0);
6506                 }
6507                 Rop(OP_GE);
6508             }
6509         }
6510         s--;
6511         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6512             s--;
6513             TOKEN(0);
6514         }
6515         Rop(OP_GT);
6516
6517     case '$':
6518         CLINE;
6519
6520         if (PL_expect == XOPERATOR) {
6521             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6522                 return deprecate_commaless_var_list();
6523             }
6524         }
6525
6526         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6527             PL_tokenbuf[0] = '@';
6528             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6529                            sizeof PL_tokenbuf - 1, FALSE);
6530             if (PL_expect == XOPERATOR)
6531                 no_op("Array length", s);
6532             if (!PL_tokenbuf[1])
6533                 PREREF(DOLSHARP);
6534             PL_expect = XOPERATOR;
6535             force_ident_maybe_lex('#');
6536             TOKEN(DOLSHARP);
6537         }
6538
6539         PL_tokenbuf[0] = '$';
6540         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6541                        sizeof PL_tokenbuf - 1, FALSE);
6542         if (PL_expect == XOPERATOR)
6543             no_op("Scalar", s);
6544         if (!PL_tokenbuf[1]) {
6545             if (s == PL_bufend)
6546                 yyerror("Final $ should be \\$ or $name");
6547             PREREF('$');
6548         }
6549
6550         d = s;
6551         {
6552             const char tmp = *s;
6553             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6554                 s = SKIPSPACE1(s);
6555
6556             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6557                 && intuit_more(s)) {
6558                 if (*s == '[') {
6559                     PL_tokenbuf[0] = '@';
6560                     if (ckWARN(WARN_SYNTAX)) {
6561                         char *t = s+1;
6562
6563                         while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6564                             t++;
6565                         if (*t++ == ',') {
6566                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6567                             while (t < PL_bufend && *t != ']')
6568                                 t++;
6569                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6570                                         "Multidimensional syntax %.*s not supported",
6571                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6572                         }
6573                     }
6574                 }
6575                 else if (*s == '{') {
6576                     char *t;
6577                     PL_tokenbuf[0] = '%';
6578                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6579                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6580                         {
6581                             char tmpbuf[sizeof PL_tokenbuf];
6582                             do {
6583                                 t++;
6584                             } while (isSPACE(*t));
6585                             if (isIDFIRST_lazy_if(t,UTF)) {
6586                                 STRLEN len;
6587                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6588                                               &len);
6589                                 while (isSPACE(*t))
6590                                     t++;
6591                                 if (*t == ';'
6592                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6593                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6594                                         "You need to quote \"%"UTF8f"\"",
6595                                          UTF8fARG(UTF, len, tmpbuf));
6596                             }
6597                         }
6598                 }
6599             }
6600
6601             PL_expect = XOPERATOR;
6602             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6603                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6604                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6605                     PL_expect = XOPERATOR;
6606                 else if (strchr("$@\"'`q", *s))
6607                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6608                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6609                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6610                 else if (isIDFIRST_lazy_if(s,UTF)) {
6611                     char tmpbuf[sizeof PL_tokenbuf];
6612                     int t2;
6613                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6614                     if ((t2 = keyword(tmpbuf, len, 0))) {
6615                         /* binary operators exclude handle interpretations */
6616                         switch (t2) {
6617                         case -KEY_x:
6618                         case -KEY_eq:
6619                         case -KEY_ne:
6620                         case -KEY_gt:
6621                         case -KEY_lt:
6622                         case -KEY_ge:
6623                         case -KEY_le:
6624                         case -KEY_cmp:
6625                             break;
6626                         default:
6627                             PL_expect = XTERM;  /* e.g. print $fh length() */
6628                             break;
6629                         }
6630                     }
6631                     else {
6632                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6633                     }
6634                 }
6635                 else if (isDIGIT(*s))
6636                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6637                 else if (*s == '.' && isDIGIT(s[1]))
6638                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6639                 else if ((*s == '?' || *s == '-' || *s == '+')
6640                          && !isSPACE(s[1]) && s[1] != '=')
6641                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6642                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6643                          && s[1] != '/')
6644                     PL_expect = XTERM;          /* e.g. print $fh /.../
6645                                                    XXX except DORDOR operator
6646                                                 */
6647                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6648                          && s[2] != '=')
6649                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6650             }
6651         }
6652         force_ident_maybe_lex('$');
6653         TOKEN('$');
6654
6655     case '@':
6656         if (PL_expect == XOPERATOR)
6657             no_op("Array", s);
6658         PL_tokenbuf[0] = '@';
6659         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6660         if (!PL_tokenbuf[1]) {
6661             PREREF('@');
6662         }
6663         if (PL_lex_state == LEX_NORMAL)
6664             s = SKIPSPACE1(s);
6665         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6666             if (*s == '{')
6667                 PL_tokenbuf[0] = '%';
6668
6669             /* Warn about @ where they meant $. */
6670             if (*s == '[' || *s == '{') {
6671                 if (ckWARN(WARN_SYNTAX)) {
6672                     const char *t = s + 1;
6673                     while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6674                         t += UTF ? UTF8SKIP(t) : 1;
6675                     if (*t == '}' || *t == ']') {
6676                         t++;
6677                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6678        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6679                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6680                          "Scalar value %"UTF8f" better written as $%"UTF8f,
6681                           UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
6682                           UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
6683                     }
6684                 }
6685             }
6686         }
6687         PL_expect = XOPERATOR;
6688         force_ident_maybe_lex('@');
6689         TERM('@');
6690
6691      case '/':                  /* may be division, defined-or, or pattern */
6692         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6693             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6694                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6695                 TOKEN(0);
6696             s += 2;
6697             AOPERATOR(DORDOR);
6698         }
6699      case '?':                  /* may either be conditional or pattern */
6700         if (PL_expect == XOPERATOR) {
6701              char tmp = *s++;
6702              if(tmp == '?') {
6703                 if (!PL_lex_allbrackets &&
6704                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6705                     s--;
6706                     TOKEN(0);
6707                 }
6708                 PL_lex_allbrackets++;
6709                 OPERATOR('?');
6710              }
6711              else {
6712                  tmp = *s++;
6713                  if(tmp == '/') {
6714                      /* A // operator. */
6715                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6716                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6717                                             LEX_FAKEEOF_LOGIC)) {
6718                         s -= 2;
6719                         TOKEN(0);
6720                     }
6721                     AOPERATOR(DORDOR);
6722                  }
6723                  else {
6724                      s--;
6725                      if (*s == '=' && !PL_lex_allbrackets &&
6726                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6727                          s--;
6728                          TOKEN(0);
6729                      }
6730                      Mop(OP_DIVIDE);
6731                  }
6732              }
6733          }
6734          else {
6735              /* Disable warning on "study /blah/" */
6736              if (PL_oldoldbufptr == PL_last_uni
6737               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6738                   || memNE(PL_last_uni, "study", 5)
6739                   || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6740               ))
6741                  check_uni();
6742              if (*s == '?')
6743                  deprecate("?PATTERN? without explicit operator");
6744              s = scan_pat(s,OP_MATCH);
6745              TERM(sublex_start());
6746          }
6747
6748     case '.':
6749         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6750 #ifdef PERL_STRICT_CR
6751             && s[1] == '\n'
6752 #else
6753             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6754 #endif
6755             && (s == PL_linestart || s[-1] == '\n') )
6756         {
6757             PL_expect = XSTATE;
6758             formbrack = 2; /* dot seen where arguments expected */
6759             goto rightbracket;
6760         }
6761         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6762             s += 3;
6763             OPERATOR(YADAYADA);
6764         }
6765         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6766             char tmp = *s++;
6767             if (*s == tmp) {
6768                 if (!PL_lex_allbrackets &&
6769                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6770                     s--;
6771                     TOKEN(0);
6772                 }
6773                 s++;
6774                 if (*s == tmp) {
6775                     s++;
6776                     pl_yylval.ival = OPf_SPECIAL;
6777                 }
6778                 else
6779                     pl_yylval.ival = 0;
6780                 OPERATOR(DOTDOT);
6781             }
6782             if (*s == '=' && !PL_lex_allbrackets &&
6783                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6784                 s--;
6785                 TOKEN(0);
6786             }
6787             Aop(OP_CONCAT);
6788         }
6789         /* FALL THROUGH */
6790     case '0': case '1': case '2': case '3': case '4':
6791     case '5': case '6': case '7': case '8': case '9':
6792         s = scan_num(s, &pl_yylval);
6793         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6794         if (PL_expect == XOPERATOR)
6795             no_op("Number",s);
6796         TERM(THING);
6797
6798     case '\'':
6799         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6800         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6801         if (PL_expect == XOPERATOR) {
6802             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6803                 return deprecate_commaless_var_list();
6804             }
6805             else
6806                 no_op("String",s);
6807         }
6808         if (!s)
6809             missingterm(NULL);
6810         pl_yylval.ival = OP_CONST;
6811         TERM(sublex_start());
6812
6813     case '"':
6814         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6815         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6816         if (PL_expect == XOPERATOR) {
6817             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6818                 return deprecate_commaless_var_list();
6819             }
6820             else
6821                 no_op("String",s);
6822         }
6823         if (!s)
6824             missingterm(NULL);
6825         pl_yylval.ival = OP_CONST;
6826         /* FIXME. I think that this can be const if char *d is replaced by
6827            more localised variables.  */
6828         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6829             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6830                 pl_yylval.ival = OP_STRINGIFY;
6831                 break;
6832             }
6833         }
6834         TERM(sublex_start());
6835
6836     case '`':
6837         s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6838         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6839         if (PL_expect == XOPERATOR)
6840             no_op("Backticks",s);
6841         if (!s)
6842             missingterm(NULL);
6843         readpipe_override();
6844         TERM(sublex_start());
6845
6846     case '\\':
6847         s++;
6848         if (PL_lex_inwhat && isDIGIT(*s))
6849             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6850                            *s, *s);
6851         if (PL_expect == XOPERATOR)
6852             no_op("Backslash",s);
6853         OPERATOR(REFGEN);
6854
6855     case 'v':
6856         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6857             char *start = s + 2;
6858             while (isDIGIT(*start) || *start == '_')
6859                 start++;
6860             if (*start == '.' && isDIGIT(start[1])) {
6861                 s = scan_num(s, &pl_yylval);
6862                 TERM(THING);
6863             }
6864             else if ((*start == ':' && start[1] == ':')
6865                   || (PL_expect == XSTATE && *start == ':'))
6866                 goto keylookup;
6867             else if (PL_expect == XSTATE) {
6868                 d = start;
6869                 while (d < PL_bufend && isSPACE(*d)) d++;
6870                 if (*d == ':') goto keylookup;
6871             }
6872             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6873             if (!isALPHA(*start) && (PL_expect == XTERM
6874                         || PL_expect == XREF || PL_expect == XSTATE
6875                         || PL_expect == XTERMORDORDOR)) {
6876                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6877                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6878                 if (!gv) {
6879                     s = scan_num(s, &pl_yylval);
6880                     TERM(THING);
6881                 }
6882             }
6883         }
6884         goto keylookup;
6885     case 'x':
6886         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6887             s++;
6888             Mop(OP_REPEAT);
6889         }
6890         goto keylookup;
6891
6892     case '_':
6893     case 'a': case 'A':
6894     case 'b': case 'B':
6895     case 'c': case 'C':
6896     case 'd': case 'D':
6897     case 'e': case 'E':
6898     case 'f': case 'F':
6899     case 'g': case 'G':
6900     case 'h': case 'H':
6901     case 'i': case 'I':
6902     case 'j': case 'J':
6903     case 'k': case 'K':
6904     case 'l': case 'L':
6905     case 'm': case 'M':
6906     case 'n': case 'N':
6907     case 'o': case 'O':
6908     case 'p': case 'P':
6909     case 'q': case 'Q':
6910     case 'r': case 'R':
6911     case 's': case 'S':
6912     case 't': case 'T':
6913     case 'u': case 'U':
6914               case 'V':
6915     case 'w': case 'W':
6916               case 'X':
6917     case 'y': case 'Y':
6918     case 'z': case 'Z':
6919
6920       keylookup: {
6921         bool anydelim;
6922         bool lex;
6923         I32 tmp;
6924         SV *sv;
6925         CV *cv;
6926         PADOFFSET off;
6927         OP *rv2cv_op;
6928
6929         lex = FALSE;
6930         orig_keyword = 0;
6931         off = 0;
6932         sv = NULL;
6933         cv = NULL;
6934         gv = NULL;
6935         gvp = NULL;
6936         rv2cv_op = NULL;
6937
6938         PL_bufptr = s;
6939         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6940
6941         /* Some keywords can be followed by any delimiter, including ':' */
6942         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6943
6944         /* x::* is just a word, unless x is "CORE" */
6945         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6946             goto just_a_word;
6947
6948         d = s;
6949         while (d < PL_bufend && isSPACE(*d))
6950                 d++;    /* no comments skipped here, or s### is misparsed */
6951
6952         /* Is this a word before a => operator? */
6953         if (*d == '=' && d[1] == '>') {
6954           fat_arrow:
6955             CLINE;
6956             pl_yylval.opval
6957                 = (OP*)newSVOP(OP_CONST, 0,
6958                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6959             pl_yylval.opval->op_private = OPpCONST_BARE;
6960             TERM(WORD);
6961         }
6962
6963         /* Check for plugged-in keyword */
6964         {
6965             OP *o;
6966             int result;
6967             char *saved_bufptr = PL_bufptr;
6968             PL_bufptr = s;
6969             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6970             s = PL_bufptr;
6971             if (result == KEYWORD_PLUGIN_DECLINE) {
6972                 /* not a plugged-in keyword */
6973                 PL_bufptr = saved_bufptr;
6974             } else if (result == KEYWORD_PLUGIN_STMT) {
6975                 pl_yylval.opval = o;
6976                 CLINE;
6977                 PL_expect = XSTATE;
6978                 return REPORT(PLUGSTMT);
6979             } else if (result == KEYWORD_PLUGIN_EXPR) {
6980                 pl_yylval.opval = o;
6981                 CLINE;
6982                 PL_expect = XOPERATOR;
6983                 return REPORT(PLUGEXPR);
6984             } else {
6985                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6986                                         PL_tokenbuf);
6987             }
6988         }
6989
6990         /* Check for built-in keyword */
6991         tmp = keyword(PL_tokenbuf, len, 0);
6992
6993         /* Is this a label? */
6994         if (!anydelim && PL_expect == XSTATE
6995               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6996             s = d + 1;
6997             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6998             pl_yylval.pval[len] = '\0';
6999             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7000             CLINE;
7001             TOKEN(LABEL);
7002         }
7003
7004         /* Check for lexical sub */
7005         if (PL_expect != XOPERATOR) {
7006             char tmpbuf[sizeof PL_tokenbuf + 1];
7007             *tmpbuf = '&';
7008             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7009             off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7010             if (off != NOT_IN_PAD) {
7011                 assert(off); /* we assume this is boolean-true below */
7012                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7013                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7014                     HEK * const stashname = HvNAME_HEK(stash);
7015                     sv = newSVhek(stashname);
7016                     sv_catpvs(sv, "::");
7017                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7018                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7019                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7020                                     SVt_PVCV);
7021                     off = 0;
7022                     if (!gv) {
7023                         sv_free(sv);
7024                         sv = NULL;
7025                         goto just_a_word;
7026                     }
7027                 }
7028                 else {
7029                     rv2cv_op = newOP(OP_PADANY, 0);
7030                     rv2cv_op->op_targ = off;
7031                     cv = find_lexical_cv(off);
7032                 }
7033                 lex = TRUE;
7034                 goto just_a_word;
7035             }
7036             off = 0;
7037         }
7038
7039         if (tmp < 0) {                  /* second-class keyword? */
7040             GV *ogv = NULL;     /* override (winner) */
7041             GV *hgv = NULL;     /* hidden (loser) */
7042             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7043                 CV *cv;
7044                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7045                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
7046                     (cv = GvCVu(gv)))
7047                 {
7048                     if (GvIMPORTED_CV(gv))
7049                         ogv = gv;
7050                     else if (! CvMETHOD(cv))
7051                         hgv = gv;
7052                 }
7053                 if (!ogv &&
7054                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7055                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
7056                     (gv = *gvp) && isGV_with_GP(gv) &&
7057                     GvCVu(gv) && GvIMPORTED_CV(gv))
7058                 {
7059                     ogv = gv;
7060                 }
7061             }
7062             if (ogv) {
7063                 orig_keyword = tmp;
7064                 tmp = 0;                /* overridden by import or by GLOBAL */
7065             }
7066             else if (gv && !gvp
7067                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7068                      && GvCVu(gv))
7069             {
7070                 tmp = 0;                /* any sub overrides "weak" keyword */
7071             }
7072             else {                      /* no override */
7073                 tmp = -tmp;
7074                 if (tmp == KEY_dump) {
7075                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7076                                    "dump() better written as CORE::dump()");
7077                 }
7078                 gv = NULL;
7079                 gvp = 0;
7080                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
7081                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7082                                    "Ambiguous call resolved as CORE::%s(), "
7083                                    "qualify as such or use &",
7084                                    GvENAME(hgv));
7085             }
7086         }
7087
7088         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7089          && (!anydelim || *s != '#')) {
7090             /* no override, and not s### either; skipspace is safe here
7091              * check for => on following line */
7092             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7093             STRLEN   soff = s         - SvPVX(PL_linestr);
7094             s = skipspace_flags(s, LEX_NO_INCLINE);
7095             if (*s == '=' && s[1] == '>') goto fat_arrow;
7096             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7097             s         = SvPVX(PL_linestr) +   soff;
7098         }
7099
7100       reserved_word:
7101         switch (tmp) {
7102
7103         default:                        /* not a keyword */
7104             /* Trade off - by using this evil construction we can pull the
7105                variable gv into the block labelled keylookup. If not, then
7106                we have to give it function scope so that the goto from the
7107                earlier ':' case doesn't bypass the initialisation.  */
7108             if (0) {
7109             just_a_word_zero_gv:
7110                 sv = NULL;
7111                 cv = NULL;
7112                 gv = NULL;
7113                 gvp = NULL;
7114                 rv2cv_op = NULL;
7115                 orig_keyword = 0;
7116                 lex = 0;
7117                 off = 0;
7118             }
7119           just_a_word: {
7120                 int pkgname = 0;
7121                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7122                 const char penultchar =
7123                     lastchar && PL_bufptr - 2 >= PL_linestart
7124                          ? PL_bufptr[-2]
7125                          : 0;
7126 #ifdef PERL_MAD
7127                 SV *nextPL_nextwhite = 0;
7128 #endif
7129
7130
7131                 /* Get the rest if it looks like a package qualifier */
7132
7133                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7134                     STRLEN morelen;
7135                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7136                                   TRUE, &morelen);
7137                     if (!morelen)
7138                         Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7139                                 UTF8fARG(UTF, len, PL_tokenbuf),
7140                                 *s == '\'' ? "'" : "::");
7141                     len += morelen;
7142                     pkgname = 1;
7143                 }
7144
7145                 if (PL_expect == XOPERATOR) {
7146                     if (PL_bufptr == PL_linestart) {
7147                         CopLINE_dec(PL_curcop);
7148                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7149                         CopLINE_inc(PL_curcop);
7150                     }
7151                     else
7152                         no_op("Bareword",s);
7153                 }
7154
7155                 /* Look for a subroutine with this name in current package,
7156                    unless this is a lexical sub, or name is "Foo::",
7157                    in which case Foo is a bareword
7158                    (and a package name). */
7159
7160                 if (len > 2 && !PL_madskills &&
7161                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7162                 {
7163                     if (ckWARN(WARN_BAREWORD)
7164                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7165                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7166                           "Bareword \"%"UTF8f"\" refers to nonexistent package",
7167                            UTF8fARG(UTF, len, PL_tokenbuf));
7168                     len -= 2;
7169                     PL_tokenbuf[len] = '\0';
7170                     gv = NULL;
7171                     gvp = 0;
7172                 }
7173                 else {
7174                     if (!lex && !gv) {
7175                         /* Mustn't actually add anything to a symbol table.
7176                            But also don't want to "initialise" any placeholder
7177                            constants that might already be there into full
7178                            blown PVGVs with attached PVCV.  */
7179                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7180                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7181                                                SVt_PVCV);
7182                     }
7183                     len = 0;
7184                 }
7185
7186                 /* if we saw a global override before, get the right name */
7187
7188                 if (!sv)
7189                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7190                     len ? len : strlen(PL_tokenbuf));
7191                 if (gvp) {
7192                     SV * const tmp_sv = sv;
7193                     sv = newSVpvs("CORE::GLOBAL::");
7194                     sv_catsv(sv, tmp_sv);
7195                     SvREFCNT_dec(tmp_sv);
7196                 }
7197
7198 #ifdef PERL_MAD
7199                 if (PL_madskills && !PL_thistoken) {
7200                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7201                     PL_thistoken = newSVpvn(start,s - start);
7202                     PL_realtokenstart = s - SvPVX(PL_linestr);
7203                 }
7204 #endif
7205
7206                 /* Presume this is going to be a bareword of some sort. */
7207                 CLINE;
7208                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7209                 pl_yylval.opval->op_private = OPpCONST_BARE;
7210
7211                 /* And if "Foo::", then that's what it certainly is. */
7212                 if (len)
7213                     goto safe_bareword;
7214
7215                 if (!off)
7216                 {
7217                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7218                     const_op->op_private = OPpCONST_BARE;
7219                     rv2cv_op = newCVREF(0, const_op);
7220                     cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7221                 }
7222
7223                 /* See if it's the indirect object for a list operator. */
7224
7225                 if (PL_oldoldbufptr &&
7226                     PL_oldoldbufptr < PL_bufptr &&
7227                     (PL_oldoldbufptr == PL_last_lop
7228                      || PL_oldoldbufptr == PL_last_uni) &&
7229                     /* NO SKIPSPACE BEFORE HERE! */
7230                     (PL_expect == XREF ||
7231                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7232                 {
7233                     bool immediate_paren = *s == '(';
7234
7235                     /* (Now we can afford to cross potential line boundary.) */
7236                     s = SKIPSPACE2(s,nextPL_nextwhite);
7237 #ifdef PERL_MAD
7238                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
7239 #endif
7240
7241                     /* Two barewords in a row may indicate method call. */
7242
7243                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7244                         (tmp = intuit_method(s, gv, cv))) {
7245                         op_free(rv2cv_op);
7246                         if (tmp == METHOD && !PL_lex_allbrackets &&
7247                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7248                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7249                         return REPORT(tmp);
7250                     }
7251
7252                     /* If not a declared subroutine, it's an indirect object. */
7253                     /* (But it's an indir obj regardless for sort.) */
7254                     /* Also, if "_" follows a filetest operator, it's a bareword */
7255
7256                     if (
7257                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7258                          (!cv &&
7259                         (PL_last_lop_op != OP_MAPSTART &&
7260                          PL_last_lop_op != OP_GREPSTART))))
7261                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7262                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7263                        )
7264                     {
7265                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7266                         goto bareword;
7267                     }
7268                 }
7269
7270                 PL_expect = XOPERATOR;
7271 #ifdef PERL_MAD
7272                 if (isSPACE(*s))
7273                     s = SKIPSPACE2(s,nextPL_nextwhite);
7274                 PL_nextwhite = nextPL_nextwhite;
7275 #else
7276                 s = skipspace(s);
7277 #endif
7278
7279                 /* Is this a word before a => operator? */
7280                 if (*s == '=' && s[1] == '>' && !pkgname) {
7281                     op_free(rv2cv_op);
7282                     CLINE;
7283                     /* This is our own scalar, created a few lines above,
7284                        so this is safe. */
7285                     SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7286                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7287                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7288                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7289                     SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7290                     TERM(WORD);
7291                 }
7292
7293                 /* If followed by a paren, it's certainly a subroutine. */
7294                 if (*s == '(') {
7295                     CLINE;
7296                     if (cv) {
7297                         d = s + 1;
7298                         while (SPACE_OR_TAB(*d))
7299                             d++;
7300                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7301                             s = d + 1;
7302                             goto its_constant;
7303                         }
7304                     }
7305 #ifdef PERL_MAD
7306                     if (PL_madskills) {
7307                         PL_nextwhite = PL_thiswhite;
7308                         PL_thiswhite = 0;
7309                     }
7310                     start_force(PL_curforce);
7311 #endif
7312                     NEXTVAL_NEXTTOKE.opval =
7313                         off ? rv2cv_op : pl_yylval.opval;
7314                     PL_expect = XOPERATOR;
7315 #ifdef PERL_MAD
7316                     if (PL_madskills) {
7317                         PL_nextwhite = nextPL_nextwhite;
7318                         curmad('X', PL_thistoken);
7319                         PL_thistoken = newSVpvs("");
7320                     }
7321 #endif
7322                     if (off)
7323                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7324                     else op_free(rv2cv_op),        force_next(WORD);
7325                     pl_yylval.ival = 0;
7326                     TOKEN('&');
7327                 }
7328
7329                 /* If followed by var or block, call it a method (unless sub) */
7330
7331                 if ((*s == '$' || *s == '{') && !cv) {
7332                     op_free(rv2cv_op);
7333                     PL_last_lop = PL_oldbufptr;
7334                     PL_last_lop_op = OP_METHOD;
7335                     if (!PL_lex_allbrackets &&
7336                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7337                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7338                     PREBLOCK(METHOD);
7339                 }
7340
7341                 /* If followed by a bareword, see if it looks like indir obj. */
7342
7343                 if (!orig_keyword
7344                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7345                         && (tmp = intuit_method(s, gv, cv))) {
7346                     op_free(rv2cv_op);
7347                     if (tmp == METHOD && !PL_lex_allbrackets &&
7348                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7349                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7350                     return REPORT(tmp);
7351                 }
7352
7353                 /* Not a method, so call it a subroutine (if defined) */
7354
7355                 if (cv) {
7356                     if (lastchar == '-' && penultchar != '-') {
7357                         const STRLEN l = len ? len : strlen(PL_tokenbuf);
7358                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7359                             "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7360                              UTF8fARG(UTF, l, PL_tokenbuf),
7361                              UTF8fARG(UTF, l, PL_tokenbuf));
7362                     }
7363                     /* Check for a constant sub */
7364                     if ((sv = cv_const_sv_or_av(cv))) {
7365                   its_constant:
7366                         op_free(rv2cv_op);
7367                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7368                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7369                         if (SvTYPE(sv) == SVt_PVAV)
7370                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7371                                                       pl_yylval.opval);
7372                         else {
7373                             pl_yylval.opval->op_private = OPpCONST_FOLDED;
7374                             pl_yylval.opval->op_folded = 1;
7375                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7376                         }
7377                         TOKEN(WORD);
7378                     }
7379
7380                     op_free(pl_yylval.opval);
7381                     pl_yylval.opval =
7382                         off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7383                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7384                     PL_last_lop = PL_oldbufptr;
7385                     PL_last_lop_op = OP_ENTERSUB;
7386                     /* Is there a prototype? */
7387                     if (
7388 #ifdef PERL_MAD
7389                         cv &&
7390 #endif
7391                         SvPOK(cv))
7392                     {
7393                         STRLEN protolen = CvPROTOLEN(cv);
7394                         const char *proto = CvPROTO(cv);
7395                         bool optional;
7396                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7397                         if (!protolen)
7398                             TERM(FUNC0SUB);
7399                         if ((optional = *proto == ';'))
7400                           do
7401                             proto++;
7402                           while (*proto == ';');
7403                         if (
7404                             (
7405                                 (
7406                                     *proto == '$' || *proto == '_'
7407                                  || *proto == '*' || *proto == '+'
7408                                 )
7409                              && proto[1] == '\0'
7410                             )
7411                          || (
7412                              *proto == '\\' && proto[1] && proto[2] == '\0'
7413                             )
7414                         )
7415                             UNIPROTO(UNIOPSUB,optional);
7416                         if (*proto == '\\' && proto[1] == '[') {
7417                             const char *p = proto + 2;
7418                             while(*p && *p != ']')
7419                                 ++p;
7420                             if(*p == ']' && !p[1])
7421                                 UNIPROTO(UNIOPSUB,optional);
7422                         }
7423                         if (*proto == '&' && *s == '{') {
7424                             if (PL_curstash)
7425                                 sv_setpvs(PL_subname, "__ANON__");
7426                             else
7427                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7428                             if (!PL_lex_allbrackets &&
7429                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7430                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7431                             PREBLOCK(LSTOPSUB);
7432                         }
7433                     }
7434 #ifdef PERL_MAD
7435                     {
7436                         if (PL_madskills) {
7437                             PL_nextwhite = PL_thiswhite;
7438                             PL_thiswhite = 0;
7439                         }
7440                         start_force(PL_curforce);
7441                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7442                         PL_expect = XTERM;
7443                         if (PL_madskills) {
7444                             PL_nextwhite = nextPL_nextwhite;
7445                             curmad('X', PL_thistoken);
7446                             PL_thistoken = newSVpvs("");
7447                         }
7448                         force_next(off ? PRIVATEREF : WORD);
7449                         if (!PL_lex_allbrackets &&
7450                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7451                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7452                         TOKEN(NOAMP);
7453                     }
7454                 }
7455
7456                 /* Guess harder when madskills require "best effort". */
7457                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7458                     int probable_sub = 0;
7459                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7460                         probable_sub = 1;
7461                     else if (isALPHA(*s)) {
7462                         char tmpbuf[1024];
7463                         STRLEN tmplen;
7464                         d = s;
7465                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7466                         if (!keyword(tmpbuf, tmplen, 0))
7467                             probable_sub = 1;
7468                         else {
7469                             while (d < PL_bufend && isSPACE(*d))
7470                                 d++;
7471                             if (*d == '=' && d[1] == '>')
7472                                 probable_sub = 1;
7473                         }
7474                     }
7475                     if (probable_sub) {
7476                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7477                                         SVt_PVCV);
7478                         op_free(pl_yylval.opval);
7479                         pl_yylval.opval =
7480                             off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7481                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7482                         PL_last_lop = PL_oldbufptr;
7483                         PL_last_lop_op = OP_ENTERSUB;
7484                         PL_nextwhite = PL_thiswhite;
7485                         PL_thiswhite = 0;
7486                         start_force(PL_curforce);
7487                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7488                         PL_expect = XTERM;
7489                         PL_nextwhite = nextPL_nextwhite;
7490                         curmad('X', PL_thistoken);
7491                         PL_thistoken = newSVpvs("");
7492                         force_next(off ? PRIVATEREF : WORD);
7493                         if (!PL_lex_allbrackets &&
7494                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7495                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7496                         TOKEN(NOAMP);
7497                     }
7498 #else
7499                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7500                     PL_expect = XTERM;
7501                     force_next(off ? PRIVATEREF : WORD);
7502                     if (!PL_lex_allbrackets &&
7503                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7504                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7505                     TOKEN(NOAMP);
7506 #endif
7507                 }
7508
7509                 /* Call it a bare word */
7510
7511                 if (PL_hints & HINT_STRICT_SUBS)
7512                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7513                 else {
7514                 bareword:
7515                     /* after "print" and similar functions (corresponding to
7516                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7517                      * a filehandle should be subject to "strict subs".
7518                      * Likewise for the optional indirect-object argument to system
7519                      * or exec, which can't be a bareword */
7520                     if ((PL_last_lop_op == OP_PRINT
7521                             || PL_last_lop_op == OP_PRTF
7522                             || PL_last_lop_op == OP_SAY
7523                             || PL_last_lop_op == OP_SYSTEM
7524                             || PL_last_lop_op == OP_EXEC)
7525                             && (PL_hints & HINT_STRICT_SUBS))
7526                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7527                     if (lastchar != '-') {
7528                         if (ckWARN(WARN_RESERVED)) {
7529                             d = PL_tokenbuf;
7530                             while (isLOWER(*d))
7531                                 d++;
7532                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7533                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7534                                        PL_tokenbuf);
7535                         }
7536                     }
7537                 }
7538                 op_free(rv2cv_op);
7539
7540             safe_bareword:
7541                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7542                  && saw_infix_sigil) {
7543                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7544                                      "Operator or semicolon missing before %c%"UTF8f,
7545                                      lastchar,
7546                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7547                                               PL_tokenbuf));
7548                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7549                                      "Ambiguous use of %c resolved as operator %c",
7550                                      lastchar, lastchar);
7551                 }
7552                 TOKEN(WORD);
7553             }
7554
7555         case KEY___FILE__:
7556             FUN0OP(
7557                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7558             );
7559
7560         case KEY___LINE__:
7561             FUN0OP(
7562                 (OP*)newSVOP(OP_CONST, 0,
7563                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7564             );
7565
7566         case KEY___PACKAGE__:
7567             FUN0OP(
7568                 (OP*)newSVOP(OP_CONST, 0,
7569                                         (PL_curstash
7570                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7571                                          : &PL_sv_undef))
7572             );
7573
7574         case KEY___DATA__:
7575         case KEY___END__: {
7576             GV *gv;
7577             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7578                 const char *pname = "main";
7579                 STRLEN plen = 4;
7580                 U32 putf8 = 0;
7581                 if (PL_tokenbuf[2] == 'D')
7582                 {
7583                     HV * const stash =
7584                         PL_curstash ? PL_curstash : PL_defstash;
7585                     pname = HvNAME_get(stash);
7586                     plen  = HvNAMELEN (stash);
7587                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7588                 }
7589                 gv = gv_fetchpvn_flags(
7590                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7591                         plen+6, GV_ADD|putf8, SVt_PVIO
7592                 );
7593                 GvMULTI_on(gv);
7594                 if (!GvIO(gv))
7595                     GvIOp(gv) = newIO();
7596                 IoIFP(GvIOp(gv)) = PL_rsfp;
7597 #if defined(HAS_FCNTL) && defined(F_SETFD)
7598                 {
7599                     const int fd = PerlIO_fileno(PL_rsfp);
7600                     fcntl(fd,F_SETFD,fd >= 3);
7601                 }
7602 #endif
7603                 /* Mark this internal pseudo-handle as clean */
7604                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7605                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7606                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7607                 else
7608                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7609 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7610                 /* if the script was opened in binmode, we need to revert
7611                  * it to text mode for compatibility; but only iff it has CRs
7612                  * XXX this is a questionable hack at best. */
7613                 if (PL_bufend-PL_bufptr > 2
7614                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7615                 {
7616                     Off_t loc = 0;
7617                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7618                         loc = PerlIO_tell(PL_rsfp);
7619                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7620                     }
7621 #ifdef NETWARE
7622                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7623 #else
7624                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7625 #endif  /* NETWARE */
7626                         if (loc > 0)
7627                             PerlIO_seek(PL_rsfp, loc, 0);
7628                     }
7629                 }
7630 #endif
7631 #ifdef PERLIO_LAYERS
7632                 if (!IN_BYTES) {
7633                     if (UTF)
7634                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7635                     else if (PL_encoding) {
7636                         SV *name;
7637                         dSP;
7638                         ENTER;
7639                         SAVETMPS;
7640                         PUSHMARK(sp);
7641                         EXTEND(SP, 1);
7642                         XPUSHs(PL_encoding);
7643                         PUTBACK;
7644                         call_method("name", G_SCALAR);
7645                         SPAGAIN;
7646                         name = POPs;
7647                         PUTBACK;
7648                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7649                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7650                                                       SVfARG(name)));
7651                         FREETMPS;
7652                         LEAVE;
7653                     }
7654                 }
7655 #endif
7656 #ifdef PERL_MAD
7657                 if (PL_madskills) {
7658                     if (PL_realtokenstart >= 0) {
7659                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7660                         if (!PL_endwhite)
7661                             PL_endwhite = newSVpvs("");
7662                         sv_catsv(PL_endwhite, PL_thiswhite);
7663                         PL_thiswhite = 0;
7664                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7665                         PL_realtokenstart = -1;
7666                     }
7667                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7668                            != NULL) ;
7669                 }
7670 #endif
7671                 PL_rsfp = NULL;
7672             }
7673             goto fake_eof;
7674         }
7675
7676         case KEY___SUB__:
7677             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7678
7679         case KEY_AUTOLOAD:
7680         case KEY_DESTROY:
7681         case KEY_BEGIN:
7682         case KEY_UNITCHECK:
7683         case KEY_CHECK:
7684         case KEY_INIT:
7685         case KEY_END:
7686             if (PL_expect == XSTATE) {
7687                 s = PL_bufptr;
7688                 goto really_sub;
7689             }
7690             goto just_a_word;
7691
7692         case KEY_CORE:
7693             if (*s == ':' && s[1] == ':') {
7694                 STRLEN olen = len;
7695                 d = s;
7696                 s += 2;
7697                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7698                 if ((*s == ':' && s[1] == ':')
7699                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7700                 {
7701                     s = d;
7702                     len = olen;
7703                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7704                     goto just_a_word;
7705                 }
7706                 if (!tmp)
7707                     Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7708                                       UTF8fARG(UTF, len, PL_tokenbuf));
7709                 if (tmp < 0)
7710                     tmp = -tmp;
7711                 else if (tmp == KEY_require || tmp == KEY_do
7712                       || tmp == KEY_glob)
7713                     /* that's a way to remember we saw "CORE::" */
7714                     orig_keyword = tmp;
7715                 goto reserved_word;
7716             }
7717             goto just_a_word;
7718
7719         case KEY_abs:
7720             UNI(OP_ABS);
7721
7722         case KEY_alarm:
7723             UNI(OP_ALARM);
7724
7725         case KEY_accept:
7726             LOP(OP_ACCEPT,XTERM);
7727
7728         case KEY_and:
7729             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7730                 return REPORT(0);
7731             OPERATOR(ANDOP);
7732
7733         case KEY_atan2:
7734             LOP(OP_ATAN2,XTERM);
7735
7736         case KEY_bind:
7737             LOP(OP_BIND,XTERM);
7738
7739         case KEY_binmode:
7740             LOP(OP_BINMODE,XTERM);
7741
7742         case KEY_bless:
7743             LOP(OP_BLESS,XTERM);
7744
7745         case KEY_break:
7746             FUN0(OP_BREAK);
7747
7748         case KEY_chop:
7749             UNI(OP_CHOP);
7750
7751         case KEY_continue:
7752                     /* We have to disambiguate the two senses of
7753                       "continue". If the next token is a '{' then
7754                       treat it as the start of a continue block;
7755                       otherwise treat it as a control operator.
7756                      */
7757                     s = skipspace(s);
7758                     if (*s == '{')
7759             PREBLOCK(CONTINUE);
7760                     else
7761                         FUN0(OP_CONTINUE);
7762
7763         case KEY_chdir:
7764             /* may use HOME */
7765             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7766             UNI(OP_CHDIR);
7767
7768         case KEY_close:
7769             UNI(OP_CLOSE);
7770
7771         case KEY_closedir:
7772             UNI(OP_CLOSEDIR);
7773
7774         case KEY_cmp:
7775             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7776                 return REPORT(0);
7777             Eop(OP_SCMP);
7778
7779         case KEY_caller:
7780             UNI(OP_CALLER);
7781
7782         case KEY_crypt:
7783 #ifdef FCRYPT
7784             if (!PL_cryptseen) {
7785                 PL_cryptseen = TRUE;
7786                 init_des();
7787             }
7788 #endif
7789             LOP(OP_CRYPT,XTERM);
7790
7791         case KEY_chmod:
7792             LOP(OP_CHMOD,XTERM);
7793
7794         case KEY_chown:
7795             LOP(OP_CHOWN,XTERM);
7796
7797         case KEY_connect:
7798             LOP(OP_CONNECT,XTERM);
7799
7800         case KEY_chr:
7801             UNI(OP_CHR);
7802
7803         case KEY_cos:
7804             UNI(OP_COS);
7805
7806         case KEY_chroot:
7807             UNI(OP_CHROOT);
7808
7809         case KEY_default:
7810             PREBLOCK(DEFAULT);
7811
7812         case KEY_do:
7813             s = SKIPSPACE1(s);
7814             if (*s == '{')
7815                 PRETERMBLOCK(DO);
7816             if (*s != '\'') {
7817                 *PL_tokenbuf = '&';
7818                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7819                               1, &len);
7820                 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7821                     d = SKIPSPACE1(d);
7822                     if (*d == '(') {
7823                         force_ident_maybe_lex('&');
7824                         s = d;
7825                     }
7826                 }
7827             }
7828             if (orig_keyword == KEY_do) {
7829                 orig_keyword = 0;
7830                 pl_yylval.ival = 1;
7831             }
7832             else
7833                 pl_yylval.ival = 0;
7834             OPERATOR(DO);
7835
7836         case KEY_die:
7837             PL_hints |= HINT_BLOCK_SCOPE;
7838             LOP(OP_DIE,XTERM);
7839
7840         case KEY_defined:
7841             UNI(OP_DEFINED);
7842
7843         case KEY_delete:
7844             UNI(OP_DELETE);
7845
7846         case KEY_dbmopen:
7847             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7848                               STR_WITH_LEN("NDBM_File::"),
7849                               STR_WITH_LEN("DB_File::"),
7850                               STR_WITH_LEN("GDBM_File::"),
7851                               STR_WITH_LEN("SDBM_File::"),
7852                               STR_WITH_LEN("ODBM_File::"),
7853                               NULL);
7854             LOP(OP_DBMOPEN,XTERM);
7855
7856         case KEY_dbmclose:
7857             UNI(OP_DBMCLOSE);
7858
7859         case KEY_dump:
7860             PL_expect = XOPERATOR;
7861             s = force_word(s,WORD,TRUE,FALSE);
7862             LOOPX(OP_DUMP);
7863
7864         case KEY_else:
7865             PREBLOCK(ELSE);
7866
7867         case KEY_elsif:
7868             pl_yylval.ival = CopLINE(PL_curcop);
7869             OPERATOR(ELSIF);
7870
7871         case KEY_eq:
7872             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7873                 return REPORT(0);
7874             Eop(OP_SEQ);
7875
7876         case KEY_exists:
7877             UNI(OP_EXISTS);
7878
7879         case KEY_exit:
7880             if (PL_madskills)
7881                 UNI(OP_INT);
7882             UNI(OP_EXIT);
7883
7884         case KEY_eval:
7885             s = SKIPSPACE1(s);
7886             if (*s == '{') { /* block eval */
7887                 PL_expect = XTERMBLOCK;
7888                 UNIBRACK(OP_ENTERTRY);
7889             }
7890             else { /* string eval */
7891                 PL_expect = XTERM;
7892                 UNIBRACK(OP_ENTEREVAL);
7893             }
7894
7895         case KEY_evalbytes:
7896             PL_expect = XTERM;
7897             UNIBRACK(-OP_ENTEREVAL);
7898
7899         case KEY_eof:
7900             UNI(OP_EOF);
7901
7902         case KEY_exp:
7903             UNI(OP_EXP);
7904
7905         case KEY_each:
7906             UNI(OP_EACH);
7907
7908         case KEY_exec:
7909             LOP(OP_EXEC,XREF);
7910
7911         case KEY_endhostent:
7912             FUN0(OP_EHOSTENT);
7913
7914         case KEY_endnetent:
7915             FUN0(OP_ENETENT);
7916
7917         case KEY_endservent:
7918             FUN0(OP_ESERVENT);
7919
7920         case KEY_endprotoent:
7921             FUN0(OP_EPROTOENT);
7922
7923         case KEY_endpwent:
7924             FUN0(OP_EPWENT);
7925
7926         case KEY_endgrent:
7927             FUN0(OP_EGRENT);
7928
7929         case KEY_for:
7930         case KEY_foreach:
7931             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7932                 return REPORT(0);
7933             pl_yylval.ival = CopLINE(PL_curcop);
7934             s = SKIPSPACE1(s);
7935             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7936                 char *p = s;
7937 #ifdef PERL_MAD
7938                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7939 #endif
7940
7941                 if ((PL_bufend - p) >= 3 &&
7942                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7943                     p += 2;
7944                 else if ((PL_bufend - p) >= 4 &&
7945                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7946                     p += 3;
7947                 p = PEEKSPACE(p);
7948                 if (isIDFIRST_lazy_if(p,UTF)) {
7949                     p = scan_ident(p, PL_bufend,
7950                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7951                     p = PEEKSPACE(p);
7952                 }
7953                 if (*p != '$')
7954                     Perl_croak(aTHX_ "Missing $ on loop variable");
7955 #ifdef PERL_MAD
7956                 s = SvPVX(PL_linestr) + soff;
7957 #endif
7958             }
7959             OPERATOR(FOR);
7960
7961         case KEY_formline:
7962             LOP(OP_FORMLINE,XTERM);
7963
7964         case KEY_fork:
7965             FUN0(OP_FORK);
7966
7967         case KEY_fc:
7968             UNI(OP_FC);
7969
7970         case KEY_fcntl:
7971             LOP(OP_FCNTL,XTERM);
7972
7973         case KEY_fileno:
7974             UNI(OP_FILENO);
7975
7976         case KEY_flock:
7977             LOP(OP_FLOCK,XTERM);
7978
7979         case KEY_gt:
7980             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7981                 return REPORT(0);
7982             Rop(OP_SGT);
7983
7984         case KEY_ge:
7985             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7986                 return REPORT(0);
7987             Rop(OP_SGE);
7988
7989         case KEY_grep:
7990             LOP(OP_GREPSTART, XREF);
7991
7992         case KEY_goto:
7993             PL_expect = XOPERATOR;
7994             s = force_word(s,WORD,TRUE,FALSE);
7995             LOOPX(OP_GOTO);
7996
7997         case KEY_gmtime:
7998             UNI(OP_GMTIME);
7999
8000         case KEY_getc:
8001             UNIDOR(OP_GETC);
8002
8003         case KEY_getppid:
8004             FUN0(OP_GETPPID);
8005
8006         case KEY_getpgrp:
8007             UNI(OP_GETPGRP);
8008
8009         case KEY_getpriority:
8010             LOP(OP_GETPRIORITY,XTERM);
8011
8012         case KEY_getprotobyname:
8013             UNI(OP_GPBYNAME);
8014
8015         case KEY_getprotobynumber:
8016             LOP(OP_GPBYNUMBER,XTERM);
8017
8018         case KEY_getprotoent:
8019             FUN0(OP_GPROTOENT);
8020
8021         case KEY_getpwent:
8022             FUN0(OP_GPWENT);
8023
8024         case KEY_getpwnam:
8025             UNI(OP_GPWNAM);
8026
8027         case KEY_getpwuid:
8028             UNI(OP_GPWUID);
8029
8030         case KEY_getpeername:
8031             UNI(OP_GETPEERNAME);
8032
8033         case KEY_gethostbyname:
8034             UNI(OP_GHBYNAME);
8035
8036         case KEY_gethostbyaddr:
8037             LOP(OP_GHBYADDR,XTERM);
8038
8039         case KEY_gethostent:
8040             FUN0(OP_GHOSTENT);
8041
8042         case KEY_getnetbyname:
8043             UNI(OP_GNBYNAME);
8044
8045         case KEY_getnetbyaddr:
8046             LOP(OP_GNBYADDR,XTERM);
8047
8048         case KEY_getnetent:
8049             FUN0(OP_GNETENT);
8050
8051         case KEY_getservbyname:
8052             LOP(OP_GSBYNAME,XTERM);
8053
8054         case KEY_getservbyport:
8055             LOP(OP_GSBYPORT,XTERM);
8056
8057         case KEY_getservent:
8058             FUN0(OP_GSERVENT);
8059
8060         case KEY_getsockname:
8061             UNI(OP_GETSOCKNAME);
8062
8063         case KEY_getsockopt:
8064             LOP(OP_GSOCKOPT,XTERM);
8065
8066         case KEY_getgrent:
8067             FUN0(OP_GGRENT);
8068
8069         case KEY_getgrnam:
8070             UNI(OP_GGRNAM);
8071
8072         case KEY_getgrgid:
8073             UNI(OP_GGRGID);
8074
8075         case KEY_getlogin:
8076             FUN0(OP_GETLOGIN);
8077
8078         case KEY_given:
8079             pl_yylval.ival = CopLINE(PL_curcop);
8080             Perl_ck_warner_d(aTHX_
8081                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8082                 "given is experimental");
8083             OPERATOR(GIVEN);
8084
8085         case KEY_glob:
8086             LOP(
8087              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
8088              XTERM
8089             );
8090
8091         case KEY_hex:
8092             UNI(OP_HEX);
8093
8094         case KEY_if:
8095             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8096                 return REPORT(0);
8097             pl_yylval.ival = CopLINE(PL_curcop);
8098             OPERATOR(IF);
8099
8100         case KEY_index:
8101             LOP(OP_INDEX,XTERM);
8102
8103         case KEY_int:
8104             UNI(OP_INT);
8105
8106         case KEY_ioctl:
8107             LOP(OP_IOCTL,XTERM);
8108
8109         case KEY_join:
8110             LOP(OP_JOIN,XTERM);
8111
8112         case KEY_keys:
8113             UNI(OP_KEYS);
8114
8115         case KEY_kill:
8116             LOP(OP_KILL,XTERM);
8117
8118         case KEY_last:
8119             PL_expect = XOPERATOR;
8120             s = force_word(s,WORD,TRUE,FALSE);
8121             LOOPX(OP_LAST);
8122
8123         case KEY_lc:
8124             UNI(OP_LC);
8125
8126         case KEY_lcfirst:
8127             UNI(OP_LCFIRST);
8128
8129         case KEY_local:
8130             pl_yylval.ival = 0;
8131             OPERATOR(LOCAL);
8132
8133         case KEY_length:
8134             UNI(OP_LENGTH);
8135
8136         case KEY_lt:
8137             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8138                 return REPORT(0);
8139             Rop(OP_SLT);
8140
8141         case KEY_le:
8142             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8143                 return REPORT(0);
8144             Rop(OP_SLE);
8145
8146         case KEY_localtime:
8147             UNI(OP_LOCALTIME);
8148
8149         case KEY_log:
8150             UNI(OP_LOG);
8151
8152         case KEY_link:
8153             LOP(OP_LINK,XTERM);
8154
8155         case KEY_listen:
8156             LOP(OP_LISTEN,XTERM);
8157
8158         case KEY_lock:
8159             UNI(OP_LOCK);
8160
8161         case KEY_lstat:
8162             UNI(OP_LSTAT);
8163
8164         case KEY_m:
8165             s = scan_pat(s,OP_MATCH);
8166             TERM(sublex_start());
8167
8168         case KEY_map:
8169             LOP(OP_MAPSTART, XREF);
8170
8171         case KEY_mkdir:
8172             LOP(OP_MKDIR,XTERM);
8173
8174         case KEY_msgctl:
8175             LOP(OP_MSGCTL,XTERM);
8176
8177         case KEY_msgget:
8178             LOP(OP_MSGGET,XTERM);
8179
8180         case KEY_msgrcv:
8181             LOP(OP_MSGRCV,XTERM);
8182
8183         case KEY_msgsnd:
8184             LOP(OP_MSGSND,XTERM);
8185
8186         case KEY_our:
8187         case KEY_my:
8188         case KEY_state:
8189             PL_in_my = (U16)tmp;
8190             s = SKIPSPACE1(s);
8191             if (isIDFIRST_lazy_if(s,UTF)) {
8192 #ifdef PERL_MAD
8193                 char* start = s;
8194 #endif
8195                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8196                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8197                 {
8198                     if (!FEATURE_LEXSUBS_IS_ENABLED)
8199                         Perl_croak(aTHX_
8200                                   "Experimental \"%s\" subs not enabled",
8201                                    tmp == KEY_my    ? "my"    :
8202                                    tmp == KEY_state ? "state" : "our");
8203                     Perl_ck_warner_d(aTHX_
8204                         packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8205                         "The lexical_subs feature is experimental");
8206                     goto really_sub;
8207                 }
8208                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8209                 if (!PL_in_my_stash) {
8210                     char tmpbuf[1024];
8211                     PL_bufptr = s;
8212                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8213                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8214                 }
8215 #ifdef PERL_MAD
8216                 if (PL_madskills) {     /* just add type to declarator token */
8217                     sv_catsv(PL_thistoken, PL_nextwhite);
8218                     PL_nextwhite = 0;
8219                     sv_catpvn(PL_thistoken, start, s - start);
8220                 }
8221 #endif
8222             }
8223             pl_yylval.ival = 1;
8224             OPERATOR(MY);
8225
8226         case KEY_next:
8227             PL_expect = XOPERATOR;
8228             s = force_word(s,WORD,TRUE,FALSE);
8229             LOOPX(OP_NEXT);
8230
8231         case KEY_ne:
8232             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8233                 return REPORT(0);
8234             Eop(OP_SNE);
8235
8236         case KEY_no:
8237             s = tokenize_use(0, s);
8238             TERM(USE);
8239
8240         case KEY_not:
8241             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8242                 FUN1(OP_NOT);
8243             else {
8244                 if (!PL_lex_allbrackets &&
8245                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8246                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8247                 OPERATOR(NOTOP);
8248             }
8249
8250         case KEY_open:
8251             s = SKIPSPACE1(s);
8252             if (isIDFIRST_lazy_if(s,UTF)) {
8253           const char *t;
8254           d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8255               &len);
8256                 for (t=d; isSPACE(*t);)
8257                     t++;
8258                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8259                     /* [perl #16184] */
8260                     && !(t[0] == '=' && t[1] == '>')
8261                     && !(t[0] == ':' && t[1] == ':')
8262                     && !keyword(s, d-s, 0)
8263                 ) {
8264                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8265                        "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8266                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8267                 }
8268             }
8269             LOP(OP_OPEN,XTERM);
8270
8271         case KEY_or:
8272             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8273                 return REPORT(0);
8274             pl_yylval.ival = OP_OR;
8275             OPERATOR(OROP);
8276
8277         case KEY_ord:
8278             UNI(OP_ORD);
8279
8280         case KEY_oct:
8281             UNI(OP_OCT);
8282
8283         case KEY_opendir:
8284             LOP(OP_OPEN_DIR,XTERM);
8285
8286         case KEY_print:
8287             checkcomma(s,PL_tokenbuf,"filehandle");
8288             LOP(OP_PRINT,XREF);
8289
8290         case KEY_printf:
8291             checkcomma(s,PL_tokenbuf,"filehandle");
8292             LOP(OP_PRTF,XREF);
8293
8294         case KEY_prototype:
8295             UNI(OP_PROTOTYPE);
8296
8297         case KEY_push:
8298             LOP(OP_PUSH,XTERM);
8299
8300         case KEY_pop:
8301             UNIDOR(OP_POP);
8302
8303         case KEY_pos:
8304             UNIDOR(OP_POS);
8305
8306         case KEY_pack:
8307             LOP(OP_PACK,XTERM);
8308
8309         case KEY_package:
8310             s = force_word(s,WORD,FALSE,TRUE);
8311             s = SKIPSPACE1(s);
8312             s = force_strict_version(s);
8313             PL_lex_expect = XBLOCK;
8314             OPERATOR(PACKAGE);
8315
8316         case KEY_pipe:
8317             LOP(OP_PIPE_OP,XTERM);
8318
8319         case KEY_q:
8320             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8321             if (!s)
8322                 missingterm(NULL);
8323             pl_yylval.ival = OP_CONST;
8324             TERM(sublex_start());
8325
8326         case KEY_quotemeta:
8327             UNI(OP_QUOTEMETA);
8328
8329         case KEY_qw: {
8330             OP *words = NULL;
8331             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8332             if (!s)
8333                 missingterm(NULL);
8334             PL_expect = XOPERATOR;
8335             if (SvCUR(PL_lex_stuff)) {
8336                 int warned_comma = !ckWARN(WARN_QW);
8337                 int warned_comment = warned_comma;
8338                 d = SvPV_force(PL_lex_stuff, len);
8339                 while (len) {
8340                     for (; isSPACE(*d) && len; --len, ++d)
8341                         /**/;
8342                     if (len) {
8343                         SV *sv;
8344                         const char *b = d;
8345                         if (!warned_comma || !warned_comment) {
8346                             for (; !isSPACE(*d) && len; --len, ++d) {
8347                                 if (!warned_comma && *d == ',') {
8348                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8349                                         "Possible attempt to separate words with commas");
8350                                     ++warned_comma;
8351                                 }
8352                                 else if (!warned_comment && *d == '#') {
8353                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8354                                         "Possible attempt to put comments in qw() list");
8355                                     ++warned_comment;
8356                                 }
8357                             }
8358                         }
8359                         else {
8360                             for (; !isSPACE(*d) && len; --len, ++d)
8361                                 /**/;
8362                         }
8363                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8364                         words = op_append_elem(OP_LIST, words,
8365                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8366                     }
8367                 }
8368             }
8369             if (!words)
8370                 words = newNULLLIST();
8371             if (PL_lex_stuff) {
8372                 SvREFCNT_dec(PL_lex_stuff);
8373                 PL_lex_stuff = NULL;
8374             }
8375             PL_expect = XOPERATOR;
8376             pl_yylval.opval = sawparens(words);
8377             TOKEN(QWLIST);
8378         }
8379
8380         case KEY_qq:
8381             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8382             if (!s)
8383                 missingterm(NULL);
8384             pl_yylval.ival = OP_STRINGIFY;
8385             if (SvIVX(PL_lex_stuff) == '\'')
8386                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8387             TERM(sublex_start());
8388
8389         case KEY_qr:
8390             s = scan_pat(s,OP_QR);
8391             TERM(sublex_start());
8392
8393         case KEY_qx:
8394             s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8395             if (!s)
8396                 missingterm(NULL);
8397             readpipe_override();
8398             TERM(sublex_start());
8399
8400         case KEY_return:
8401             OLDLOP(OP_RETURN);
8402
8403         case KEY_require:
8404             s = SKIPSPACE1(s);
8405             PL_expect = XOPERATOR;
8406             if (isDIGIT(*s)) {
8407                 s = force_version(s, FALSE);
8408             }
8409             else if (*s != 'v' || !isDIGIT(s[1])
8410                     || (s = force_version(s, TRUE), *s == 'v'))
8411             {
8412                 *PL_tokenbuf = '\0';
8413                 s = force_word(s,WORD,TRUE,TRUE);
8414                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8415                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8416                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8417                 else if (*s == '<')
8418                     yyerror("<> should be quotes");
8419             }
8420             if (orig_keyword == KEY_require) {
8421                 orig_keyword = 0;
8422                 pl_yylval.ival = 1;
8423             }
8424             else
8425                 pl_yylval.ival = 0;
8426             PL_expect = XTERM;
8427             PL_bufptr = s;
8428             PL_last_uni = PL_oldbufptr;
8429             PL_last_lop_op = OP_REQUIRE;
8430             s = skipspace(s);
8431             return REPORT( (int)REQUIRE );
8432
8433         case KEY_reset:
8434             UNI(OP_RESET);
8435
8436         case KEY_redo:
8437             PL_expect = XOPERATOR;
8438             s = force_word(s,WORD,TRUE,FALSE);
8439             LOOPX(OP_REDO);
8440
8441         case KEY_rename:
8442             LOP(OP_RENAME,XTERM);
8443
8444         case KEY_rand:
8445             UNI(OP_RAND);
8446
8447         case KEY_rmdir:
8448             UNI(OP_RMDIR);
8449
8450         case KEY_rindex:
8451             LOP(OP_RINDEX,XTERM);
8452
8453         case KEY_read:
8454             LOP(OP_READ,XTERM);
8455
8456         case KEY_readdir:
8457             UNI(OP_READDIR);
8458
8459         case KEY_readline:
8460             UNIDOR(OP_READLINE);
8461
8462         case KEY_readpipe:
8463             UNIDOR(OP_BACKTICK);
8464
8465         case KEY_rewinddir:
8466             UNI(OP_REWINDDIR);
8467
8468         case KEY_recv:
8469             LOP(OP_RECV,XTERM);
8470
8471         case KEY_reverse:
8472             LOP(OP_REVERSE,XTERM);
8473
8474         case KEY_readlink:
8475             UNIDOR(OP_READLINK);
8476
8477         case KEY_ref:
8478             UNI(OP_REF);
8479
8480         case KEY_s:
8481             s = scan_subst(s);
8482             if (pl_yylval.opval)
8483                 TERM(sublex_start());
8484             else
8485                 TOKEN(1);       /* force error */
8486
8487         case KEY_say:
8488             checkcomma(s,PL_tokenbuf,"filehandle");
8489             LOP(OP_SAY,XREF);
8490
8491         case KEY_chomp:
8492             UNI(OP_CHOMP);
8493
8494         case KEY_scalar:
8495             UNI(OP_SCALAR);
8496
8497         case KEY_select:
8498             LOP(OP_SELECT,XTERM);
8499
8500         case KEY_seek:
8501             LOP(OP_SEEK,XTERM);
8502
8503         case KEY_semctl:
8504             LOP(OP_SEMCTL,XTERM);
8505
8506         case KEY_semget:
8507             LOP(OP_SEMGET,XTERM);
8508
8509         case KEY_semop:
8510             LOP(OP_SEMOP,XTERM);
8511
8512         case KEY_send:
8513             LOP(OP_SEND,XTERM);
8514
8515         case KEY_setpgrp:
8516             LOP(OP_SETPGRP,XTERM);
8517
8518         case KEY_setpriority:
8519             LOP(OP_SETPRIORITY,XTERM);
8520
8521         case KEY_sethostent:
8522             UNI(OP_SHOSTENT);
8523
8524         case KEY_setnetent:
8525             UNI(OP_SNETENT);
8526
8527         case KEY_setservent:
8528             UNI(OP_SSERVENT);
8529
8530         case KEY_setprotoent:
8531             UNI(OP_SPROTOENT);
8532
8533         case KEY_setpwent:
8534             FUN0(OP_SPWENT);
8535
8536         case KEY_setgrent:
8537             FUN0(OP_SGRENT);
8538
8539         case KEY_seekdir:
8540             LOP(OP_SEEKDIR,XTERM);
8541
8542         case KEY_setsockopt:
8543             LOP(OP_SSOCKOPT,XTERM);
8544
8545         case KEY_shift:
8546             UNIDOR(OP_SHIFT);
8547
8548         case KEY_shmctl:
8549             LOP(OP_SHMCTL,XTERM);
8550
8551         case KEY_shmget:
8552             LOP(OP_SHMGET,XTERM);
8553
8554         case KEY_shmread:
8555             LOP(OP_SHMREAD,XTERM);
8556
8557         case KEY_shmwrite:
8558             LOP(OP_SHMWRITE,XTERM);
8559
8560         case KEY_shutdown:
8561             LOP(OP_SHUTDOWN,XTERM);
8562
8563         case KEY_sin:
8564             UNI(OP_SIN);
8565
8566         case KEY_sleep:
8567             UNI(OP_SLEEP);
8568
8569         case KEY_socket:
8570             LOP(OP_SOCKET,XTERM);
8571
8572         case KEY_socketpair:
8573             LOP(OP_SOCKPAIR,XTERM);
8574
8575         case KEY_sort:
8576             checkcomma(s,PL_tokenbuf,"subroutine name");
8577             s = SKIPSPACE1(s);
8578             PL_expect = XTERM;
8579             s = force_word(s,WORD,TRUE,TRUE);
8580             LOP(OP_SORT,XREF);
8581
8582         case KEY_split:
8583             LOP(OP_SPLIT,XTERM);
8584
8585         case KEY_sprintf:
8586             LOP(OP_SPRINTF,XTERM);
8587
8588         case KEY_splice:
8589             LOP(OP_SPLICE,XTERM);
8590
8591         case KEY_sqrt:
8592             UNI(OP_SQRT);
8593
8594         case KEY_srand:
8595             UNI(OP_SRAND);
8596
8597         case KEY_stat:
8598             UNI(OP_STAT);
8599
8600         case KEY_study:
8601             UNI(OP_STUDY);
8602
8603         case KEY_substr:
8604             LOP(OP_SUBSTR,XTERM);
8605
8606         case KEY_format:
8607         case KEY_sub:
8608           really_sub:
8609             {
8610                 char * const tmpbuf = PL_tokenbuf + 1;
8611                 expectation attrful;
8612                 bool have_name, have_proto;
8613                 const int key = tmp;
8614 #ifndef PERL_MAD
8615                 SV *format_name = NULL;
8616 #endif
8617
8618 #ifdef PERL_MAD
8619                 SV *tmpwhite = 0;
8620
8621                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8622                 SV *subtoken = PL_madskills
8623                    ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8624                    : NULL;
8625                 PL_thistoken = 0;
8626
8627                 d = s;
8628                 s = SKIPSPACE2(s,tmpwhite);
8629 #else
8630                 d = s;
8631                 s = skipspace(s);
8632 #endif
8633
8634                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8635                     (*s == ':' && s[1] == ':'))
8636                 {
8637 #ifdef PERL_MAD
8638                     SV *nametoke = NULL;
8639 #endif
8640
8641                     PL_expect = XBLOCK;
8642                     attrful = XATTRBLOCK;
8643                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8644                                   &len);
8645 #ifdef PERL_MAD
8646                     if (PL_madskills)
8647                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8648 #else
8649                     if (key == KEY_format)
8650                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8651 #endif
8652                     *PL_tokenbuf = '&';
8653                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8654                      || pad_findmy_pvn(
8655                             PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8656                         ) != NOT_IN_PAD)
8657                         sv_setpvn(PL_subname, tmpbuf, len);
8658                     else {
8659                         sv_setsv(PL_subname,PL_curstname);
8660                         sv_catpvs(PL_subname,"::");
8661                         sv_catpvn(PL_subname,tmpbuf,len);
8662                     }
8663                     if (SvUTF8(PL_linestr))
8664                         SvUTF8_on(PL_subname);
8665                     have_name = TRUE;
8666
8667
8668 #ifdef PERL_MAD
8669                     start_force(0);
8670                     CURMAD('X', nametoke);
8671                     CURMAD('_', tmpwhite);
8672                     force_ident_maybe_lex('&');
8673
8674                     s = SKIPSPACE2(d,tmpwhite);
8675 #else
8676                     s = skipspace(d);
8677 #endif
8678                 }
8679                 else {
8680                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8681                     {
8682                         *d = '\0';
8683                         /* diag_listed_as: Missing name in "%s sub" */
8684                         Perl_croak(aTHX_
8685                                   "Missing name in \"%s\"", PL_bufptr);
8686                     }
8687                     PL_expect = XTERMBLOCK;
8688                     attrful = XATTRTERM;
8689                     sv_setpvs(PL_subname,"?");
8690                     have_name = FALSE;
8691                 }
8692
8693                 if (key == KEY_format) {
8694 #ifdef PERL_MAD
8695                     PL_thistoken = subtoken;
8696                     s = d;
8697 #else
8698                     if (format_name) {
8699                         start_force(PL_curforce);
8700                         NEXTVAL_NEXTTOKE.opval
8701                             = (OP*)newSVOP(OP_CONST,0, format_name);
8702                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8703                         force_next(WORD);
8704                     }
8705 #endif
8706                     PREBLOCK(FORMAT);
8707                 }
8708
8709                 /* Look for a prototype */
8710                 if (*s == '(') {
8711                     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8712                     if (!s)
8713                         Perl_croak(aTHX_ "Prototype not terminated");
8714                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8715                     have_proto = TRUE;
8716
8717 #ifdef PERL_MAD
8718                     start_force(0);
8719                     CURMAD('q', PL_thisopen);
8720                     CURMAD('_', tmpwhite);
8721                     CURMAD('=', PL_thisstuff);
8722                     CURMAD('Q', PL_thisclose);
8723                     NEXTVAL_NEXTTOKE.opval =
8724                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8725                     PL_lex_stuff = NULL;
8726                     force_next(THING);
8727
8728                     s = SKIPSPACE2(s,tmpwhite);
8729 #else
8730                     s = skipspace(s);
8731 #endif
8732                 }
8733                 else
8734                     have_proto = FALSE;
8735
8736                 if (*s == ':' && s[1] != ':')
8737                     PL_expect = attrful;
8738                 else if (*s != '{' && key == KEY_sub) {
8739                     if (!have_name)
8740                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8741                     else if (*s != ';' && *s != '}')
8742                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8743                 }
8744
8745 #ifdef PERL_MAD
8746                 start_force(0);
8747                 if (tmpwhite) {
8748                     if (PL_madskills)
8749                         curmad('^', newSVpvs(""));
8750                     CURMAD('_', tmpwhite);
8751                 }
8752                 force_next(0);
8753
8754                 PL_thistoken = subtoken;
8755                 PERL_UNUSED_VAR(have_proto);
8756 #else
8757                 if (have_proto) {
8758                     NEXTVAL_NEXTTOKE.opval =
8759                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8760                     PL_lex_stuff = NULL;
8761                     force_next(THING);
8762                 }
8763 #endif
8764                 if (!have_name) {
8765                     if (PL_curstash)
8766                         sv_setpvs(PL_subname, "__ANON__");
8767                     else
8768                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8769                     TOKEN(ANONSUB);
8770                 }
8771 #ifndef PERL_MAD
8772                 force_ident_maybe_lex('&');
8773 #endif
8774                 TOKEN(SUB);
8775             }
8776
8777         case KEY_system:
8778             LOP(OP_SYSTEM,XREF);
8779
8780         case KEY_symlink:
8781             LOP(OP_SYMLINK,XTERM);
8782
8783         case KEY_syscall:
8784             LOP(OP_SYSCALL,XTERM);
8785
8786         case KEY_sysopen:
8787             LOP(OP_SYSOPEN,XTERM);
8788
8789         case KEY_sysseek:
8790             LOP(OP_SYSSEEK,XTERM);
8791
8792         case KEY_sysread:
8793             LOP(OP_SYSREAD,XTERM);
8794
8795         case KEY_syswrite:
8796             LOP(OP_SYSWRITE,XTERM);
8797
8798         case KEY_tr:
8799         case KEY_y:
8800             s = scan_trans(s);
8801             TERM(sublex_start());
8802
8803         case KEY_tell:
8804             UNI(OP_TELL);
8805
8806         case KEY_telldir:
8807             UNI(OP_TELLDIR);
8808
8809         case KEY_tie:
8810             LOP(OP_TIE,XTERM);
8811
8812         case KEY_tied:
8813             UNI(OP_TIED);
8814
8815         case KEY_time:
8816             FUN0(OP_TIME);
8817
8818         case KEY_times:
8819             FUN0(OP_TMS);
8820
8821         case KEY_truncate:
8822             LOP(OP_TRUNCATE,XTERM);
8823
8824         case KEY_uc:
8825             UNI(OP_UC);
8826
8827         case KEY_ucfirst:
8828             UNI(OP_UCFIRST);
8829
8830         case KEY_untie:
8831             UNI(OP_UNTIE);
8832
8833         case KEY_until:
8834             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8835                 return REPORT(0);
8836             pl_yylval.ival = CopLINE(PL_curcop);
8837             OPERATOR(UNTIL);
8838
8839         case KEY_unless:
8840             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8841                 return REPORT(0);
8842             pl_yylval.ival = CopLINE(PL_curcop);
8843             OPERATOR(UNLESS);
8844
8845         case KEY_unlink:
8846             LOP(OP_UNLINK,XTERM);
8847
8848         case KEY_undef:
8849             UNIDOR(OP_UNDEF);
8850
8851         case KEY_unpack:
8852             LOP(OP_UNPACK,XTERM);
8853
8854         case KEY_utime:
8855             LOP(OP_UTIME,XTERM);
8856
8857         case KEY_umask:
8858             UNIDOR(OP_UMASK);
8859
8860         case KEY_unshift:
8861             LOP(OP_UNSHIFT,XTERM);
8862
8863         case KEY_use:
8864             s = tokenize_use(1, s);
8865             OPERATOR(USE);
8866
8867         case KEY_values:
8868             UNI(OP_VALUES);
8869
8870         case KEY_vec:
8871             LOP(OP_VEC,XTERM);
8872
8873         case KEY_when:
8874             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8875                 return REPORT(0);
8876             pl_yylval.ival = CopLINE(PL_curcop);
8877             Perl_ck_warner_d(aTHX_
8878                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8879                 "when is experimental");
8880             OPERATOR(WHEN);
8881
8882         case KEY_while:
8883             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8884                 return REPORT(0);
8885             pl_yylval.ival = CopLINE(PL_curcop);
8886             OPERATOR(WHILE);
8887
8888         case KEY_warn:
8889             PL_hints |= HINT_BLOCK_SCOPE;
8890             LOP(OP_WARN,XTERM);
8891
8892         case KEY_wait:
8893             FUN0(OP_WAIT);
8894
8895         case KEY_waitpid:
8896             LOP(OP_WAITPID,XTERM);
8897
8898         case KEY_wantarray:
8899             FUN0(OP_WANTARRAY);
8900
8901         case KEY_write:
8902 #ifdef EBCDIC
8903         {
8904             char ctl_l[2];
8905             ctl_l[0] = toCTRL('L');
8906             ctl_l[1] = '\0';
8907             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8908         }
8909 #else
8910             /* Make sure $^L is defined */
8911             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8912 #endif
8913             UNI(OP_ENTERWRITE);
8914
8915         case KEY_x:
8916             if (PL_expect == XOPERATOR) {
8917                 if (*s == '=' && !PL_lex_allbrackets &&
8918                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8919                     return REPORT(0);
8920                 Mop(OP_REPEAT);
8921             }
8922             check_uni();
8923             goto just_a_word;
8924
8925         case KEY_xor:
8926             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8927                 return REPORT(0);
8928             pl_yylval.ival = OP_XOR;
8929             OPERATOR(OROP);
8930         }
8931     }}
8932 }
8933 #ifdef __SC__
8934 #pragma segment Main
8935 #endif
8936
8937 /*
8938   S_pending_ident
8939
8940   Looks up an identifier in the pad or in a package
8941
8942   Returns:
8943     PRIVATEREF if this is a lexical name.
8944     WORD       if this belongs to a package.
8945
8946   Structure:
8947       if we're in a my declaration
8948           croak if they tried to say my($foo::bar)
8949           build the ops for a my() declaration
8950       if it's an access to a my() variable
8951           build ops for access to a my() variable
8952       if in a dq string, and they've said @foo and we can't find @foo
8953           warn
8954       build ops for a bareword
8955 */
8956
8957 static int
8958 S_pending_ident(pTHX)
8959 {
8960     dVAR;
8961     PADOFFSET tmp = 0;
8962     const char pit = (char)pl_yylval.ival;
8963     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8964     /* All routes through this function want to know if there is a colon.  */
8965     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8966
8967     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8968           "### Pending identifier '%s'\n", PL_tokenbuf); });
8969
8970     /* if we're in a my(), we can't allow dynamics here.
8971        $foo'bar has already been turned into $foo::bar, so
8972        just check for colons.
8973
8974        if it's a legal name, the OP is a PADANY.
8975     */
8976     if (PL_in_my) {
8977         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8978             if (has_colon)
8979                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8980                                   "variable %s in \"our\"",
8981                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8982             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8983         }
8984         else {
8985             if (has_colon)
8986                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8987                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8988                             UTF ? SVf_UTF8 : 0);
8989
8990             pl_yylval.opval = newOP(OP_PADANY, 0);
8991             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8992                                                         UTF ? SVf_UTF8 : 0);
8993             return PRIVATEREF;
8994         }
8995     }
8996
8997     /*
8998        build the ops for accesses to a my() variable.
8999     */
9000
9001     if (!has_colon) {
9002         if (!PL_in_my)
9003             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9004                                     UTF ? SVf_UTF8 : 0);
9005         if (tmp != NOT_IN_PAD) {
9006             /* might be an "our" variable" */
9007             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9008                 /* build ops for a bareword */
9009                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9010                 HEK * const stashname = HvNAME_HEK(stash);
9011                 SV *  const sym = newSVhek(stashname);
9012                 sv_catpvs(sym, "::");
9013                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9014                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9015                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9016                 if (pit != '&')
9017                   gv_fetchsv(sym,
9018                     (PL_in_eval
9019                         ? (GV_ADDMULTI | GV_ADDINEVAL)
9020                         : GV_ADDMULTI
9021                     ),
9022                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9023                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9024                      : SVt_PVHV));
9025                 return WORD;
9026             }
9027
9028             pl_yylval.opval = newOP(OP_PADANY, 0);
9029             pl_yylval.opval->op_targ = tmp;
9030             return PRIVATEREF;
9031         }
9032     }
9033
9034     /*
9035        Whine if they've said @foo in a doublequoted string,
9036        and @foo isn't a variable we can find in the symbol
9037        table.
9038     */
9039     if (ckWARN(WARN_AMBIGUOUS) &&
9040         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9041         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9042                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9043         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9044                 /* DO NOT warn for @- and @+ */
9045                 && !( PL_tokenbuf[2] == '\0' &&
9046                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9047            )
9048         {
9049             /* Downgraded from fatal to warning 20000522 mjd */
9050             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9051                         "Possible unintended interpolation of %"UTF8f
9052                         " in string",
9053                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9054         }
9055     }
9056
9057     /* build ops for a bareword */
9058     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9059                                    newSVpvn_flags(PL_tokenbuf + 1,
9060                                                       tokenbuf_len - 1,
9061                                                       UTF ? SVf_UTF8 : 0 ));
9062     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9063     if (pit != '&')
9064         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9065                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9066                      | ( UTF ? SVf_UTF8 : 0 ),
9067                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9068                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9069                       : SVt_PVHV));
9070     return WORD;
9071 }
9072
9073 STATIC void
9074 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9075 {
9076     dVAR;
9077
9078     PERL_ARGS_ASSERT_CHECKCOMMA;
9079
9080     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9081         if (ckWARN(WARN_SYNTAX)) {
9082             int level = 1;
9083             const char *w;
9084             for (w = s+2; *w && level; w++) {
9085                 if (*w == '(')
9086                     ++level;
9087                 else if (*w == ')')
9088                     --level;
9089             }
9090             while (isSPACE(*w))
9091                 ++w;
9092             /* the list of chars below is for end of statements or
9093              * block / parens, boolean operators (&&, ||, //) and branch
9094              * constructs (or, and, if, until, unless, while, err, for).
9095              * Not a very solid hack... */
9096             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9097                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9098                             "%s (...) interpreted as function",name);
9099         }
9100     }
9101     while (s < PL_bufend && isSPACE(*s))
9102         s++;
9103     if (*s == '(')
9104         s++;
9105     while (s < PL_bufend && isSPACE(*s))
9106         s++;
9107     if (isIDFIRST_lazy_if(s,UTF)) {
9108         const char * const w = s;
9109         s += UTF ? UTF8SKIP(s) : 1;
9110         while (isWORDCHAR_lazy_if(s,UTF))
9111             s += UTF ? UTF8SKIP(s) : 1;
9112         while (s < PL_bufend && isSPACE(*s))
9113             s++;
9114         if (*s == ',') {
9115             GV* gv;
9116             if (keyword(w, s - w, 0))
9117                 return;
9118
9119             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9120             if (gv && GvCVu(gv))
9121                 return;
9122             Perl_croak(aTHX_ "No comma allowed after %s", what);
9123         }
9124     }
9125 }
9126
9127 /* S_new_constant(): do any overload::constant lookup.
9128
9129    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9130    Best used as sv=new_constant(..., sv, ...).
9131    If s, pv are NULL, calls subroutine with one argument,
9132    and <type> is used with error messages only.
9133    <type> is assumed to be well formed UTF-8 */
9134
9135 STATIC SV *
9136 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9137                SV *sv, SV *pv, const char *type, STRLEN typelen)
9138 {
9139     dVAR; dSP;
9140     HV * table = GvHV(PL_hintgv);                /* ^H */
9141     SV *res;
9142     SV *errsv = NULL;
9143     SV **cvp;
9144     SV *cv, *typesv;
9145     const char *why1 = "", *why2 = "", *why3 = "";
9146
9147     PERL_ARGS_ASSERT_NEW_CONSTANT;
9148     /* We assume that this is true: */
9149     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9150     assert(type || s);
9151
9152     /* charnames doesn't work well if there have been errors found */
9153     if (PL_error_count > 0 && *key == 'c')
9154     {
9155         SvREFCNT_dec_NN(sv);
9156         return &PL_sv_undef;
9157     }
9158
9159     sv_2mortal(sv);                     /* Parent created it permanently */
9160     if (!table
9161         || ! (PL_hints & HINT_LOCALIZE_HH)
9162         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9163         || ! SvOK(*cvp))
9164     {
9165         char *msg;
9166
9167         /* Here haven't found what we're looking for.  If it is charnames,
9168          * perhaps it needs to be loaded.  Try doing that before giving up */
9169         if (*key == 'c') {
9170             Perl_load_module(aTHX_
9171                             0,
9172                             newSVpvs("_charnames"),
9173                              /* version parameter; no need to specify it, as if
9174                               * we get too early a version, will fail anyway,
9175                               * not being able to find '_charnames' */
9176                             NULL,
9177                             newSVpvs(":full"),
9178                             newSVpvs(":short"),
9179                             NULL);
9180             SPAGAIN;
9181             table = GvHV(PL_hintgv);
9182             if (table
9183                 && (PL_hints & HINT_LOCALIZE_HH)
9184                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9185                 && SvOK(*cvp))
9186             {
9187                 goto now_ok;
9188             }
9189         }
9190         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9191             msg = Perl_form(aTHX_
9192                                "Constant(%.*s) unknown",
9193                                 (int)(type ? typelen : len),
9194                                 (type ? type: s));
9195         }
9196         else {
9197             why1 = "$^H{";
9198             why2 = key;
9199             why3 = "} is not defined";
9200         report:
9201             if (*key == 'c') {
9202                 msg = Perl_form(aTHX_
9203                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9204                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9205                       );
9206             }
9207             else {
9208                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9209                                     (int)(type ? typelen : len),
9210                                     (type ? type: s), why1, why2, why3);
9211             }
9212         }
9213         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9214         return SvREFCNT_inc_simple_NN(sv);
9215     }
9216 now_ok:
9217     cv = *cvp;
9218     if (!pv && s)
9219         pv = newSVpvn_flags(s, len, SVs_TEMP);
9220     if (type && pv)
9221         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9222     else
9223         typesv = &PL_sv_undef;
9224
9225     PUSHSTACKi(PERLSI_OVERLOAD);
9226     ENTER ;
9227     SAVETMPS;
9228
9229     PUSHMARK(SP) ;
9230     EXTEND(sp, 3);
9231     if (pv)
9232         PUSHs(pv);
9233     PUSHs(sv);
9234     if (pv)
9235         PUSHs(typesv);
9236     PUTBACK;
9237     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9238
9239     SPAGAIN ;
9240
9241     /* Check the eval first */
9242     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9243         STRLEN errlen;
9244         const char * errstr;
9245         sv_catpvs(errsv, "Propagated");
9246         errstr = SvPV_const(errsv, errlen);
9247         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9248         (void)POPs;
9249         res = SvREFCNT_inc_simple_NN(sv);
9250     }
9251     else {
9252         res = POPs;
9253         SvREFCNT_inc_simple_void_NN(res);
9254     }
9255
9256     PUTBACK ;
9257     FREETMPS ;
9258     LEAVE ;
9259     POPSTACK;
9260
9261     if (!SvOK(res)) {
9262         why1 = "Call to &{$^H{";
9263         why2 = key;
9264         why3 = "}} did not return a defined value";
9265         sv = res;
9266         (void)sv_2mortal(sv);
9267         goto report;
9268     }
9269
9270     return res;
9271 }
9272
9273 PERL_STATIC_INLINE void
9274 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9275     dVAR;
9276     PERL_ARGS_ASSERT_PARSE_IDENT;
9277
9278     for (;;) {
9279         if (*d >= e)
9280             Perl_croak(aTHX_ "%s", ident_too_long);
9281         if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9282              /* The UTF-8 case must come first, otherwise things
9283              * like c\N{COMBINING TILDE} would start failing, as the
9284              * isWORDCHAR_A case below would gobble the 'c' up.
9285              */
9286
9287             char *t = *s + UTF8SKIP(*s);
9288             while (isIDCONT_utf8((U8*)t))
9289                 t += UTF8SKIP(t);
9290             if (*d + (t - *s) > e)
9291                 Perl_croak(aTHX_ "%s", ident_too_long);
9292             Copy(*s, *d, t - *s, char);
9293             *d += t - *s;
9294             *s = t;
9295         }
9296         else if ( isWORDCHAR_A(**s) ) {
9297             do {
9298                 *(*d)++ = *(*s)++;
9299             } while isWORDCHAR_A(**s);
9300         }
9301         else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9302             *(*d)++ = ':';
9303             *(*d)++ = ':';
9304             (*s)++;
9305         }
9306         else if (allow_package && **s == ':' && (*s)[1] == ':'
9307            /* Disallow things like Foo::$bar. For the curious, this is
9308             * the code path that triggers the "Bad name after" warning
9309             * when looking for barewords.
9310             */
9311            && (*s)[2] != '$') {
9312             *(*d)++ = *(*s)++;
9313             *(*d)++ = *(*s)++;
9314         }
9315         else
9316             break;
9317     }
9318     return;
9319 }
9320
9321 /* Returns a NUL terminated string, with the length of the string written to
9322    *slp
9323    */
9324 STATIC char *
9325 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9326 {
9327     dVAR;
9328     char *d = dest;
9329     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9330     bool is_utf8 = cBOOL(UTF);
9331
9332     PERL_ARGS_ASSERT_SCAN_WORD;
9333
9334     parse_ident(&s, &d, e, allow_package, is_utf8);
9335     *d = '\0';
9336     *slp = d - dest;
9337     return s;
9338 }
9339
9340 STATIC char *
9341 S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9342 {
9343     dVAR;
9344     char *bracket = NULL;
9345     char funny = *s++;
9346     char *d = dest;
9347     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9348     bool is_utf8 = cBOOL(UTF);
9349
9350     PERL_ARGS_ASSERT_SCAN_IDENT;
9351
9352     if (isSPACE(*s))
9353         s = PEEKSPACE(s);
9354     if (isDIGIT(*s)) {
9355         while (isDIGIT(*s)) {
9356             if (d >= e)
9357                 Perl_croak(aTHX_ "%s", ident_too_long);
9358             *d++ = *s++;
9359         }
9360     }
9361     else {
9362         parse_ident(&s, &d, e, 1, is_utf8);
9363     }
9364     *d = '\0';
9365     d = dest;
9366     if (*d) {
9367         /* Either a digit variable, or parse_ident() found an identifier
9368            (anything valid as a bareword), so job done and return.  */
9369         if (PL_lex_state != LEX_NORMAL)
9370             PL_lex_state = LEX_INTERPENDMAYBE;
9371         return s;
9372     }
9373     if (*s == '$' && s[1] &&
9374       (isIDFIRST_lazy_if(s+1,is_utf8)
9375          || isDIGIT_A((U8)s[1])
9376          || s[1] == '$'
9377          || s[1] == '{'
9378          || strnEQ(s+1,"::",2)) )
9379     {
9380         /* Dereferencing a value in a scalar variable.
9381            The alternatives are different syntaxes for a scalar variable.
9382            Using ' as a leading package separator isn't allowed. :: is.   */
9383         return s;
9384     }
9385     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9386     if (*s == '{') {
9387         bracket = s;
9388         s++;
9389         while (s < send && SPACE_OR_TAB(*s))
9390            s++;
9391     }
9392
9393 #define VALID_LEN_ONE_IDENT(d, u)     (isPUNCT_A((U8)(d))     \
9394                                         || isCNTRL_A((U8)(d)) \
9395                                         || isDIGIT_A((U8)(d)) \
9396                                         || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
9397     if (s < send
9398         && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9399     {
9400         if (is_utf8) {
9401             const STRLEN skip = UTF8SKIP(s);
9402             STRLEN i;
9403             d[skip] = '\0';
9404             for ( i = 0; i < skip; i++ )
9405                 d[i] = *s++;
9406         }
9407         else {
9408             *d = *s++;
9409             d[1] = '\0';
9410         }
9411     }
9412     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9413     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9414         *d = toCTRL(*s);
9415         s++;
9416     }
9417     /* Warn about ambiguous code after unary operators if {...} notation isn't
9418        used.  There's no difference in ambiguity; it's merely a heuristic
9419        about when not to warn.  */
9420     else if (ck_uni && !bracket)
9421         check_uni();
9422     if (bracket) {
9423         /* If we were processing {...} notation then...  */
9424         if (isIDFIRST_lazy_if(d,is_utf8)) {
9425             /* if it starts as a valid identifier, assume that it is one.
9426                (the later check for } being at the expected point will trap
9427                cases where this doesn't pan out.)  */
9428         d += is_utf8 ? UTF8SKIP(d) : 1;
9429         parse_ident(&s, &d, e, 1, is_utf8);
9430             *d = '\0';
9431             while (s < send && SPACE_OR_TAB(*s))
9432                 s++;
9433             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9434                 /* ${foo[0]} and ${foo{bar}} notation.  */
9435                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9436                     const char * const brack =
9437                         (const char *)
9438                         ((*s == '[') ? "[...]" : "{...}");
9439    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9440                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9441                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9442                         funny, dest, brack, funny, dest, brack);
9443                 }
9444                 bracket++;
9445                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9446                 PL_lex_allbrackets++;
9447                 return s;
9448             }
9449         }
9450         /* Handle extended ${^Foo} variables
9451          * 1999-02-27 mjd-perl-patch@plover.com */
9452         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9453                  && isWORDCHAR(*s))
9454         {
9455             d++;
9456             while (isWORDCHAR(*s) && d < e) {
9457                 *d++ = *s++;
9458             }
9459             if (d >= e)
9460                 Perl_croak(aTHX_ "%s", ident_too_long);
9461             *d = '\0';
9462         }
9463
9464         while (s < send && SPACE_OR_TAB(*s))
9465             s++;
9466
9467         /* Expect to find a closing } after consuming any trailing whitespace.
9468          */
9469         if (*s == '}') {
9470             s++;
9471             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9472                 PL_lex_state = LEX_INTERPEND;
9473                 PL_expect = XREF;
9474             }
9475             if (PL_lex_state == LEX_NORMAL) {
9476                 if (ckWARN(WARN_AMBIGUOUS) &&
9477                     (keyword(dest, d - dest, 0)
9478                      || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9479                 {
9480                     SV *tmp = newSVpvn_flags( dest, d - dest,
9481                                             SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9482                     if (funny == '#')
9483                         funny = '@';
9484                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9485                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9486                         funny, tmp, funny, tmp);
9487                 }
9488             }
9489         }
9490         else {
9491             /* Didn't find the closing } at the point we expected, so restore
9492                state such that the next thing to process is the opening { and */
9493             s = bracket;                /* let the parser handle it */
9494             *dest = '\0';
9495         }
9496     }
9497     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9498         PL_lex_state = LEX_INTERPEND;
9499     return s;
9500 }
9501
9502 static bool
9503 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9504
9505     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9506      * the parse starting at 's', based on the subset that are valid in this
9507      * context input to this routine in 'valid_flags'. Advances s.  Returns
9508      * TRUE if the input should be treated as a valid flag, so the next char
9509      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9510      * first call on the current regex.  This routine will set it to any
9511      * charset modifier found.  The caller shouldn't change it.  This way,
9512      * another charset modifier encountered in the parse can be detected as an
9513      * error, as we have decided to allow only one */
9514
9515     const char c = **s;
9516     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9517
9518     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9519         if (isWORDCHAR_lazy_if(*s, UTF)) {
9520             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9521                        UTF ? SVf_UTF8 : 0);
9522             (*s) += charlen;
9523             /* Pretend that it worked, so will continue processing before
9524              * dieing */
9525             return TRUE;
9526         }
9527         return FALSE;
9528     }
9529
9530     switch (c) {
9531
9532         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9533         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9534         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9535         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9536         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9537         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9538         case LOCALE_PAT_MOD:
9539             if (*charset) {
9540                 goto multiple_charsets;
9541             }
9542             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9543             *charset = c;
9544             break;
9545         case UNICODE_PAT_MOD:
9546             if (*charset) {
9547                 goto multiple_charsets;
9548             }
9549             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9550             *charset = c;
9551             break;
9552         case ASCII_RESTRICT_PAT_MOD:
9553             if (! *charset) {
9554                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9555             }
9556             else {
9557
9558                 /* Error if previous modifier wasn't an 'a', but if it was, see
9559                  * if, and accept, a second occurrence (only) */
9560                 if (*charset != 'a'
9561                     || get_regex_charset(*pmfl)
9562                         != REGEX_ASCII_RESTRICTED_CHARSET)
9563                 {
9564                         goto multiple_charsets;
9565                 }
9566                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9567             }
9568             *charset = c;
9569             break;
9570         case DEPENDS_PAT_MOD:
9571             if (*charset) {
9572                 goto multiple_charsets;
9573             }
9574             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9575             *charset = c;
9576             break;
9577     }
9578
9579     (*s)++;
9580     return TRUE;
9581
9582     multiple_charsets:
9583         if (*charset != c) {
9584             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9585         }
9586         else if (c == 'a') {
9587             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9588         }
9589         else {
9590             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9591         }
9592
9593         /* Pretend that it worked, so will continue processing before dieing */
9594         (*s)++;
9595         return TRUE;
9596 }
9597
9598 STATIC char *
9599 S_scan_pat(pTHX_ char *start, I32 type)
9600 {
9601     dVAR;
9602     PMOP *pm;
9603     char *s;
9604     const char * const valid_flags =
9605         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9606     char charset = '\0';    /* character set modifier */
9607 #ifdef PERL_MAD
9608     char *modstart;
9609 #endif
9610
9611     PERL_ARGS_ASSERT_SCAN_PAT;
9612
9613     s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9614                        TRUE /* look for escaped bracketed metas */ );
9615
9616     if (!s) {
9617         const char * const delimiter = skipspace(start);
9618         Perl_croak(aTHX_
9619                    (const char *)
9620                    (*delimiter == '?'
9621                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9622                     : "Search pattern not terminated" ));
9623     }
9624
9625     pm = (PMOP*)newPMOP(type, 0);
9626     if (PL_multi_open == '?') {
9627         /* This is the only point in the code that sets PMf_ONCE:  */
9628         pm->op_pmflags |= PMf_ONCE;
9629
9630         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9631            allows us to restrict the list needed by reset to just the ??
9632            matches.  */
9633         assert(type != OP_TRANS);
9634         if (PL_curstash) {
9635             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9636             U32 elements;
9637             if (!mg) {
9638                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9639                                  0);
9640             }
9641             elements = mg->mg_len / sizeof(PMOP**);
9642             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9643             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9644             mg->mg_len = elements * sizeof(PMOP**);
9645             PmopSTASH_set(pm,PL_curstash);
9646         }
9647     }
9648 #ifdef PERL_MAD
9649     modstart = s;
9650 #endif
9651
9652     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9653      * anon CV. False positives like qr/[(?{]/ are harmless */
9654
9655     if (type == OP_QR) {
9656         STRLEN len;
9657         char *e, *p = SvPV(PL_lex_stuff, len);
9658         e = p + len;
9659         for (; p < e; p++) {
9660             if (p[0] == '(' && p[1] == '?'
9661                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9662             {
9663                 pm->op_pmflags |= PMf_HAS_CV;
9664                 break;
9665             }
9666         }
9667         pm->op_pmflags |= PMf_IS_QR;
9668     }
9669
9670     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9671 #ifdef PERL_MAD
9672     if (PL_madskills && modstart != s) {
9673         SV* tmptoken = newSVpvn(modstart, s - modstart);
9674         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9675     }
9676 #endif
9677     /* issue a warning if /c is specified,but /g is not */
9678     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9679     {
9680         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9681                        "Use of /c modifier is meaningless without /g" );
9682     }
9683
9684     PL_lex_op = (OP*)pm;
9685     pl_yylval.ival = OP_MATCH;
9686     return s;
9687 }
9688
9689 STATIC char *
9690 S_scan_subst(pTHX_ char *start)
9691 {
9692     dVAR;
9693     char *s;
9694     PMOP *pm;
9695     I32 first_start;
9696     I32 es = 0;
9697     char charset = '\0';    /* character set modifier */
9698 #ifdef PERL_MAD
9699     char *modstart;
9700 #endif
9701
9702     PERL_ARGS_ASSERT_SCAN_SUBST;
9703
9704     pl_yylval.ival = OP_NULL;
9705
9706     s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9707                  TRUE /* look for escaped bracketed metas */ );
9708
9709     if (!s)
9710         Perl_croak(aTHX_ "Substitution pattern not terminated");
9711
9712     if (s[-1] == PL_multi_open)
9713         s--;
9714 #ifdef PERL_MAD
9715     if (PL_madskills) {
9716         CURMAD('q', PL_thisopen);
9717         CURMAD('_', PL_thiswhite);
9718         CURMAD('E', PL_thisstuff);
9719         CURMAD('Q', PL_thisclose);
9720         PL_realtokenstart = s - SvPVX(PL_linestr);
9721     }
9722 #endif
9723
9724     first_start = PL_multi_start;
9725     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9726     if (!s) {
9727         if (PL_lex_stuff) {
9728             SvREFCNT_dec(PL_lex_stuff);
9729             PL_lex_stuff = NULL;
9730         }
9731         Perl_croak(aTHX_ "Substitution replacement not terminated");
9732     }
9733     PL_multi_start = first_start;       /* so whole substitution is taken together */
9734
9735     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9736
9737 #ifdef PERL_MAD
9738     if (PL_madskills) {
9739         CURMAD('z', PL_thisopen);
9740         CURMAD('R', PL_thisstuff);
9741         CURMAD('Z', PL_thisclose);
9742     }
9743     modstart = s;
9744 #endif
9745
9746     while (*s) {
9747         if (*s == EXEC_PAT_MOD) {
9748             s++;
9749             es++;
9750         }
9751         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9752         {
9753             break;
9754         }
9755     }
9756
9757 #ifdef PERL_MAD
9758     if (PL_madskills) {
9759         if (modstart != s)
9760             curmad('m', newSVpvn(modstart, s - modstart));
9761         append_madprops(PL_thismad, (OP*)pm, 0);
9762         PL_thismad = 0;
9763     }
9764 #endif
9765     if ((pm->op_pmflags & PMf_CONTINUE)) {
9766         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9767     }
9768
9769     if (es) {
9770         SV * const repl = newSVpvs("");
9771
9772         PL_multi_end = 0;
9773         pm->op_pmflags |= PMf_EVAL;
9774         while (es-- > 0) {
9775             if (es)
9776                 sv_catpvs(repl, "eval ");
9777             else
9778                 sv_catpvs(repl, "do ");
9779         }
9780         sv_catpvs(repl, "{");
9781         sv_catsv(repl, PL_sublex_info.repl);
9782         sv_catpvs(repl, "}");
9783         SvEVALED_on(repl);
9784         SvREFCNT_dec(PL_sublex_info.repl);
9785         PL_sublex_info.repl = repl;
9786     }
9787
9788     PL_lex_op = (OP*)pm;
9789     pl_yylval.ival = OP_SUBST;
9790     return s;
9791 }
9792
9793 STATIC char *
9794 S_scan_trans(pTHX_ char *start)
9795 {
9796     dVAR;
9797     char* s;
9798     OP *o;
9799     U8 squash;
9800     U8 del;
9801     U8 complement;
9802     bool nondestruct = 0;
9803 #ifdef PERL_MAD
9804     char *modstart;
9805 #endif
9806
9807     PERL_ARGS_ASSERT_SCAN_TRANS;
9808
9809     pl_yylval.ival = OP_NULL;
9810
9811     s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9812     if (!s)
9813         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9814
9815     if (s[-1] == PL_multi_open)
9816         s--;
9817 #ifdef PERL_MAD
9818     if (PL_madskills) {
9819         CURMAD('q', PL_thisopen);
9820         CURMAD('_', PL_thiswhite);
9821         CURMAD('E', PL_thisstuff);
9822         CURMAD('Q', PL_thisclose);
9823         PL_realtokenstart = s - SvPVX(PL_linestr);
9824     }
9825 #endif
9826
9827     s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9828     if (!s) {
9829         if (PL_lex_stuff) {
9830             SvREFCNT_dec(PL_lex_stuff);
9831             PL_lex_stuff = NULL;
9832         }
9833         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9834     }
9835     if (PL_madskills) {
9836         CURMAD('z', PL_thisopen);
9837         CURMAD('R', PL_thisstuff);
9838         CURMAD('Z', PL_thisclose);
9839     }
9840
9841     complement = del = squash = 0;
9842 #ifdef PERL_MAD
9843     modstart = s;
9844 #endif
9845     while (1) {
9846         switch (*s) {
9847         case 'c':
9848             complement = OPpTRANS_COMPLEMENT;
9849             break;
9850         case 'd':
9851             del = OPpTRANS_DELETE;
9852             break;
9853         case 's':
9854             squash = OPpTRANS_SQUASH;
9855             break;
9856         case 'r':
9857             nondestruct = 1;
9858             break;
9859         default:
9860             goto no_more;
9861         }
9862         s++;
9863     }
9864   no_more:
9865
9866     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9867     o->op_private &= ~OPpTRANS_ALL;
9868     o->op_private |= del|squash|complement|
9869       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9870       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9871
9872     PL_lex_op = o;
9873     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9874
9875 #ifdef PERL_MAD
9876     if (PL_madskills) {
9877         if (modstart != s)
9878             curmad('m', newSVpvn(modstart, s - modstart));
9879         append_madprops(PL_thismad, o, 0);
9880         PL_thismad = 0;
9881     }
9882 #endif
9883
9884     return s;
9885 }
9886
9887 /* scan_heredoc
9888    Takes a pointer to the first < in <<FOO.
9889    Returns a pointer to the byte following <<FOO.
9890
9891    This function scans a heredoc, which involves different methods
9892    depending on whether we are in a string eval, quoted construct, etc.
9893    This is because PL_linestr could containing a single line of input, or
9894    a whole string being evalled, or the contents of the current quote-
9895    like operator.
9896
9897    The two basic methods are:
9898     - Steal lines from the input stream
9899     - Scan the heredoc in PL_linestr and remove it therefrom
9900
9901    In a file scope or filtered eval, the first method is used; in a
9902    string eval, the second.
9903
9904    In a quote-like operator, we have to choose between the two,
9905    depending on where we can find a newline.  We peek into outer lex-
9906    ing scopes until we find one with a newline in it.  If we reach the
9907    outermost lexing scope and it is a file, we use the stream method.
9908    Otherwise it is treated as an eval.
9909 */
9910
9911 STATIC char *
9912 S_scan_heredoc(pTHX_ char *s)
9913 {
9914     dVAR;
9915     I32 op_type = OP_SCALAR;
9916     I32 len;
9917     SV *tmpstr;
9918     char term;
9919     char *d;
9920     char *e;
9921     char *peek;
9922     const bool infile = PL_rsfp || PL_parser->filtered;
9923     LEXSHARED *shared = PL_parser->lex_shared;
9924 #ifdef PERL_MAD
9925     I32 stuffstart = s - SvPVX(PL_linestr);
9926     char *tstart;
9927
9928     PL_realtokenstart = -1;
9929 #endif
9930
9931     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9932
9933     s += 2;
9934     d = PL_tokenbuf + 1;
9935     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9936     *PL_tokenbuf = '\n';
9937     peek = s;
9938     while (SPACE_OR_TAB(*peek))
9939         peek++;
9940     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9941         s = peek;
9942         term = *s++;
9943         s = delimcpy(d, e, s, PL_bufend, term, &len);
9944         if (s == PL_bufend)
9945             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9946         d += len;
9947         s++;
9948     }
9949     else {
9950         if (*s == '\\')
9951             /* <<\FOO is equivalent to <<'FOO' */
9952             s++, term = '\'';
9953         else
9954             term = '"';
9955         if (!isWORDCHAR_lazy_if(s,UTF))
9956             deprecate("bare << to mean <<\"\"");
9957         for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9958             if (d < e)
9959                 *d++ = *s;
9960         }
9961     }
9962     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9963         Perl_croak(aTHX_ "Delimiter for here document is too long");
9964     *d++ = '\n';
9965     *d = '\0';
9966     len = d - PL_tokenbuf;
9967
9968 #ifdef PERL_MAD
9969     if (PL_madskills) {
9970         tstart = PL_tokenbuf + 1;
9971         PL_thisclose = newSVpvn(tstart, len - 1);
9972         tstart = SvPVX(PL_linestr) + stuffstart;
9973         PL_thisopen = newSVpvn(tstart, s - tstart);
9974         stuffstart = s - SvPVX(PL_linestr);
9975     }
9976 #endif
9977 #ifndef PERL_STRICT_CR
9978     d = strchr(s, '\r');
9979     if (d) {
9980         char * const olds = s;
9981         s = d;
9982         while (s < PL_bufend) {
9983             if (*s == '\r') {
9984                 *d++ = '\n';
9985                 if (*++s == '\n')
9986                     s++;
9987             }
9988             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9989                 *d++ = *s++;
9990                 s++;
9991             }
9992             else
9993                 *d++ = *s++;
9994         }
9995         *d = '\0';
9996         PL_bufend = d;
9997         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9998         s = olds;
9999     }
10000 #endif
10001 #ifdef PERL_MAD
10002     if (PL_madskills) {
10003         tstart = SvPVX(PL_linestr) + stuffstart;
10004         if (PL_thisstuff)
10005             sv_catpvn(PL_thisstuff, tstart, s - tstart);
10006         else
10007             PL_thisstuff = newSVpvn(tstart, s - tstart);
10008     }
10009
10010     stuffstart = s - SvPVX(PL_linestr);
10011 #endif
10012
10013     tmpstr = newSV_type(SVt_PVIV);
10014     SvGROW(tmpstr, 80);
10015     if (term == '\'') {
10016         op_type = OP_CONST;
10017         SvIV_set(tmpstr, -1);
10018     }
10019     else if (term == '`') {
10020         op_type = OP_BACKTICK;
10021         SvIV_set(tmpstr, '\\');
10022     }
10023
10024     PL_multi_start = CopLINE(PL_curcop) + 1;
10025     PL_multi_open = PL_multi_close = '<';
10026     /* inside a string eval or quote-like operator */
10027     if (!infile || PL_lex_inwhat) {
10028         SV *linestr;
10029         char *bufend;
10030         char * const olds = s;
10031         PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10032         /* These two fields are not set until an inner lexing scope is
10033            entered.  But we need them set here. */
10034         shared->ls_bufptr  = s;
10035         shared->ls_linestr = PL_linestr;
10036         if (PL_lex_inwhat)
10037           /* Look for a newline.  If the current buffer does not have one,
10038              peek into the line buffer of the parent lexing scope, going
10039              up as many levels as necessary to find one with a newline
10040              after bufptr.
10041            */
10042           while (!(s = (char *)memchr(
10043                     (void *)shared->ls_bufptr, '\n',
10044                     SvEND(shared->ls_linestr)-shared->ls_bufptr
10045                 ))) {
10046             shared = shared->ls_prev;
10047             /* shared is only null if we have gone beyond the outermost
10048                lexing scope.  In a file, we will have broken out of the
10049                loop in the previous iteration.  In an eval, the string buf-
10050                fer ends with "\n;", so the while condition above will have
10051                evaluated to false.  So shared can never be null. */
10052             assert(shared);
10053             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10054                most lexing scope.  In a file, shared->ls_linestr at that
10055                level is just one line, so there is no body to steal. */
10056             if (infile && !shared->ls_prev) {
10057                 s = olds;
10058                 goto streaming;
10059             }
10060           }
10061         else {  /* eval */
10062             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10063             assert(s);
10064         }
10065         linestr = shared->ls_linestr;
10066         bufend = SvEND(linestr);
10067         d = s;
10068         while (s < bufend - len + 1 &&
10069           memNE(s,PL_tokenbuf,len) ) {
10070             if (*s++ == '\n')
10071                 ++shared->herelines;
10072         }
10073         if (s >= bufend - len + 1) {
10074             goto interminable;
10075         }
10076         sv_setpvn(tmpstr,d+1,s-d);
10077 #ifdef PERL_MAD
10078         if (PL_madskills) {
10079             if (PL_thisstuff)
10080                 sv_catpvn(PL_thisstuff, d + 1, s - d);
10081             else
10082                 PL_thisstuff = newSVpvn(d + 1, s - d);
10083             stuffstart = s - SvPVX(PL_linestr);
10084         }
10085 #endif
10086         s += len - 1;
10087         /* the preceding stmt passes a newline */
10088         shared->herelines++;
10089
10090         /* s now points to the newline after the heredoc terminator.
10091            d points to the newline before the body of the heredoc.
10092          */
10093
10094         /* We are going to modify linestr in place here, so set
10095            aside copies of the string if necessary for re-evals or
10096            (caller $n)[6]. */
10097         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10098            check shared->re_eval_str. */
10099         if (shared->re_eval_start || shared->re_eval_str) {
10100             /* Set aside the rest of the regexp */
10101             if (!shared->re_eval_str)
10102                 shared->re_eval_str =
10103                        newSVpvn(shared->re_eval_start,
10104                                 bufend - shared->re_eval_start);
10105             shared->re_eval_start -= s-d;
10106         }
10107         if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10108             CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10109             cx->blk_eval.cur_text == linestr)
10110         {
10111             cx->blk_eval.cur_text = newSVsv(linestr);
10112             SvSCREAM_on(cx->blk_eval.cur_text);
10113         }
10114         /* Copy everything from s onwards back to d. */
10115         Move(s,d,bufend-s + 1,char);
10116         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10117         /* Setting PL_bufend only applies when we have not dug deeper
10118            into other scopes, because sublex_done sets PL_bufend to
10119            SvEND(PL_linestr). */
10120         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10121         s = olds;
10122     }
10123     else
10124     {
10125       SV *linestr_save;
10126      streaming:
10127       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
10128       term = PL_tokenbuf[1];
10129       len--;
10130       linestr_save = PL_linestr; /* must restore this afterwards */
10131       d = s;                     /* and this */
10132       PL_linestr = newSVpvs("");
10133       PL_bufend = SvPVX(PL_linestr);
10134       while (1) {
10135 #ifdef PERL_MAD
10136         if (PL_madskills) {
10137             tstart = SvPVX(PL_linestr) + stuffstart;
10138             if (PL_thisstuff)
10139                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10140             else
10141                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10142         }
10143 #endif
10144         PL_bufptr = PL_bufend;
10145         CopLINE_set(PL_curcop,
10146                     PL_multi_start + shared->herelines);
10147         if (!lex_next_chunk(LEX_NO_TERM)
10148          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10149             SvREFCNT_dec(linestr_save);
10150             goto interminable;
10151         }
10152         CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10153         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10154             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10155             /* ^That should be enough to avoid this needing to grow:  */
10156             sv_catpvs(PL_linestr, "\n\0");
10157             assert(s == SvPVX(PL_linestr));
10158             PL_bufend = SvEND(PL_linestr);
10159         }
10160         s = PL_bufptr;
10161 #ifdef PERL_MAD
10162         stuffstart = s - SvPVX(PL_linestr);
10163 #endif
10164         shared->herelines++;
10165         PL_last_lop = PL_last_uni = NULL;
10166 #ifndef PERL_STRICT_CR
10167         if (PL_bufend - PL_linestart >= 2) {
10168             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10169                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10170             {
10171                 PL_bufend[-2] = '\n';
10172                 PL_bufend--;
10173                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10174             }
10175             else if (PL_bufend[-1] == '\r')
10176                 PL_bufend[-1] = '\n';
10177         }
10178         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10179             PL_bufend[-1] = '\n';
10180 #endif
10181         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10182             SvREFCNT_dec(PL_linestr);
10183             PL_linestr = linestr_save;
10184             PL_linestart = SvPVX(linestr_save);
10185             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10186             s = d;
10187             break;
10188         }
10189         else {
10190             sv_catsv(tmpstr,PL_linestr);
10191         }
10192       }
10193     }
10194     PL_multi_end = CopLINE(PL_curcop);
10195     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10196         SvPV_shrink_to_cur(tmpstr);
10197     }
10198     if (!IN_BYTES) {
10199         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10200             SvUTF8_on(tmpstr);
10201         else if (PL_encoding)
10202             sv_recode_to_utf8(tmpstr, PL_encoding);
10203     }
10204     PL_lex_stuff = tmpstr;
10205     pl_yylval.ival = op_type;
10206     return s;
10207
10208   interminable:
10209     SvREFCNT_dec(tmpstr);
10210     CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10211     missingterm(PL_tokenbuf + 1);
10212 }
10213
10214 /* scan_inputsymbol
10215    takes: current position in input buffer
10216    returns: new position in input buffer
10217    side-effects: pl_yylval and lex_op are set.
10218
10219    This code handles:
10220
10221    <>           read from ARGV
10222    <FH>         read from filehandle
10223    <pkg::FH>    read from package qualified filehandle
10224    <pkg'FH>     read from package qualified filehandle
10225    <$fh>        read from filehandle in $fh
10226    <*.h>        filename glob
10227
10228 */
10229
10230 STATIC char *
10231 S_scan_inputsymbol(pTHX_ char *start)
10232 {
10233     dVAR;
10234     char *s = start;            /* current position in buffer */
10235     char *end;
10236     I32 len;
10237     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10238     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10239
10240     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10241
10242     end = strchr(s, '\n');
10243     if (!end)
10244         end = PL_bufend;
10245     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
10246
10247     /* die if we didn't have space for the contents of the <>,
10248        or if it didn't end, or if we see a newline
10249     */
10250
10251     if (len >= (I32)sizeof PL_tokenbuf)
10252         Perl_croak(aTHX_ "Excessively long <> operator");
10253     if (s >= end)
10254         Perl_croak(aTHX_ "Unterminated <> operator");
10255
10256     s++;
10257
10258     /* check for <$fh>
10259        Remember, only scalar variables are interpreted as filehandles by
10260        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10261        treated as a glob() call.
10262        This code makes use of the fact that except for the $ at the front,
10263        a scalar variable and a filehandle look the same.
10264     */
10265     if (*d == '$' && d[1]) d++;
10266
10267     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10268     while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10269         d += UTF ? UTF8SKIP(d) : 1;
10270
10271     /* If we've tried to read what we allow filehandles to look like, and
10272        there's still text left, then it must be a glob() and not a getline.
10273        Use scan_str to pull out the stuff between the <> and treat it
10274        as nothing more than a string.
10275     */
10276
10277     if (d - PL_tokenbuf != len) {
10278         pl_yylval.ival = OP_GLOB;
10279         s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10280         if (!s)
10281            Perl_croak(aTHX_ "Glob not terminated");
10282         return s;
10283     }
10284     else {
10285         bool readline_overriden = FALSE;
10286         GV *gv_readline;
10287         GV **gvp;
10288         /* we're in a filehandle read situation */
10289         d = PL_tokenbuf;
10290
10291         /* turn <> into <ARGV> */
10292         if (!len)
10293             Copy("ARGV",d,5,char);
10294
10295         /* Check whether readline() is overriden */
10296         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10297         if ((gv_readline
10298                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10299                 ||
10300                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10301                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10302                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10303             readline_overriden = TRUE;
10304
10305         /* if <$fh>, create the ops to turn the variable into a
10306            filehandle
10307         */
10308         if (*d == '$') {
10309             /* try to find it in the pad for this block, otherwise find
10310                add symbol table ops
10311             */
10312             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10313             if (tmp != NOT_IN_PAD) {
10314                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10315                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10316                     HEK * const stashname = HvNAME_HEK(stash);
10317                     SV * const sym = sv_2mortal(newSVhek(stashname));
10318                     sv_catpvs(sym, "::");
10319                     sv_catpv(sym, d+1);
10320                     d = SvPVX(sym);
10321                     goto intro_sym;
10322                 }
10323                 else {
10324                     OP * const o = newOP(OP_PADSV, 0);
10325                     o->op_targ = tmp;
10326                     PL_lex_op = readline_overriden
10327                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10328                                 op_append_elem(OP_LIST, o,
10329                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10330                         : (OP*)newUNOP(OP_READLINE, 0, o);
10331                 }
10332             }
10333             else {
10334                 GV *gv;
10335                 ++d;
10336 intro_sym:
10337                 gv = gv_fetchpv(d,
10338                                 (PL_in_eval
10339                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
10340                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10341                                 SVt_PV);
10342                 PL_lex_op = readline_overriden
10343                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10344                             op_append_elem(OP_LIST,
10345                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10346                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10347                     : (OP*)newUNOP(OP_READLINE, 0,
10348                             newUNOP(OP_RV2SV, 0,
10349                                 newGVOP(OP_GV, 0, gv)));
10350             }
10351             if (!readline_overriden)
10352                 PL_lex_op->op_flags |= OPf_SPECIAL;
10353             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10354             pl_yylval.ival = OP_NULL;
10355         }
10356
10357         /* If it's none of the above, it must be a literal filehandle
10358            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10359         else {
10360             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10361             PL_lex_op = readline_overriden
10362                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10363                         op_append_elem(OP_LIST,
10364                             newGVOP(OP_GV, 0, gv),
10365                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10366                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10367             pl_yylval.ival = OP_NULL;
10368         }
10369     }
10370
10371     return s;
10372 }
10373
10374
10375 /* scan_str
10376    takes: start position in buffer
10377           keep_quoted preserve \ on the embedded delimiter(s)
10378           keep_delims preserve the delimiters around the string
10379           re_reparse  compiling a run-time /(?{})/:
10380                         collapse // to /,  and skip encoding src
10381    returns: position to continue reading from buffer
10382    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10383         updates the read buffer.
10384
10385    This subroutine pulls a string out of the input.  It is called for:
10386         q               single quotes           q(literal text)
10387         '               single quotes           'literal text'
10388         qq              double quotes           qq(interpolate $here please)
10389         "               double quotes           "interpolate $here please"
10390         qx              backticks               qx(/bin/ls -l)
10391         `               backticks               `/bin/ls -l`
10392         qw              quote words             @EXPORT_OK = qw( func() $spam )
10393         m//             regexp match            m/this/
10394         s///            regexp substitute       s/this/that/
10395         tr///           string transliterate    tr/this/that/
10396         y///            string transliterate    y/this/that/
10397         ($*@)           sub prototypes          sub foo ($)
10398         (stuff)         sub attr parameters     sub foo : attr(stuff)
10399         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10400
10401    In most of these cases (all but <>, patterns and transliterate)
10402    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10403    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10404    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10405    calls scan_str().
10406
10407    It skips whitespace before the string starts, and treats the first
10408    character as the delimiter.  If the delimiter is one of ([{< then
10409    the corresponding "close" character )]}> is used as the closing
10410    delimiter.  It allows quoting of delimiters, and if the string has
10411    balanced delimiters ([{<>}]) it allows nesting.
10412
10413    On success, the SV with the resulting string is put into lex_stuff or,
10414    if that is already non-NULL, into lex_repl. The second case occurs only
10415    when parsing the RHS of the special constructs s/// and tr/// (y///).
10416    For convenience, the terminating delimiter character is stuffed into
10417    SvIVX of the SV.
10418 */
10419
10420 STATIC char *
10421 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10422         bool deprecate_escaped_meta /* Should we issue a deprecation warning
10423                                        for certain paired metacharacters that
10424                                        appear escaped within it */
10425     )
10426 {
10427     dVAR;
10428     SV *sv;                     /* scalar value: string */
10429     const char *tmps;           /* temp string, used for delimiter matching */
10430     char *s = start;            /* current position in the buffer */
10431     char term;                  /* terminating character */
10432     char *to;                   /* current position in the sv's data */
10433     I32 brackets = 1;           /* bracket nesting level */
10434     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10435     I32 termcode;               /* terminating char. code */
10436     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10437     STRLEN termlen;             /* length of terminating string */
10438     int last_off = 0;           /* last position for nesting bracket */
10439     char *escaped_open = NULL;
10440 #ifdef PERL_MAD
10441     int stuffstart;
10442     char *tstart;
10443 #endif
10444
10445     PERL_ARGS_ASSERT_SCAN_STR;
10446
10447     /* skip space before the delimiter */
10448     if (isSPACE(*s)) {
10449         s = PEEKSPACE(s);
10450     }
10451
10452 #ifdef PERL_MAD
10453     if (PL_realtokenstart >= 0) {
10454         stuffstart = PL_realtokenstart;
10455         PL_realtokenstart = -1;
10456     }
10457     else
10458         stuffstart = start - SvPVX(PL_linestr);
10459 #endif
10460     /* mark where we are, in case we need to report errors */
10461     CLINE;
10462
10463     /* after skipping whitespace, the next character is the terminator */
10464     term = *s;
10465     if (!UTF) {
10466         termcode = termstr[0] = term;
10467         termlen = 1;
10468     }
10469     else {
10470         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10471         Copy(s, termstr, termlen, U8);
10472         if (!UTF8_IS_INVARIANT(term))
10473             has_utf8 = TRUE;
10474     }
10475
10476     /* mark where we are */
10477     PL_multi_start = CopLINE(PL_curcop);
10478     PL_multi_open = term;
10479
10480     /* find corresponding closing delimiter */
10481     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10482         termcode = termstr[0] = term = tmps[5];
10483
10484     PL_multi_close = term;
10485
10486     /* A warning is raised if the input parameter requires it for escaped (by a
10487      * backslash) paired metacharacters {} [] and () when the delimiters are
10488      * those same characters, and the backslash is ineffective.  This doesn't
10489      * happen for <>, as they aren't metas. */
10490     if (deprecate_escaped_meta
10491         && (PL_multi_open == PL_multi_close
10492             || ! ckWARN_d(WARN_DEPRECATED)
10493             || PL_multi_open == '<'))
10494     {
10495         deprecate_escaped_meta = FALSE;
10496     }
10497
10498     /* create a new SV to hold the contents.  79 is the SV's initial length.
10499        What a random number. */
10500     sv = newSV_type(SVt_PVIV);
10501     SvGROW(sv, 80);
10502     SvIV_set(sv, termcode);
10503     (void)SvPOK_only(sv);               /* validate pointer */
10504
10505     /* move past delimiter and try to read a complete string */
10506     if (keep_delims)
10507         sv_catpvn(sv, s, termlen);
10508     s += termlen;
10509 #ifdef PERL_MAD
10510     tstart = SvPVX(PL_linestr) + stuffstart;
10511     if (PL_madskills && !PL_thisopen && !keep_delims) {
10512         PL_thisopen = newSVpvn(tstart, s - tstart);
10513         stuffstart = s - SvPVX(PL_linestr);
10514     }
10515 #endif
10516     for (;;) {
10517         if (PL_encoding && !UTF && !re_reparse) {
10518             bool cont = TRUE;
10519
10520             while (cont) {
10521                 int offset = s - SvPVX_const(PL_linestr);
10522                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10523                                            &offset, (char*)termstr, termlen);
10524                 const char *ns;
10525                 char *svlast;
10526
10527                 if (SvIsCOW(PL_linestr)) {
10528                     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10529                     STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10530                     STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10531                     char *buf = SvPVX(PL_linestr);
10532                     bufend_pos = PL_parser->bufend - buf;
10533                     bufptr_pos = PL_parser->bufptr - buf;
10534                     oldbufptr_pos = PL_parser->oldbufptr - buf;
10535                     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10536                     linestart_pos = PL_parser->linestart - buf;
10537                     last_uni_pos = PL_parser->last_uni
10538                         ? PL_parser->last_uni - buf
10539                         : 0;
10540                     last_lop_pos = PL_parser->last_lop
10541                         ? PL_parser->last_lop - buf
10542                         : 0;
10543                     re_eval_start_pos =
10544                         PL_parser->lex_shared->re_eval_start ?
10545                             PL_parser->lex_shared->re_eval_start - buf : 0;
10546                     s_pos = s - buf;
10547
10548                     sv_force_normal(PL_linestr);
10549
10550                     buf = SvPVX(PL_linestr);
10551                     PL_parser->bufend = buf + bufend_pos;
10552                     PL_parser->bufptr = buf + bufptr_pos;
10553                     PL_parser->oldbufptr = buf + oldbufptr_pos;
10554                     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10555                     PL_parser->linestart = buf + linestart_pos;
10556                     if (PL_parser->last_uni)
10557                         PL_parser->last_uni = buf + last_uni_pos;
10558                     if (PL_parser->last_lop)
10559                         PL_parser->last_lop = buf + last_lop_pos;
10560                     if (PL_parser->lex_shared->re_eval_start)
10561                         PL_parser->lex_shared->re_eval_start  =
10562                             buf + re_eval_start_pos;
10563                     s = buf + s_pos;
10564                 }
10565                 ns = SvPVX_const(PL_linestr) + offset;
10566                 svlast = SvEND(sv) - 1;
10567
10568                 for (; s < ns; s++) {
10569                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10570                         COPLINE_INC_WITH_HERELINES;
10571                 }
10572                 if (!found)
10573                     goto read_more_line;
10574                 else {
10575                     /* handle quoted delimiters */
10576                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10577                         const char *t;
10578                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10579                             t--;
10580                         if ((svlast-1 - t) % 2) {
10581                             if (!keep_quoted) {
10582                                 *(svlast-1) = term;
10583                                 *svlast = '\0';
10584                                 SvCUR_set(sv, SvCUR(sv) - 1);
10585                             }
10586                             continue;
10587                         }
10588                     }
10589                     if (PL_multi_open == PL_multi_close) {
10590                         cont = FALSE;
10591                     }
10592                     else {
10593                         const char *t;
10594                         char *w;
10595                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10596                             /* At here, all closes are "was quoted" one,
10597                                so we don't check PL_multi_close. */
10598                             if (*t == '\\') {
10599                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10600                                     t++;
10601                                 else
10602                                     *w++ = *t++;
10603                             }
10604                             else if (*t == PL_multi_open)
10605                                 brackets++;
10606
10607                             *w = *t;
10608                         }
10609                         if (w < t) {
10610                             *w++ = term;
10611                             *w = '\0';
10612                             SvCUR_set(sv, w - SvPVX_const(sv));
10613                         }
10614                         last_off = w - SvPVX(sv);
10615                         if (--brackets <= 0)
10616                             cont = FALSE;
10617                     }
10618                 }
10619             }
10620             if (!keep_delims) {
10621                 SvCUR_set(sv, SvCUR(sv) - 1);
10622                 *SvEND(sv) = '\0';
10623             }
10624             break;
10625         }
10626
10627         /* extend sv if need be */
10628         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10629         /* set 'to' to the next character in the sv's string */
10630         to = SvPVX(sv)+SvCUR(sv);
10631
10632         /* if open delimiter is the close delimiter read unbridle */
10633         if (PL_multi_open == PL_multi_close) {
10634             for (; s < PL_bufend; s++,to++) {
10635                 /* embedded newlines increment the current line number */
10636                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10637                     COPLINE_INC_WITH_HERELINES;
10638                 /* handle quoted delimiters */
10639                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10640                     if (!keep_quoted
10641                         && (s[1] == term
10642                             || (re_reparse && s[1] == '\\'))
10643                     )
10644                         s++;
10645                     /* any other quotes are simply copied straight through */
10646                     else
10647                         *to++ = *s++;
10648                 }
10649                 /* terminate when run out of buffer (the for() condition), or
10650                    have found the terminator */
10651                 else if (*s == term) {
10652                     if (termlen == 1)
10653                         break;
10654                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10655                         break;
10656                 }
10657                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10658                     has_utf8 = TRUE;
10659                 *to = *s;
10660             }
10661         }
10662
10663         /* if the terminator isn't the same as the start character (e.g.,
10664            matched brackets), we have to allow more in the quoting, and
10665            be prepared for nested brackets.
10666         */
10667         else {
10668             /* read until we run out of string, or we find the terminator */
10669             for (; s < PL_bufend; s++,to++) {
10670                 /* embedded newlines increment the line count */
10671                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10672                     COPLINE_INC_WITH_HERELINES;
10673                 /* backslashes can escape the open or closing characters */
10674                 if (*s == '\\' && s+1 < PL_bufend) {
10675                     if (!keep_quoted &&
10676                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10677                     {
10678                         s++;
10679
10680                         /* Here, 'deprecate_escaped_meta' is true iff the
10681                          * delimiters are paired metacharacters, and 's' points
10682                          * to an occurrence of one of them within the string,
10683                          * which was preceded by a backslash.  If this is a
10684                          * context where the delimiter is also a metacharacter,
10685                          * the backslash is useless, and deprecated.  () and []
10686                          * are meta in any context. {} are meta only when
10687                          * appearing in a quantifier or in things like '\p{'
10688                          * (but '\\p{' isn't meta).  They also aren't meta
10689                          * unless there is a matching closed, escaped char
10690                          * later on within the string.  If 's' points to an
10691                          * open, set a flag; if to a close, test that flag, and
10692                          * raise a warning if it was set */
10693
10694                         if (deprecate_escaped_meta) {
10695                             if (*s == PL_multi_open) {
10696                                 if (*s != '{') {
10697                                     escaped_open = s;
10698                                 }
10699                                      /* Look for a closing '\}' */
10700                                 else if (regcurly(s, TRUE)) {
10701                                     escaped_open = s;
10702                                 }
10703                                      /* Look for e.g.  '\x{' */
10704                                 else if (s - start > 2
10705                                          && _generic_isCC(*(s-2),
10706                                              _CC_BACKSLASH_FOO_LBRACE_IS_META))
10707                                 { /* Exclude '\\x', '\\\\x', etc. */
10708                                     char *lookbehind = s - 4;
10709                                     bool is_meta = TRUE;
10710                                     while (lookbehind >= start
10711                                            && *lookbehind == '\\')
10712                                     {
10713                                         is_meta = ! is_meta;
10714                                         lookbehind--;
10715                                     }
10716                                     if (is_meta) {
10717                                         escaped_open = s;
10718                                     }
10719                                 }
10720                             }
10721                             else if (escaped_open) {
10722                                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10723                                     "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10724                                 escaped_open = NULL;
10725                             }
10726                         }
10727                     }
10728                     else
10729                         *to++ = *s++;
10730                 }
10731                 /* allow nested opens and closes */
10732                 else if (*s == PL_multi_close && --brackets <= 0)
10733                     break;
10734                 else if (*s == PL_multi_open)
10735                     brackets++;
10736                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10737                     has_utf8 = TRUE;
10738                 *to = *s;
10739             }
10740         }
10741         /* terminate the copied string and update the sv's end-of-string */
10742         *to = '\0';
10743         SvCUR_set(sv, to - SvPVX_const(sv));
10744
10745         /*
10746          * this next chunk reads more into the buffer if we're not done yet
10747          */
10748
10749         if (s < PL_bufend)
10750             break;              /* handle case where we are done yet :-) */
10751
10752 #ifndef PERL_STRICT_CR
10753         if (to - SvPVX_const(sv) >= 2) {
10754             if ((to[-2] == '\r' && to[-1] == '\n') ||
10755                 (to[-2] == '\n' && to[-1] == '\r'))
10756             {
10757                 to[-2] = '\n';
10758                 to--;
10759                 SvCUR_set(sv, to - SvPVX_const(sv));
10760             }
10761             else if (to[-1] == '\r')
10762                 to[-1] = '\n';
10763         }
10764         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10765             to[-1] = '\n';
10766 #endif
10767
10768      read_more_line:
10769         /* if we're out of file, or a read fails, bail and reset the current
10770            line marker so we can report where the unterminated string began
10771         */
10772 #ifdef PERL_MAD
10773         if (PL_madskills) {
10774             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10775             if (PL_thisstuff)
10776                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10777             else
10778                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10779         }
10780 #endif
10781         COPLINE_INC_WITH_HERELINES;
10782         PL_bufptr = PL_bufend;
10783         if (!lex_next_chunk(0)) {
10784             sv_free(sv);
10785             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10786             return NULL;
10787         }
10788         s = PL_bufptr;
10789 #ifdef PERL_MAD
10790         stuffstart = 0;
10791 #endif
10792     }
10793
10794     /* at this point, we have successfully read the delimited string */
10795
10796     if (!PL_encoding || UTF || re_reparse) {
10797 #ifdef PERL_MAD
10798         if (PL_madskills) {
10799             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10800             const int len = s - tstart;
10801             if (PL_thisstuff)
10802                 sv_catpvn(PL_thisstuff, tstart, len);
10803             else
10804                 PL_thisstuff = newSVpvn(tstart, len);
10805             if (!PL_thisclose && !keep_delims)
10806                 PL_thisclose = newSVpvn(s,termlen);
10807         }
10808 #endif
10809
10810         if (keep_delims)
10811             sv_catpvn(sv, s, termlen);
10812         s += termlen;
10813     }
10814 #ifdef PERL_MAD
10815     else {
10816         if (PL_madskills) {
10817             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10818             const int len = s - tstart - termlen;
10819             if (PL_thisstuff)
10820                 sv_catpvn(PL_thisstuff, tstart, len);
10821             else
10822                 PL_thisstuff = newSVpvn(tstart, len);
10823             if (!PL_thisclose && !keep_delims)
10824                 PL_thisclose = newSVpvn(s - termlen,termlen);
10825         }
10826     }
10827 #endif
10828     if (has_utf8 || (PL_encoding && !re_reparse))
10829         SvUTF8_on(sv);
10830
10831     PL_multi_end = CopLINE(PL_curcop);
10832
10833     /* if we allocated too much space, give some back */
10834     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10835         SvLEN_set(sv, SvCUR(sv) + 1);
10836         SvPV_renew(sv, SvLEN(sv));
10837     }
10838
10839     /* decide whether this is the first or second quoted string we've read
10840        for this op
10841     */
10842
10843     if (PL_lex_stuff)
10844         PL_sublex_info.repl = sv;
10845     else
10846         PL_lex_stuff = sv;
10847     return s;
10848 }
10849
10850 /*
10851   scan_num
10852   takes: pointer to position in buffer
10853   returns: pointer to new position in buffer
10854   side-effects: builds ops for the constant in pl_yylval.op
10855
10856   Read a number in any of the formats that Perl accepts:
10857
10858   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10859   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10860   0b[01](_?[01])*
10861   0[0-7](_?[0-7])*
10862   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10863
10864   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10865   thing it reads.
10866
10867   If it reads a number without a decimal point or an exponent, it will
10868   try converting the number to an integer and see if it can do so
10869   without loss of precision.
10870 */
10871
10872 char *
10873 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10874 {
10875     dVAR;
10876     const char *s = start;      /* current position in buffer */
10877     char *d;                    /* destination in temp buffer */
10878     char *e;                    /* end of temp buffer */
10879     NV nv;                              /* number read, as a double */
10880     SV *sv = NULL;                      /* place to put the converted number */
10881     bool floatit;                       /* boolean: int or float? */
10882     const char *lastub = NULL;          /* position of last underbar */
10883     static const char* const number_too_long = "Number too long";
10884
10885     PERL_ARGS_ASSERT_SCAN_NUM;
10886
10887     /* We use the first character to decide what type of number this is */
10888
10889     switch (*s) {
10890     default:
10891         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10892
10893     /* if it starts with a 0, it could be an octal number, a decimal in
10894        0.13 disguise, or a hexadecimal number, or a binary number. */
10895     case '0':
10896         {
10897           /* variables:
10898              u          holds the "number so far"
10899              shift      the power of 2 of the base
10900                         (hex == 4, octal == 3, binary == 1)
10901              overflowed was the number more than we can hold?
10902
10903              Shift is used when we add a digit.  It also serves as an "are
10904              we in octal/hex/binary?" indicator to disallow hex characters
10905              when in octal mode.
10906            */
10907             NV n = 0.0;
10908             UV u = 0;
10909             I32 shift;
10910             bool overflowed = FALSE;
10911             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10912             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10913             static const char* const bases[5] =
10914               { "", "binary", "", "octal", "hexadecimal" };
10915             static const char* const Bases[5] =
10916               { "", "Binary", "", "Octal", "Hexadecimal" };
10917             static const char* const maxima[5] =
10918               { "",
10919                 "0b11111111111111111111111111111111",
10920                 "",
10921                 "037777777777",
10922                 "0xffffffff" };
10923             const char *base, *Base, *max;
10924
10925             /* check for hex */
10926             if (s[1] == 'x' || s[1] == 'X') {
10927                 shift = 4;
10928                 s += 2;
10929                 just_zero = FALSE;
10930             } else if (s[1] == 'b' || s[1] == 'B') {
10931                 shift = 1;
10932                 s += 2;
10933                 just_zero = FALSE;
10934             }
10935             /* check for a decimal in disguise */
10936             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10937                 goto decimal;
10938             /* so it must be octal */
10939             else {
10940                 shift = 3;
10941                 s++;
10942             }
10943
10944             if (*s == '_') {
10945                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10946                                "Misplaced _ in number");
10947                lastub = s++;
10948             }
10949
10950             base = bases[shift];
10951             Base = Bases[shift];
10952             max  = maxima[shift];
10953
10954             /* read the rest of the number */
10955             for (;;) {
10956                 /* x is used in the overflow test,
10957                    b is the digit we're adding on. */
10958                 UV x, b;
10959
10960                 switch (*s) {
10961
10962                 /* if we don't mention it, we're done */
10963                 default:
10964                     goto out;
10965
10966                 /* _ are ignored -- but warned about if consecutive */
10967                 case '_':
10968                     if (lastub && s == lastub + 1)
10969                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10970                                        "Misplaced _ in number");
10971                     lastub = s++;
10972                     break;
10973
10974                 /* 8 and 9 are not octal */
10975                 case '8': case '9':
10976                     if (shift == 3)
10977                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10978                     /* FALL THROUGH */
10979
10980                 /* octal digits */
10981                 case '2': case '3': case '4':
10982                 case '5': case '6': case '7':
10983                     if (shift == 1)
10984                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10985                     /* FALL THROUGH */
10986
10987                 case '0': case '1':
10988                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10989                     goto digit;
10990
10991                 /* hex digits */
10992                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10993                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10994                     /* make sure they said 0x */
10995                     if (shift != 4)
10996                         goto out;
10997                     b = (*s++ & 7) + 9;
10998
10999                     /* Prepare to put the digit we have onto the end
11000                        of the number so far.  We check for overflows.
11001                     */
11002
11003                   digit:
11004                     just_zero = FALSE;
11005                     if (!overflowed) {
11006                         x = u << shift; /* make room for the digit */
11007
11008                         if ((x >> shift) != u
11009                             && !(PL_hints & HINT_NEW_BINARY)) {
11010                             overflowed = TRUE;
11011                             n = (NV) u;
11012                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11013                                              "Integer overflow in %s number",
11014                                              base);
11015                         } else
11016                             u = x | b;          /* add the digit to the end */
11017                     }
11018                     if (overflowed) {
11019                         n *= nvshift[shift];
11020                         /* If an NV has not enough bits in its
11021                          * mantissa to represent an UV this summing of
11022                          * small low-order numbers is a waste of time
11023                          * (because the NV cannot preserve the
11024                          * low-order bits anyway): we could just
11025                          * remember when did we overflow and in the
11026                          * end just multiply n by the right
11027                          * amount. */
11028                         n += (NV) b;
11029                     }
11030                     break;
11031                 }
11032             }
11033
11034           /* if we get here, we had success: make a scalar value from
11035              the number.
11036           */
11037           out:
11038
11039             /* final misplaced underbar check */
11040             if (s[-1] == '_') {
11041                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11042             }
11043
11044             if (overflowed) {
11045                 if (n > 4294967295.0)
11046                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11047                                    "%s number > %s non-portable",
11048                                    Base, max);
11049                 sv = newSVnv(n);
11050             }
11051             else {
11052 #if UVSIZE > 4
11053                 if (u > 0xffffffff)
11054                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11055                                    "%s number > %s non-portable",
11056                                    Base, max);
11057 #endif
11058                 sv = newSVuv(u);
11059             }
11060             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11061                 sv = new_constant(start, s - start, "integer",
11062                                   sv, NULL, NULL, 0);
11063             else if (PL_hints & HINT_NEW_BINARY)
11064                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11065         }
11066         break;
11067
11068     /*
11069       handle decimal numbers.
11070       we're also sent here when we read a 0 as the first digit
11071     */
11072     case '1': case '2': case '3': case '4': case '5':
11073     case '6': case '7': case '8': case '9': case '.':
11074       decimal:
11075         d = PL_tokenbuf;
11076         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11077         floatit = FALSE;
11078
11079         /* read next group of digits and _ and copy into d */
11080         while (isDIGIT(*s) || *s == '_') {
11081             /* skip underscores, checking for misplaced ones
11082                if -w is on
11083             */
11084             if (*s == '_') {
11085                 if (lastub && s == lastub + 1)
11086                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11087                                    "Misplaced _ in number");
11088                 lastub = s++;
11089             }
11090             else {
11091                 /* check for end of fixed-length buffer */
11092                 if (d >= e)
11093                     Perl_croak(aTHX_ "%s", number_too_long);
11094                 /* if we're ok, copy the character */
11095                 *d++ = *s++;
11096             }
11097         }
11098
11099         /* final misplaced underbar check */
11100         if (lastub && s == lastub + 1) {
11101             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11102         }
11103
11104         /* read a decimal portion if there is one.  avoid
11105            3..5 being interpreted as the number 3. followed
11106            by .5
11107         */
11108         if (*s == '.' && s[1] != '.') {
11109             floatit = TRUE;
11110             *d++ = *s++;
11111
11112             if (*s == '_') {
11113                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11114                                "Misplaced _ in number");
11115                 lastub = s;
11116             }
11117
11118             /* copy, ignoring underbars, until we run out of digits.
11119             */
11120             for (; isDIGIT(*s) || *s == '_'; s++) {
11121                 /* fixed length buffer check */
11122                 if (d >= e)
11123                     Perl_croak(aTHX_ "%s", number_too_long);
11124                 if (*s == '_') {
11125                    if (lastub && s == lastub + 1)
11126                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11127                                       "Misplaced _ in number");
11128                    lastub = s;
11129                 }
11130                 else
11131                     *d++ = *s;
11132             }
11133             /* fractional part ending in underbar? */
11134             if (s[-1] == '_') {
11135                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11136                                "Misplaced _ in number");
11137             }
11138             if (*s == '.' && isDIGIT(s[1])) {
11139                 /* oops, it's really a v-string, but without the "v" */
11140                 s = start;
11141                 goto vstring;
11142             }
11143         }
11144
11145         /* read exponent part, if present */
11146         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11147             floatit = TRUE;
11148             s++;
11149
11150             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11151             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
11152
11153             /* stray preinitial _ */
11154             if (*s == '_') {
11155                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11156                                "Misplaced _ in number");
11157                 lastub = s++;
11158             }
11159
11160             /* allow positive or negative exponent */
11161             if (*s == '+' || *s == '-')
11162                 *d++ = *s++;
11163
11164             /* stray initial _ */
11165             if (*s == '_') {
11166                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11167                                "Misplaced _ in number");
11168                 lastub = s++;
11169             }
11170
11171             /* read digits of exponent */
11172             while (isDIGIT(*s) || *s == '_') {
11173                 if (isDIGIT(*s)) {
11174                     if (d >= e)
11175                         Perl_croak(aTHX_ "%s", number_too_long);
11176                     *d++ = *s++;
11177                 }
11178                 else {
11179                    if (((lastub && s == lastub + 1) ||
11180                         (!isDIGIT(s[1]) && s[1] != '_')))
11181                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11182                                       "Misplaced _ in number");
11183                    lastub = s++;
11184                 }
11185             }
11186         }
11187
11188
11189         /*
11190            We try to do an integer conversion first if no characters
11191            indicating "float" have been found.
11192          */
11193
11194         if (!floatit) {
11195             UV uv;
11196             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11197
11198             if (flags == IS_NUMBER_IN_UV) {
11199               if (uv <= IV_MAX)
11200                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11201               else
11202                 sv = newSVuv(uv);
11203             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11204               if (uv <= (UV) IV_MIN)
11205                 sv = newSViv(-(IV)uv);
11206               else
11207                 floatit = TRUE;
11208             } else
11209               floatit = TRUE;
11210         }
11211         if (floatit) {
11212             /* terminate the string */
11213             *d = '\0';
11214             nv = Atof(PL_tokenbuf);
11215             sv = newSVnv(nv);
11216         }
11217
11218         if ( floatit
11219              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11220             const char *const key = floatit ? "float" : "integer";
11221             const STRLEN keylen = floatit ? 5 : 7;
11222             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11223                                 key, keylen, sv, NULL, NULL, 0);
11224         }
11225         break;
11226
11227     /* if it starts with a v, it could be a v-string */
11228     case 'v':
11229 vstring:
11230                 sv = newSV(5); /* preallocate storage space */
11231                 ENTER_with_name("scan_vstring");
11232                 SAVEFREESV(sv);
11233                 s = scan_vstring(s, PL_bufend, sv);
11234                 SvREFCNT_inc_simple_void_NN(sv);
11235                 LEAVE_with_name("scan_vstring");
11236         break;
11237     }
11238
11239     /* make the op for the constant and return */
11240
11241     if (sv)
11242         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11243     else
11244         lvalp->opval = NULL;
11245
11246     return (char *)s;
11247 }
11248
11249 STATIC char *
11250 S_scan_formline(pTHX_ char *s)
11251 {
11252     dVAR;
11253     char *eol;
11254     char *t;
11255     SV * const stuff = newSVpvs("");
11256     bool needargs = FALSE;
11257     bool eofmt = FALSE;
11258 #ifdef PERL_MAD
11259     char *tokenstart = s;
11260     SV* savewhite = NULL;
11261
11262     if (PL_madskills) {
11263         savewhite = PL_thiswhite;
11264         PL_thiswhite = 0;
11265     }
11266 #endif
11267
11268     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11269
11270     while (!needargs) {
11271         if (*s == '.') {
11272             t = s+1;
11273 #ifdef PERL_STRICT_CR
11274             while (SPACE_OR_TAB(*t))
11275                 t++;
11276 #else
11277             while (SPACE_OR_TAB(*t) || *t == '\r')
11278                 t++;
11279 #endif
11280             if (*t == '\n' || t == PL_bufend) {
11281                 eofmt = TRUE;
11282                 break;
11283             }
11284         }
11285         eol = (char *) memchr(s,'\n',PL_bufend-s);
11286         if (!eol++)
11287                 eol = PL_bufend;
11288         if (*s != '#') {
11289             for (t = s; t < eol; t++) {
11290                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11291                     needargs = FALSE;
11292                     goto enough;        /* ~~ must be first line in formline */
11293                 }
11294                 if (*t == '@' || *t == '^')
11295                     needargs = TRUE;
11296             }
11297             if (eol > s) {
11298                 sv_catpvn(stuff, s, eol-s);
11299 #ifndef PERL_STRICT_CR
11300                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11301                     char *end = SvPVX(stuff) + SvCUR(stuff);
11302                     end[-2] = '\n';
11303                     end[-1] = '\0';
11304                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11305                 }
11306 #endif
11307             }
11308             else
11309               break;
11310         }
11311         s = (char*)eol;
11312         if ((PL_rsfp || PL_parser->filtered)
11313          && PL_parser->form_lex_state == LEX_NORMAL) {
11314             bool got_some;
11315 #ifdef PERL_MAD
11316             if (PL_madskills) {
11317                 if (PL_thistoken)
11318                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11319                 else
11320                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11321             }
11322 #endif
11323             PL_bufptr = PL_bufend;
11324             COPLINE_INC_WITH_HERELINES;
11325             got_some = lex_next_chunk(0);
11326             CopLINE_dec(PL_curcop);
11327             s = PL_bufptr;
11328 #ifdef PERL_MAD
11329             tokenstart = PL_bufptr;
11330 #endif
11331             if (!got_some)
11332                 break;
11333         }
11334         incline(s);
11335     }
11336   enough:
11337     if (!SvCUR(stuff) || needargs)
11338         PL_lex_state = PL_parser->form_lex_state;
11339     if (SvCUR(stuff)) {
11340         PL_expect = XSTATE;
11341         if (needargs) {
11342             start_force(PL_curforce);
11343             NEXTVAL_NEXTTOKE.ival = 0;
11344             force_next(FORMLBRACK);
11345         }
11346         if (!IN_BYTES) {
11347             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11348                 SvUTF8_on(stuff);
11349             else if (PL_encoding)
11350                 sv_recode_to_utf8(stuff, PL_encoding);
11351         }
11352         start_force(PL_curforce);
11353         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11354         force_next(THING);
11355     }
11356     else {
11357         SvREFCNT_dec(stuff);
11358         if (eofmt)
11359             PL_lex_formbrack = 0;
11360     }
11361 #ifdef PERL_MAD
11362     if (PL_madskills) {
11363         if (PL_thistoken)
11364             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11365         else
11366             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11367         PL_thiswhite = savewhite;
11368     }
11369 #endif
11370     return s;
11371 }
11372
11373 I32
11374 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11375 {
11376     dVAR;
11377     const I32 oldsavestack_ix = PL_savestack_ix;
11378     CV* const outsidecv = PL_compcv;
11379
11380     SAVEI32(PL_subline);
11381     save_item(PL_subname);
11382     SAVESPTR(PL_compcv);
11383
11384     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11385     CvFLAGS(PL_compcv) |= flags;
11386
11387     PL_subline = CopLINE(PL_curcop);
11388     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11389     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11390     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11391     if (outsidecv && CvPADLIST(outsidecv))
11392         CvPADLIST(PL_compcv)->xpadl_outid =
11393             PadlistNAMES(CvPADLIST(outsidecv));
11394
11395     return oldsavestack_ix;
11396 }
11397
11398 #ifdef __SC__
11399 #pragma segment Perl_yylex
11400 #endif
11401 static int
11402 S_yywarn(pTHX_ const char *const s, U32 flags)
11403 {
11404     dVAR;
11405
11406     PERL_ARGS_ASSERT_YYWARN;
11407
11408     PL_in_eval |= EVAL_WARNONLY;
11409     yyerror_pv(s, flags);
11410     PL_in_eval &= ~EVAL_WARNONLY;
11411     return 0;
11412 }
11413
11414 int
11415 Perl_yyerror(pTHX_ const char *const s)
11416 {
11417     PERL_ARGS_ASSERT_YYERROR;
11418     return yyerror_pvn(s, strlen(s), 0);
11419 }
11420
11421 int
11422 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11423 {
11424     PERL_ARGS_ASSERT_YYERROR_PV;
11425     return yyerror_pvn(s, strlen(s), flags);
11426 }
11427
11428 int
11429 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11430 {
11431     dVAR;
11432     const char *context = NULL;
11433     int contlen = -1;
11434     SV *msg;
11435     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11436     int yychar  = PL_parser->yychar;
11437
11438     PERL_ARGS_ASSERT_YYERROR_PVN;
11439
11440     if (!yychar || (yychar == ';' && !PL_rsfp))
11441         sv_catpvs(where_sv, "at EOF");
11442     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11443       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11444       PL_oldbufptr != PL_bufptr) {
11445         /*
11446                 Only for NetWare:
11447                 The code below is removed for NetWare because it abends/crashes on NetWare
11448                 when the script has error such as not having the closing quotes like:
11449                     if ($var eq "value)
11450                 Checking of white spaces is anyway done in NetWare code.
11451         */
11452 #ifndef NETWARE
11453         while (isSPACE(*PL_oldoldbufptr))
11454             PL_oldoldbufptr++;
11455 #endif
11456         context = PL_oldoldbufptr;
11457         contlen = PL_bufptr - PL_oldoldbufptr;
11458     }
11459     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11460       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11461         /*
11462                 Only for NetWare:
11463                 The code below is removed for NetWare because it abends/crashes on NetWare
11464                 when the script has error such as not having the closing quotes like:
11465                     if ($var eq "value)
11466                 Checking of white spaces is anyway done in NetWare code.
11467         */
11468 #ifndef NETWARE
11469         while (isSPACE(*PL_oldbufptr))
11470             PL_oldbufptr++;
11471 #endif
11472         context = PL_oldbufptr;
11473         contlen = PL_bufptr - PL_oldbufptr;
11474     }
11475     else if (yychar > 255)
11476         sv_catpvs(where_sv, "next token ???");
11477     else if (yychar == -2) { /* YYEMPTY */
11478         if (PL_lex_state == LEX_NORMAL ||
11479            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11480             sv_catpvs(where_sv, "at end of line");
11481         else if (PL_lex_inpat)
11482             sv_catpvs(where_sv, "within pattern");
11483         else
11484             sv_catpvs(where_sv, "within string");
11485     }
11486     else {
11487         sv_catpvs(where_sv, "next char ");
11488         if (yychar < 32)
11489             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11490         else if (isPRINT_LC(yychar)) {
11491             const char string = yychar;
11492             sv_catpvn(where_sv, &string, 1);
11493         }
11494         else
11495             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11496     }
11497     msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11498     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11499         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11500     if (context)
11501         Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11502                              UTF8fARG(UTF, contlen, context));
11503     else
11504         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11505     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11506         Perl_sv_catpvf(aTHX_ msg,
11507         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11508                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11509         PL_multi_end = 0;
11510     }
11511     if (PL_in_eval & EVAL_WARNONLY) {
11512         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11513     }
11514     else
11515         qerror(msg);
11516     if (PL_error_count >= 10) {
11517         SV * errsv;
11518         if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11519             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11520                        SVfARG(errsv), OutCopFILE(PL_curcop));
11521         else
11522             Perl_croak(aTHX_ "%s has too many errors.\n",
11523             OutCopFILE(PL_curcop));
11524     }
11525     PL_in_my = 0;
11526     PL_in_my_stash = NULL;
11527     return 0;
11528 }
11529 #ifdef __SC__
11530 #pragma segment Main
11531 #endif
11532
11533 STATIC char*
11534 S_swallow_bom(pTHX_ U8 *s)
11535 {
11536     dVAR;
11537     const STRLEN slen = SvCUR(PL_linestr);
11538
11539     PERL_ARGS_ASSERT_SWALLOW_BOM;
11540
11541     switch (s[0]) {
11542     case 0xFF:
11543         if (s[1] == 0xFE) {
11544             /* UTF-16 little-endian? (or UTF-32LE?) */
11545             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11546                 /* diag_listed_as: Unsupported script encoding %s */
11547                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11548 #ifndef PERL_NO_UTF16_FILTER
11549             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11550             s += 2;
11551             if (PL_bufend > (char*)s) {
11552                 s = add_utf16_textfilter(s, TRUE);
11553             }
11554 #else
11555             /* diag_listed_as: Unsupported script encoding %s */
11556             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11557 #endif
11558         }
11559         break;
11560     case 0xFE:
11561         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11562 #ifndef PERL_NO_UTF16_FILTER
11563             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11564             s += 2;
11565             if (PL_bufend > (char *)s) {
11566                 s = add_utf16_textfilter(s, FALSE);
11567             }
11568 #else
11569             /* diag_listed_as: Unsupported script encoding %s */
11570             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11571 #endif
11572         }
11573         break;
11574     case 0xEF:
11575         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11576             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11577             s += 3;                      /* UTF-8 */
11578         }
11579         break;
11580     case 0:
11581         if (slen > 3) {
11582              if (s[1] == 0) {
11583                   if (s[2] == 0xFE && s[3] == 0xFF) {
11584                        /* UTF-32 big-endian */
11585                        /* diag_listed_as: Unsupported script encoding %s */
11586                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11587                   }
11588              }
11589              else if (s[2] == 0 && s[3] != 0) {
11590                   /* Leading bytes
11591                    * 00 xx 00 xx
11592                    * are a good indicator of UTF-16BE. */
11593 #ifndef PERL_NO_UTF16_FILTER
11594                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11595                   s = add_utf16_textfilter(s, FALSE);
11596 #else
11597                   /* diag_listed_as: Unsupported script encoding %s */
11598                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11599 #endif
11600              }
11601         }
11602 #ifdef EBCDIC
11603     case 0xDD:
11604         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11605             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11606             s += 4;                      /* UTF-8 */
11607         }
11608         break;
11609 #endif
11610
11611     default:
11612          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11613                   /* Leading bytes
11614                    * xx 00 xx 00
11615                    * are a good indicator of UTF-16LE. */
11616 #ifndef PERL_NO_UTF16_FILTER
11617               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11618               s = add_utf16_textfilter(s, TRUE);
11619 #else
11620               /* diag_listed_as: Unsupported script encoding %s */
11621               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11622 #endif
11623          }
11624     }
11625     return (char*)s;
11626 }
11627
11628
11629 #ifndef PERL_NO_UTF16_FILTER
11630 static I32
11631 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11632 {
11633     dVAR;
11634     SV *const filter = FILTER_DATA(idx);
11635     /* We re-use this each time round, throwing the contents away before we
11636        return.  */
11637     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11638     SV *const utf8_buffer = filter;
11639     IV status = IoPAGE(filter);
11640     const bool reverse = cBOOL(IoLINES(filter));
11641     I32 retval;
11642
11643     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11644
11645     /* As we're automatically added, at the lowest level, and hence only called
11646        from this file, we can be sure that we're not called in block mode. Hence
11647        don't bother writing code to deal with block mode.  */
11648     if (maxlen) {
11649         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11650     }
11651     if (status < 0) {
11652         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11653     }
11654     DEBUG_P(PerlIO_printf(Perl_debug_log,
11655                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11656                           FPTR2DPTR(void *, S_utf16_textfilter),
11657                           reverse ? 'l' : 'b', idx, maxlen, status,
11658                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11659
11660     while (1) {
11661         STRLEN chars;
11662         STRLEN have;
11663         I32 newlen;
11664         U8 *end;
11665         /* First, look in our buffer of existing UTF-8 data:  */
11666         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11667
11668         if (nl) {
11669             ++nl;
11670         } else if (status == 0) {
11671             /* EOF */
11672             IoPAGE(filter) = 0;
11673             nl = SvEND(utf8_buffer);
11674         }
11675         if (nl) {
11676             STRLEN got = nl - SvPVX(utf8_buffer);
11677             /* Did we have anything to append?  */
11678             retval = got != 0;
11679             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11680             /* Everything else in this code works just fine if SVp_POK isn't
11681                set.  This, however, needs it, and we need it to work, else
11682                we loop infinitely because the buffer is never consumed.  */
11683             sv_chop(utf8_buffer, nl);
11684             break;
11685         }
11686
11687         /* OK, not a complete line there, so need to read some more UTF-16.
11688            Read an extra octect if the buffer currently has an odd number. */
11689         while (1) {
11690             if (status <= 0)
11691                 break;
11692             if (SvCUR(utf16_buffer) >= 2) {
11693                 /* Location of the high octet of the last complete code point.
11694                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11695                    *coupled* with all the benefits of partial reads and
11696                    endianness.  */
11697                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11698                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11699
11700                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11701                     break;
11702                 }
11703
11704                 /* We have the first half of a surrogate. Read more.  */
11705                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11706             }
11707
11708             status = FILTER_READ(idx + 1, utf16_buffer,
11709                                  160 + (SvCUR(utf16_buffer) & 1));
11710             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11711             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11712             if (status < 0) {
11713                 /* Error */
11714                 IoPAGE(filter) = status;
11715                 return status;
11716             }
11717         }
11718
11719         chars = SvCUR(utf16_buffer) >> 1;
11720         have = SvCUR(utf8_buffer);
11721         SvGROW(utf8_buffer, have + chars * 3 + 1);
11722
11723         if (reverse) {
11724             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11725                                          (U8*)SvPVX_const(utf8_buffer) + have,
11726                                          chars * 2, &newlen);
11727         } else {
11728             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11729                                 (U8*)SvPVX_const(utf8_buffer) + have,
11730                                 chars * 2, &newlen);
11731         }
11732         SvCUR_set(utf8_buffer, have + newlen);
11733         *end = '\0';
11734
11735         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11736            it's private to us, and utf16_to_utf8{,reversed} take a
11737            (pointer,length) pair, rather than a NUL-terminated string.  */
11738         if(SvCUR(utf16_buffer) & 1) {
11739             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11740             SvCUR_set(utf16_buffer, 1);
11741         } else {
11742             SvCUR_set(utf16_buffer, 0);
11743         }
11744     }
11745     DEBUG_P(PerlIO_printf(Perl_debug_log,
11746                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11747                           status,
11748                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11749     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11750     return retval;
11751 }
11752
11753 static U8 *
11754 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11755 {
11756     SV *filter = filter_add(S_utf16_textfilter, NULL);
11757
11758     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11759
11760     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11761     sv_setpvs(filter, "");
11762     IoLINES(filter) = reversed;
11763     IoPAGE(filter) = 1; /* Not EOF */
11764
11765     /* Sadly, we have to return a valid pointer, come what may, so we have to
11766        ignore any error return from this.  */
11767     SvCUR_set(PL_linestr, 0);
11768     if (FILTER_READ(0, PL_linestr, 0)) {
11769         SvUTF8_on(PL_linestr);
11770     } else {
11771         SvUTF8_on(PL_linestr);
11772     }
11773     PL_bufend = SvEND(PL_linestr);
11774     return (U8*)SvPVX(PL_linestr);
11775 }
11776 #endif
11777
11778 /*
11779 Returns a pointer to the next character after the parsed
11780 vstring, as well as updating the passed in sv.
11781
11782 Function must be called like
11783
11784         sv = sv_2mortal(newSV(5));
11785         s = scan_vstring(s,e,sv);
11786
11787 where s and e are the start and end of the string.
11788 The sv should already be large enough to store the vstring
11789 passed in, for performance reasons.
11790
11791 This function may croak if fatal warnings are enabled in the
11792 calling scope, hence the sv_2mortal in the example (to prevent
11793 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11794 sv_2mortal.
11795
11796 */
11797
11798 char *
11799 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11800 {
11801     dVAR;
11802     const char *pos = s;
11803     const char *start = s;
11804
11805     PERL_ARGS_ASSERT_SCAN_VSTRING;
11806
11807     if (*pos == 'v') pos++;  /* get past 'v' */
11808     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11809         pos++;
11810     if ( *pos != '.') {
11811         /* this may not be a v-string if followed by => */
11812         const char *next = pos;
11813         while (next < e && isSPACE(*next))
11814             ++next;
11815         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11816             /* return string not v-string */
11817             sv_setpvn(sv,(char *)s,pos-s);
11818             return (char *)pos;
11819         }
11820     }
11821
11822     if (!isALPHA(*pos)) {
11823         U8 tmpbuf[UTF8_MAXBYTES+1];
11824
11825         if (*s == 'v')
11826             s++;  /* get past 'v' */
11827
11828         sv_setpvs(sv, "");
11829
11830         for (;;) {
11831             /* this is atoi() that tolerates underscores */
11832             U8 *tmpend;
11833             UV rev = 0;
11834             const char *end = pos;
11835             UV mult = 1;
11836             while (--end >= s) {
11837                 if (*end != '_') {
11838                     const UV orev = rev;
11839                     rev += (*end - '0') * mult;
11840                     mult *= 10;
11841                     if (orev > rev)
11842                         /* diag_listed_as: Integer overflow in %s number */
11843                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11844                                          "Integer overflow in decimal number");
11845                 }
11846             }
11847 #ifdef EBCDIC
11848             if (rev > 0x7FFFFFFF)
11849                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11850 #endif
11851             /* Append native character for the rev point */
11852             tmpend = uvchr_to_utf8(tmpbuf, rev);
11853             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11854             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11855                  SvUTF8_on(sv);
11856             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11857                  s = ++pos;
11858             else {
11859                  s = pos;
11860                  break;
11861             }
11862             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11863                  pos++;
11864         }
11865         SvPOK_on(sv);
11866         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11867         SvRMAGICAL_on(sv);
11868     }
11869     return (char *)s;
11870 }
11871
11872 int
11873 Perl_keyword_plugin_standard(pTHX_
11874         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11875 {
11876     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11877     PERL_UNUSED_CONTEXT;
11878     PERL_UNUSED_ARG(keyword_ptr);
11879     PERL_UNUSED_ARG(keyword_len);
11880     PERL_UNUSED_ARG(op_ptr);
11881     return KEYWORD_PLUGIN_DECLINE;
11882 }
11883
11884 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11885 static void
11886 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11887 {
11888     SAVEI32(PL_lex_brackets);
11889     if (PL_lex_brackets > 100)
11890         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11891     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11892     SAVEI32(PL_lex_allbrackets);
11893     PL_lex_allbrackets = 0;
11894     SAVEI8(PL_lex_fakeeof);
11895     PL_lex_fakeeof = (U8)fakeeof;
11896     if(yyparse(gramtype) && !PL_parser->error_count)
11897         qerror(Perl_mess(aTHX_ "Parse error"));
11898 }
11899
11900 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11901 static OP *
11902 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11903 {
11904     OP *o;
11905     ENTER;
11906     SAVEVPTR(PL_eval_root);
11907     PL_eval_root = NULL;
11908     parse_recdescent(gramtype, fakeeof);
11909     o = PL_eval_root;
11910     LEAVE;
11911     return o;
11912 }
11913
11914 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11915 static OP *
11916 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11917 {
11918     OP *exprop;
11919     if (flags & ~PARSE_OPTIONAL)
11920         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11921     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11922     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11923         if (!PL_parser->error_count)
11924             qerror(Perl_mess(aTHX_ "Parse error"));
11925         exprop = newOP(OP_NULL, 0);
11926     }
11927     return exprop;
11928 }
11929
11930 /*
11931 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11932
11933 Parse a Perl arithmetic expression.  This may contain operators of precedence
11934 down to the bit shift operators.  The expression must be followed (and thus
11935 terminated) either by a comparison or lower-precedence operator or by
11936 something that would normally terminate an expression such as semicolon.
11937 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11938 otherwise it is mandatory.  It is up to the caller to ensure that the
11939 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11940 the source of the code to be parsed and the lexical context for the
11941 expression.
11942
11943 The op tree representing the expression is returned.  If an optional
11944 expression is absent, a null pointer is returned, otherwise the pointer
11945 will be non-null.
11946
11947 If an error occurs in parsing or compilation, in most cases a valid op
11948 tree is returned anyway.  The error is reflected in the parser state,
11949 normally resulting in a single exception at the top level of parsing
11950 which covers all the compilation errors that occurred.  Some compilation
11951 errors, however, will throw an exception immediately.
11952
11953 =cut
11954 */
11955
11956 OP *
11957 Perl_parse_arithexpr(pTHX_ U32 flags)
11958 {
11959     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11960 }
11961
11962 /*
11963 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11964
11965 Parse a Perl term expression.  This may contain operators of precedence
11966 down to the assignment operators.  The expression must be followed (and thus
11967 terminated) either by a comma or lower-precedence operator or by
11968 something that would normally terminate an expression such as semicolon.
11969 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11970 otherwise it is mandatory.  It is up to the caller to ensure that the
11971 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11972 the source of the code to be parsed and the lexical context for the
11973 expression.
11974
11975 The op tree representing the expression is returned.  If an optional
11976 expression is absent, a null pointer is returned, otherwise the pointer
11977 will be non-null.
11978
11979 If an error occurs in parsing or compilation, in most cases a valid op
11980 tree is returned anyway.  The error is reflected in the parser state,
11981 normally resulting in a single exception at the top level of parsing
11982 which covers all the compilation errors that occurred.  Some compilation
11983 errors, however, will throw an exception immediately.
11984
11985 =cut
11986 */
11987
11988 OP *
11989 Perl_parse_termexpr(pTHX_ U32 flags)
11990 {
11991     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11992 }
11993
11994 /*
11995 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11996
11997 Parse a Perl list expression.  This may contain operators of precedence
11998 down to the comma operator.  The expression must be followed (and thus
11999 terminated) either by a low-precedence logic operator such as C<or> or by
12000 something that would normally terminate an expression such as semicolon.
12001 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12002 otherwise it is mandatory.  It is up to the caller to ensure that the
12003 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12004 the source of the code to be parsed and the lexical context for the
12005 expression.
12006
12007 The op tree representing the expression is returned.  If an optional
12008 expression is absent, a null pointer is returned, otherwise the pointer
12009 will be non-null.
12010
12011 If an error occurs in parsing or compilation, in most cases a valid op
12012 tree is returned anyway.  The error is reflected in the parser state,
12013 normally resulting in a single exception at the top level of parsing
12014 which covers all the compilation errors that occurred.  Some compilation
12015 errors, however, will throw an exception immediately.
12016
12017 =cut
12018 */
12019
12020 OP *
12021 Perl_parse_listexpr(pTHX_ U32 flags)
12022 {
12023     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12024 }
12025
12026 /*
12027 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12028
12029 Parse a single complete Perl expression.  This allows the full
12030 expression grammar, including the lowest-precedence operators such
12031 as C<or>.  The expression must be followed (and thus terminated) by a
12032 token that an expression would normally be terminated by: end-of-file,
12033 closing bracketing punctuation, semicolon, or one of the keywords that
12034 signals a postfix expression-statement modifier.  If I<flags> includes
12035 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12036 mandatory.  It is up to the caller to ensure that the dynamic parser
12037 state (L</PL_parser> et al) is correctly set to reflect the source of
12038 the code to be parsed and the lexical context for the expression.
12039
12040 The op tree representing the expression is returned.  If an optional
12041 expression is absent, a null pointer is returned, otherwise the pointer
12042 will be non-null.
12043
12044 If an error occurs in parsing or compilation, in most cases a valid op
12045 tree is returned anyway.  The error is reflected in the parser state,
12046 normally resulting in a single exception at the top level of parsing
12047 which covers all the compilation errors that occurred.  Some compilation
12048 errors, however, will throw an exception immediately.
12049
12050 =cut
12051 */
12052
12053 OP *
12054 Perl_parse_fullexpr(pTHX_ U32 flags)
12055 {
12056     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12057 }
12058
12059 /*
12060 =for apidoc Amx|OP *|parse_block|U32 flags
12061
12062 Parse a single complete Perl code block.  This consists of an opening
12063 brace, a sequence of statements, and a closing brace.  The block
12064 constitutes a lexical scope, so C<my> variables and various compile-time
12065 effects can be contained within it.  It is up to the caller to ensure
12066 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12067 reflect the source of the code to be parsed and the lexical context for
12068 the statement.
12069
12070 The op tree representing the code block is returned.  This is always a
12071 real op, never a null pointer.  It will normally be a C<lineseq> list,
12072 including C<nextstate> or equivalent ops.  No ops to construct any kind
12073 of runtime scope are included by virtue of it being a block.
12074
12075 If an error occurs in parsing or compilation, in most cases a valid op
12076 tree (most likely null) is returned anyway.  The error is reflected in
12077 the parser state, normally resulting in a single exception at the top
12078 level of parsing which covers all the compilation errors that occurred.
12079 Some compilation errors, however, will throw an exception immediately.
12080
12081 The I<flags> parameter is reserved for future use, and must always
12082 be zero.
12083
12084 =cut
12085 */
12086
12087 OP *
12088 Perl_parse_block(pTHX_ U32 flags)
12089 {
12090     if (flags)
12091         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12092     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12093 }
12094
12095 /*
12096 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12097
12098 Parse a single unadorned Perl statement.  This may be a normal imperative
12099 statement or a declaration that has compile-time effect.  It does not
12100 include any label or other affixture.  It is up to the caller to ensure
12101 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12102 reflect the source of the code to be parsed and the lexical context for
12103 the statement.
12104
12105 The op tree representing the statement is returned.  This may be a
12106 null pointer if the statement is null, for example if it was actually
12107 a subroutine definition (which has compile-time side effects).  If not
12108 null, it will be ops directly implementing the statement, suitable to
12109 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12110 equivalent op (except for those embedded in a scope contained entirely
12111 within the statement).
12112
12113 If an error occurs in parsing or compilation, in most cases a valid op
12114 tree (most likely null) is returned anyway.  The error is reflected in
12115 the parser state, normally resulting in a single exception at the top
12116 level of parsing which covers all the compilation errors that occurred.
12117 Some compilation errors, however, will throw an exception immediately.
12118
12119 The I<flags> parameter is reserved for future use, and must always
12120 be zero.
12121
12122 =cut
12123 */
12124
12125 OP *
12126 Perl_parse_barestmt(pTHX_ U32 flags)
12127 {
12128     if (flags)
12129         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12130     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12131 }
12132
12133 /*
12134 =for apidoc Amx|SV *|parse_label|U32 flags
12135
12136 Parse a single label, possibly optional, of the type that may prefix a
12137 Perl statement.  It is up to the caller to ensure that the dynamic parser
12138 state (L</PL_parser> et al) is correctly set to reflect the source of
12139 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
12140 label is optional, otherwise it is mandatory.
12141
12142 The name of the label is returned in the form of a fresh scalar.  If an
12143 optional label is absent, a null pointer is returned.
12144
12145 If an error occurs in parsing, which can only occur if the label is
12146 mandatory, a valid label is returned anyway.  The error is reflected in
12147 the parser state, normally resulting in a single exception at the top
12148 level of parsing which covers all the compilation errors that occurred.
12149
12150 =cut
12151 */
12152
12153 SV *
12154 Perl_parse_label(pTHX_ U32 flags)
12155 {
12156     if (flags & ~PARSE_OPTIONAL)
12157         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12158     if (PL_lex_state == LEX_KNOWNEXT) {
12159         PL_parser->yychar = yylex();
12160         if (PL_parser->yychar == LABEL) {
12161             char * const lpv = pl_yylval.pval;
12162             STRLEN llen = strlen(lpv);
12163             PL_parser->yychar = YYEMPTY;
12164             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12165         } else {
12166             yyunlex();
12167             goto no_label;
12168         }
12169     } else {
12170         char *s, *t;
12171         STRLEN wlen, bufptr_pos;
12172         lex_read_space(0);
12173         t = s = PL_bufptr;
12174         if (!isIDFIRST_lazy_if(s, UTF))
12175             goto no_label;
12176         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12177         if (word_takes_any_delimeter(s, wlen))
12178             goto no_label;
12179         bufptr_pos = s - SvPVX(PL_linestr);
12180         PL_bufptr = t;
12181         lex_read_space(LEX_KEEP_PREVIOUS);
12182         t = PL_bufptr;
12183         s = SvPVX(PL_linestr) + bufptr_pos;
12184         if (t[0] == ':' && t[1] != ':') {
12185             PL_oldoldbufptr = PL_oldbufptr;
12186             PL_oldbufptr = s;
12187             PL_bufptr = t+1;
12188             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12189         } else {
12190             PL_bufptr = s;
12191             no_label:
12192             if (flags & PARSE_OPTIONAL) {
12193                 return NULL;
12194             } else {
12195                 qerror(Perl_mess(aTHX_ "Parse error"));
12196                 return newSVpvs("x");
12197             }
12198         }
12199     }
12200 }
12201
12202 /*
12203 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12204
12205 Parse a single complete Perl statement.  This may be a normal imperative
12206 statement or a declaration that has compile-time effect, and may include
12207 optional labels.  It is up to the caller to ensure that the dynamic
12208 parser state (L</PL_parser> et al) is correctly set to reflect the source
12209 of the code to be parsed and the lexical context for the statement.
12210
12211 The op tree representing the statement is returned.  This may be a
12212 null pointer if the statement is null, for example if it was actually
12213 a subroutine definition (which has compile-time side effects).  If not
12214 null, it will be the result of a L</newSTATEOP> call, normally including
12215 a C<nextstate> or equivalent op.
12216
12217 If an error occurs in parsing or compilation, in most cases a valid op
12218 tree (most likely null) is returned anyway.  The error is reflected in
12219 the parser state, normally resulting in a single exception at the top
12220 level of parsing which covers all the compilation errors that occurred.
12221 Some compilation errors, however, will throw an exception immediately.
12222
12223 The I<flags> parameter is reserved for future use, and must always
12224 be zero.
12225
12226 =cut
12227 */
12228
12229 OP *
12230 Perl_parse_fullstmt(pTHX_ U32 flags)
12231 {
12232     if (flags)
12233         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12234     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12235 }
12236
12237 /*
12238 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12239
12240 Parse a sequence of zero or more Perl statements.  These may be normal
12241 imperative statements, including optional labels, or declarations
12242 that have compile-time effect, or any mixture thereof.  The statement
12243 sequence ends when a closing brace or end-of-file is encountered in a
12244 place where a new statement could have validly started.  It is up to
12245 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12246 is correctly set to reflect the source of the code to be parsed and the
12247 lexical context for the statements.
12248
12249 The op tree representing the statement sequence is returned.  This may
12250 be a null pointer if the statements were all null, for example if there
12251 were no statements or if there were only subroutine definitions (which
12252 have compile-time side effects).  If not null, it will be a C<lineseq>
12253 list, normally including C<nextstate> or equivalent ops.
12254
12255 If an error occurs in parsing or compilation, in most cases a valid op
12256 tree is returned anyway.  The error is reflected in the parser state,
12257 normally resulting in a single exception at the top level of parsing
12258 which covers all the compilation errors that occurred.  Some compilation
12259 errors, however, will throw an exception immediately.
12260
12261 The I<flags> parameter is reserved for future use, and must always
12262 be zero.
12263
12264 =cut
12265 */
12266
12267 OP *
12268 Perl_parse_stmtseq(pTHX_ U32 flags)
12269 {
12270     OP *stmtseqop;
12271     I32 c;
12272     if (flags)
12273         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12274     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12275     c = lex_peek_unichar(0);
12276     if (c != -1 && c != /*{*/'}')
12277         qerror(Perl_mess(aTHX_ "Parse error"));
12278     return stmtseqop;
12279 }
12280
12281 /*
12282  * Local variables:
12283  * c-indentation-style: bsd
12284  * c-basic-offset: 4
12285  * indent-tabs-mode: nil
12286  * End:
12287  *
12288  * ex: set ts=8 sts=4 sw=4 et:
12289  */