toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN0OP       : zero-argument function, with its op created in this file
 228  * FUN1         : not used, except for not, which isn't a UNIOP
 229  * BOop         : bitwise or or xor
 230  * BAop         : bitwise and
 231  * SHop         : shift operator
 232  * PWop         : power operator
 233  * PMop         : pattern-matching operator
 234  * Aop          : addition-level operator
 235  * Mop          : multiplication-level operator
 236  * Eop          : equality-testing operator
 237  * Rop          : relational operator <= != gt
 238  *
 239  * Also see LOP and lop() below.
 240  */
 241
 242 #ifdef DEBUGGING /* Serve -DT. */
 243 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 244 #else
 245 #   define REPORT(retval) (retval)
 246 #endif
 247
 248 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 249 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 250 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 251 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 253 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 254 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 255 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 256 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 257 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 258 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 259 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 260 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 261 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 262 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 263 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 264 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 265 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 266 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 267 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 268 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 269
 270 /* This bit of chicanery makes a unary function followed by
 271  * a parenthesis into a function with one argument, highest precedence.
 272  * The UNIDOR macro is for unary functions that can be followed by the //
 273  * operator (such as C<shift // 0>).
 274  */
 275 #define UNI2(f,x) { \
 276         pl_yylval.ival = f; \
 277         PL_expect = x; \
 278         PL_bufptr = s; \
 279         PL_last_uni = PL_oldbufptr; \
 280         PL_last_lop_op = f; \
 281         if (*s == '(') \
 282             return REPORT( (int)FUNC1 ); \
 283         s = PEEKSPACE(s); \
 284         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 285         }
 286 #define UNI(f)    UNI2(f,XTERM)
 287 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 288 #define UNIPROTO(f,optional) { \
 289         if (optional) PL_last_uni = PL_oldbufptr; \
 290         OPERATOR(f); \
 291         }
 292
 293 #define UNIBRACK(f) { \
 294         pl_yylval.ival = f; \
 295         PL_bufptr = s; \
 296         PL_last_uni = PL_oldbufptr; \
 297         if (*s == '(') \
 298             return REPORT( (int)FUNC1 ); \
 299         s = PEEKSPACE(s); \
 300         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 301         }
 302
 303 /* grandfather return to old style */
 304 #define OLDLOP(f) \
 305         do { \
 306             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 307                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 308             pl_yylval.ival = (f); \
 309             PL_expect = XTERM; \
 310             PL_bufptr = s; \
 311             return (int)LSTOP; \
 312         } while(0)
 313
 314 #ifdef DEBUGGING
 315
 316 /* how to interpret the pl_yylval associated with the token */
 317 enum token_type {
 318     TOKENTYPE_NONE,
 319     TOKENTYPE_IVAL,
 320     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 321     TOKENTYPE_PVAL,
 322     TOKENTYPE_OPVAL,
 323     TOKENTYPE_GVVAL
 324 };
 325
 326 static struct debug_tokens {
 327     const int token;
 328     enum token_type type;
 329     const char *name;
 330 } const debug_tokens[] =
 331 {
 332     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 333     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 334     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 335     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 336     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 337     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 338     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 339     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 340     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 341     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 342     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 343     { DO,               TOKENTYPE_NONE,         "DO" },
 344     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 345     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 346     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 347     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 348     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 349     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 350     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 351     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 352     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 353     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 354     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 355     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 356     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 357     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 358     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 359     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 360     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 361     { IF,               TOKENTYPE_IVAL,         "IF" },
 362     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 363     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 364     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 365     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 366     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 367     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 368     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 369     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 370     { MY,               TOKENTYPE_IVAL,         "MY" },
 371     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 372     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 373     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 374     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 375     { OROR,             TOKENTYPE_NONE,         "OROR" },
 376     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 377     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 378     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 379     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 380     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 381     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 382     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 383     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 384     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 385     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 386     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 387     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 388     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 389     { SUB,              TOKENTYPE_NONE,         "SUB" },
 390     { THING,            TOKENTYPE_OPVAL,        "THING" },
 391     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 392     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 393     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 394     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 395     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 396     { USE,              TOKENTYPE_IVAL,         "USE" },
 397     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 398     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 399     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 400     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 401     { 0,                TOKENTYPE_NONE,         NULL }
 402 };
 403
 404 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 405
 406 STATIC int
 407 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 408 {
 409     dVAR;
 410
 411     PERL_ARGS_ASSERT_TOKEREPORT;
 412
 413     if (DEBUG_T_TEST) {
 414         const char *name = NULL;
 415         enum token_type type = TOKENTYPE_NONE;
 416         const struct debug_tokens *p;
 417         SV* const report = newSVpvs("<== ");
 418
 419         for (p = debug_tokens; p->token; p++) {
 420             if (p->token == (int)rv) {
 421                 name = p->name;
 422                 type = p->type;
 423                 break;
 424             }
 425         }
 426         if (name)
 427             Perl_sv_catpv(aTHX_ report, name);
 428         else if ((char)rv > ' ' && (char)rv < '~')
 429             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 430         else if (!rv)
 431             sv_catpvs(report, "EOF");
 432         else
 433             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 434         switch (type) {
 435         case TOKENTYPE_NONE:
 436         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 437             break;
 438         case TOKENTYPE_IVAL:
 439             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 440             break;
 441         case TOKENTYPE_OPNUM:
 442             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 443                                     PL_op_name[lvalp->ival]);
 444             break;
 445         case TOKENTYPE_PVAL:
 446             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 447             break;
 448         case TOKENTYPE_OPVAL:
 449             if (lvalp->opval) {
 450                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 451                                     PL_op_name[lvalp->opval->op_type]);
 452                 if (lvalp->opval->op_type == OP_CONST) {
 453                     Perl_sv_catpvf(aTHX_ report, " %s",
 454                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 455                 }
 456
 457             }
 458             else
 459                 sv_catpvs(report, "(opval=null)");
 460             break;
 461         }
 462         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 463     };
 464     return (int)rv;
 465 }
 466
 467
 468 /* print the buffer with suitable escapes */
 469
 470 STATIC void
 471 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 472 {
 473     SV* const tmp = newSVpvs("");
 474
 475     PERL_ARGS_ASSERT_PRINTBUF;
 476
 477     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 478     SvREFCNT_dec(tmp);
 479 }
 480
 481 #endif
 482
 483 static int
 484 S_deprecate_commaless_var_list(pTHX) {
 485     PL_expect = XTERM;
 486     deprecate("comma-less variable list");
 487     return REPORT(','); /* grandfather non-comma-format format */
 488 }
 489
 490 /*
 491  * S_ao
 492  *
 493  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 494  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 495  */
 496
 497 STATIC int
 498 S_ao(pTHX_ int toketype)
 499 {
 500     dVAR;
 501     if (*PL_bufptr == '=') {
 502         PL_bufptr++;
 503         if (toketype == ANDAND)
 504             pl_yylval.ival = OP_ANDASSIGN;
 505         else if (toketype == OROR)
 506             pl_yylval.ival = OP_ORASSIGN;
 507         else if (toketype == DORDOR)
 508             pl_yylval.ival = OP_DORASSIGN;
 509         toketype = ASSIGNOP;
 510     }
 511     return toketype;
 512 }
 513
 514 /*
 515  * S_no_op
 516  * When Perl expects an operator and finds something else, no_op
 517  * prints the warning.  It always prints "<something> found where
 518  * operator expected.  It prints "Missing semicolon on previous line?"
 519  * if the surprise occurs at the start of the line.  "do you need to
 520  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 521  * where the compiler doesn't know if foo is a method call or a function.
 522  * It prints "Missing operator before end of line" if there's nothing
 523  * after the missing operator, or "... before <...>" if there is something
 524  * after the missing operator.
 525  */
 526
 527 STATIC void
 528 S_no_op(pTHX_ const char *const what, char *s)
 529 {
 530     dVAR;
 531     char * const oldbp = PL_bufptr;
 532     const bool is_first = (PL_oldbufptr == PL_linestart);
 533
 534     PERL_ARGS_ASSERT_NO_OP;
 535
 536     if (!s)
 537         s = oldbp;
 538     else
 539         PL_bufptr = s;
 540     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 541     if (ckWARN_d(WARN_SYNTAX)) {
 542         if (is_first)
 543             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 544                     "\t(Missing semicolon on previous line?)\n");
 545         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 546             const char *t;
 547             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
 548                                                             t += UTF ? UTF8SKIP(t) : 1)
 549                 NOOP;
 550             if (t < PL_bufptr && isSPACE(*t))
 551                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 552                         "\t(Do you need to predeclare %"SVf"?)\n",
 553                     SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
 554                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 555         }
 556         else {
 557             assert(s >= oldbp);
 558             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 559                     "\t(Missing operator before %"SVf"?)\n",
 560                     SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
 561                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 562         }
 563     }
 564     PL_bufptr = oldbp;
 565 }
 566
 567 /*
 568  * S_missingterm
 569  * Complain about missing quote/regexp/heredoc terminator.
 570  * If it's called with NULL then it cauterizes the line buffer.
 571  * If we're in a delimited string and the delimiter is a control
 572  * character, it's reformatted into a two-char sequence like ^C.
 573  * This is fatal.
 574  */
 575
 576 STATIC void
 577 S_missingterm(pTHX_ char *s)
 578 {
 579     dVAR;
 580     char tmpbuf[3];
 581     char q;
 582     if (s) {
 583         char * const nl = strrchr(s,'\n');
 584         if (nl)
 585             *nl = '\0';
 586     }
 587     else if (isCNTRL(PL_multi_close)) {
 588         *tmpbuf = '^';
 589         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 590         tmpbuf[2] = '\0';
 591         s = tmpbuf;
 592     }
 593     else {
 594         *tmpbuf = (char)PL_multi_close;
 595         tmpbuf[1] = '\0';
 596         s = tmpbuf;
 597     }
 598     q = strchr(s,'"') ? '\'' : '"';
 599     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 600 }
 601
 602 #include "feature.h"
 603
 604 /*
 605  * Check whether the named feature is enabled.
 606  */
 607 bool
 608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 609 {
 610     dVAR;
 611     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 612
 613     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 614
 615     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 616
 617     if (namelen > MAX_FEATURE_LEN)
 618         return FALSE;
 619     memcpy(&he_name[8], name, namelen);
 620
 621     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 622                                      REFCOUNTED_HE_EXISTS));
 623 }
 624
 625 /*
 626  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 627  * utf16-to-utf8-reversed.
 628  */
 629
 630 #ifdef PERL_CR_FILTER
 631 static void
 632 strip_return(SV *sv)
 633 {
 634     register const char *s = SvPVX_const(sv);
 635     register const char * const e = s + SvCUR(sv);
 636
 637     PERL_ARGS_ASSERT_STRIP_RETURN;
 638
 639     /* outer loop optimized to do nothing if there are no CR-LFs */
 640     while (s < e) {
 641         if (*s++ == '\r' && *s == '\n') {
 642             /* hit a CR-LF, need to copy the rest */
 643             register char *d = s - 1;
 644             *d++ = *s++;
 645             while (s < e) {
 646                 if (*s == '\r' && s[1] == '\n')
 647                     s++;
 648                 *d++ = *s++;
 649             }
 650             SvCUR(sv) -= s - d;
 651             return;
 652         }
 653     }
 654 }
 655
 656 STATIC I32
 657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 658 {
 659     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 660     if (count > 0 && !maxlen)
 661         strip_return(sv);
 662     return count;
 663 }
 664 #endif
 665
 666 /*
 667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 668
 669 Creates and initialises a new lexer/parser state object, supplying
 670 a context in which to lex and parse from a new source of Perl code.
 671 A pointer to the new state object is placed in L</PL_parser>.  An entry
 672 is made on the save stack so that upon unwinding the new state object
 673 will be destroyed and the former value of L</PL_parser> will be restored.
 674 Nothing else need be done to clean up the parsing context.
 675
 676 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 677 non-null, provides a string (in SV form) containing code to be parsed.
 678 A copy of the string is made, so subsequent modification of I<line>
 679 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 680 from which code will be read to be parsed.  If both are non-null, the
 681 code in I<line> comes first and must consist of complete lines of input,
 682 and I<rsfp> supplies the remainder of the source.
 683
 684 The I<flags> parameter is reserved for future use.  Currently it is only
 685 used by perl internally, so extensions should always pass zero.
 686
 687 =cut
 688 */
 689
 690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 691    can share filters with the current parser.
 692    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 693    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 694    destruction. This is used to handle the case of defaulting to reading the
 695    script from the standard input because no filename was given on the command
 696    line (without getting confused by situation where STDIN has been closed, so
 697    the script handle is opened on fd 0)  */
 698
 699 void
 700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 701 {
 702     dVAR;
 703     const char *s = NULL;
 704     yy_parser *parser, *oparser;
 705     if (flags && flags & ~LEX_START_FLAGS)
 706         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 707
 708     /* create and initialise a parser */
 709
 710     Newxz(parser, 1, yy_parser);
 711     parser->old_parser = oparser = PL_parser;
 712     PL_parser = parser;
 713
 714     parser->stack = NULL;
 715     parser->ps = NULL;
 716     parser->stack_size = 0;
 717
 718     /* on scope exit, free this parser and restore any outer one */
 719     SAVEPARSER(parser);
 720     parser->saved_curcop = PL_curcop;
 721
 722     /* initialise lexer state */
 723
 724 #ifdef PERL_MAD
 725     parser->curforce = -1;
 726 #else
 727     parser->nexttoke = 0;
 728 #endif
 729     parser->error_count = oparser ? oparser->error_count : 0;
 730     parser->copline = NOLINE;
 731     parser->lex_state = LEX_NORMAL;
 732     parser->expect = XSTATE;
 733     parser->rsfp = rsfp;
 734     parser->rsfp_filters =
 735       !(flags & LEX_START_SAME_FILTER) || !oparser
 736         ? NULL
 737         : MUTABLE_AV(SvREFCNT_inc(
 738             oparser->rsfp_filters
 739              ? oparser->rsfp_filters
 740              : (oparser->rsfp_filters = newAV())
 741           ));
 742
 743     Newx(parser->lex_brackstack, 120, char);
 744     Newx(parser->lex_casestack, 12, char);
 745     *parser->lex_casestack = '\0';
 746
 747     if (line) {
 748         STRLEN len;
 749         s = SvPV_const(line, len);
 750         parser->linestr = flags & LEX_START_COPIED
 751                             ? SvREFCNT_inc_simple_NN(line)
 752                             : newSVpvn_flags(s, len, SvUTF8(line));
 753         if (!len || s[len-1] != ';')
 754             sv_catpvs(parser->linestr, "\n;");
 755     } else {
 756         parser->linestr = newSVpvs("\n;");
 757     }
 758     parser->oldoldbufptr =
 759         parser->oldbufptr =
 760         parser->bufptr =
 761         parser->linestart = SvPVX(parser->linestr);
 762     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 763     parser->last_lop = parser->last_uni = NULL;
 764     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 765                                  |LEX_DONT_CLOSE_RSFP);
 766
 767     parser->in_pod = parser->filtered = 0;
 768 }
 769
 770
 771 /* delete a parser object */
 772
 773 void
 774 Perl_parser_free(pTHX_  const yy_parser *parser)
 775 {
 776     PERL_ARGS_ASSERT_PARSER_FREE;
 777
 778     PL_curcop = parser->saved_curcop;
 779     SvREFCNT_dec(parser->linestr);
 780
 781     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 782         PerlIO_clearerr(parser->rsfp);
 783     else if (parser->rsfp && (!parser->old_parser ||
 784                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 785         PerlIO_close(parser->rsfp);
 786     SvREFCNT_dec(parser->rsfp_filters);
 787
 788     Safefree(parser->lex_brackstack);
 789     Safefree(parser->lex_casestack);
 790     PL_parser = parser->old_parser;
 791     Safefree(parser);
 792 }
 793
 794
 795 /*
 796 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 797
 798 Buffer scalar containing the chunk currently under consideration of the
 799 text currently being lexed.  This is always a plain string scalar (for
 800 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 801 normal scalar means; instead refer to the buffer directly by the pointer
 802 variables described below.
 803
 804 The lexer maintains various C<char*> pointers to things in the
 805 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 806 reallocated, all of these pointers must be updated.  Don't attempt to
 807 do this manually, but rather use L</lex_grow_linestr> if you need to
 808 reallocate the buffer.
 809
 810 The content of the text chunk in the buffer is commonly exactly one
 811 complete line of input, up to and including a newline terminator,
 812 but there are situations where it is otherwise.  The octets of the
 813 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 814 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 815 flag on this scalar, which may disagree with it.
 816
 817 For direct examination of the buffer, the variable
 818 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 819 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 820 of these pointers is usually preferable to examination of the scalar
 821 through normal scalar means.
 822
 823 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 824
 825 Direct pointer to the end of the chunk of text currently being lexed, the
 826 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 827 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 828 always located at the end of the buffer, and does not count as part of
 829 the buffer's contents.
 830
 831 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 832
 833 Points to the current position of lexing inside the lexer buffer.
 834 Characters around this point may be freely examined, within
 835 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 836 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 837 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 838
 839 Lexing code (whether in the Perl core or not) moves this pointer past
 840 the characters that it consumes.  It is also expected to perform some
 841 bookkeeping whenever a newline character is consumed.  This movement
 842 can be more conveniently performed by the function L</lex_read_to>,
 843 which handles newlines appropriately.
 844
 845 Interpretation of the buffer's octets can be abstracted out by
 846 using the slightly higher-level functions L</lex_peek_unichar> and
 847 L</lex_read_unichar>.
 848
 849 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 850
 851 Points to the start of the current line inside the lexer buffer.
 852 This is useful for indicating at which column an error occurred, and
 853 not much else.  This must be updated by any lexing code that consumes
 854 a newline; the function L</lex_read_to> handles this detail.
 855
 856 =cut
 857 */
 858
 859 /*
 860 =for apidoc Amx|bool|lex_bufutf8
 861
 862 Indicates whether the octets in the lexer buffer
 863 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 864 of Unicode characters.  If not, they should be interpreted as Latin-1
 865 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 866
 867 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 868 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 869 encoding.
 870
 871 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 872 is significant, but not the whole story regarding the input character
 873 encoding.  Normally, when a file is being read, the scalar contains octets
 874 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 875 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 876 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 877 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 878 is in effect.  This logic may change in the future; use this function
 879 instead of implementing the logic yourself.
 880
 881 =cut
 882 */
 883
 884 bool
 885 Perl_lex_bufutf8(pTHX)
 886 {
 887     return UTF;
 888 }
 889
 890 /*
 891 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 892
 893 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 894 at least I<len> octets (including terminating NUL).  Returns a
 895 pointer to the reallocated buffer.  This is necessary before making
 896 any direct modification of the buffer that would increase its length.
 897 L</lex_stuff_pvn> provides a more convenient way to insert text into
 898 the buffer.
 899
 900 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 901 this function updates all of the lexer's variables that point directly
 902 into the buffer.
 903
 904 =cut
 905 */
 906
 907 char *
 908 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 909 {
 910     SV *linestr;
 911     char *buf;
 912     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 913     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 914     linestr = PL_parser->linestr;
 915     buf = SvPVX(linestr);
 916     if (len <= SvLEN(linestr))
 917         return buf;
 918     bufend_pos = PL_parser->bufend - buf;
 919     bufptr_pos = PL_parser->bufptr - buf;
 920     oldbufptr_pos = PL_parser->oldbufptr - buf;
 921     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 922     linestart_pos = PL_parser->linestart - buf;
 923     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 924     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 925     buf = sv_grow(linestr, len);
 926     PL_parser->bufend = buf + bufend_pos;
 927     PL_parser->bufptr = buf + bufptr_pos;
 928     PL_parser->oldbufptr = buf + oldbufptr_pos;
 929     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 930     PL_parser->linestart = buf + linestart_pos;
 931     if (PL_parser->last_uni)
 932         PL_parser->last_uni = buf + last_uni_pos;
 933     if (PL_parser->last_lop)
 934         PL_parser->last_lop = buf + last_lop_pos;
 935     return buf;
 936 }
 937
 938 /*
 939 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 940
 941 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 942 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 943 reallocating the buffer if necessary.  This means that lexing code that
 944 runs later will see the characters as if they had appeared in the input.
 945 It is not recommended to do this as part of normal parsing, and most
 946 uses of this facility run the risk of the inserted characters being
 947 interpreted in an unintended manner.
 948
 949 The string to be inserted is represented by I<len> octets starting
 950 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 951 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 952 The characters are recoded for the lexer buffer, according to how the
 953 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 954 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 955 function is more convenient.
 956
 957 =cut
 958 */
 959
 960 void
 961 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 962 {
 963     dVAR;
 964     char *bufptr;
 965     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 966     if (flags & ~(LEX_STUFF_UTF8))
 967         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 968     if (UTF) {
 969         if (flags & LEX_STUFF_UTF8) {
 970             goto plain_copy;
 971         } else {
 972             STRLEN highhalf = 0;
 973             const char *p, *e = pv+len;
 974             for (p = pv; p != e; p++)
 975                 highhalf += !!(((U8)*p) & 0x80);
 976             if (!highhalf)
 977                 goto plain_copy;
 978             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 979             bufptr = PL_parser->bufptr;
 980             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 981             SvCUR_set(PL_parser->linestr,
 982                 SvCUR(PL_parser->linestr) + len+highhalf);
 983             PL_parser->bufend += len+highhalf;
 984             for (p = pv; p != e; p++) {
 985                 U8 c = (U8)*p;
 986                 if (c & 0x80) {
 987                     *bufptr++ = (char)(0xc0 | (c >> 6));
 988                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 989                 } else {
 990                     *bufptr++ = (char)c;
 991                 }
 992             }
 993         }
 994     } else {
 995         if (flags & LEX_STUFF_UTF8) {
 996             STRLEN highhalf = 0;
 997             const char *p, *e = pv+len;
 998             for (p = pv; p != e; p++) {
 999                 U8 c = (U8)*p;
1000                 if (c >= 0xc4) {
1001                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1002                                 "non-Latin-1 character into Latin-1 input");
1003                 } else if (c >= 0xc2 && p+1 != e &&
1004                             (((U8)p[1]) & 0xc0) == 0x80) {
1005                     p++;
1006                     highhalf++;
1007                 } else if (c >= 0x80) {
1008                     /* malformed UTF-8 */
1009                     ENTER;
1010                     SAVESPTR(PL_warnhook);
1011                     PL_warnhook = PERL_WARNHOOK_FATAL;
1012                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1013                     LEAVE;
1014                 }
1015             }
1016             if (!highhalf)
1017                 goto plain_copy;
1018             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1019             bufptr = PL_parser->bufptr;
1020             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1021             SvCUR_set(PL_parser->linestr,
1022                 SvCUR(PL_parser->linestr) + len-highhalf);
1023             PL_parser->bufend += len-highhalf;
1024             for (p = pv; p != e; p++) {
1025                 U8 c = (U8)*p;
1026                 if (c & 0x80) {
1027                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1028                     p++;
1029                 } else {
1030                     *bufptr++ = (char)c;
1031                 }
1032             }
1033         } else {
1034             plain_copy:
1035             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1036             bufptr = PL_parser->bufptr;
1037             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1038             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1039             PL_parser->bufend += len;
1040             Copy(pv, bufptr, len, char);
1041         }
1042     }
1043 }
1044
1045 /*
1046 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1047
1048 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1049 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1050 reallocating the buffer if necessary.  This means that lexing code that
1051 runs later will see the characters as if they had appeared in the input.
1052 It is not recommended to do this as part of normal parsing, and most
1053 uses of this facility run the risk of the inserted characters being
1054 interpreted in an unintended manner.
1055
1056 The string to be inserted is represented by octets starting at I<pv>
1057 and continuing to the first nul.  These octets are interpreted as either
1058 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1059 in I<flags>.  The characters are recoded for the lexer buffer, according
1060 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1061 If it is not convenient to nul-terminate a string to be inserted, the
1062 L</lex_stuff_pvn> function is more appropriate.
1063
1064 =cut
1065 */
1066
1067 void
1068 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1069 {
1070     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1071     lex_stuff_pvn(pv, strlen(pv), flags);
1072 }
1073
1074 /*
1075 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1076
1077 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1078 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1079 reallocating the buffer if necessary.  This means that lexing code that
1080 runs later will see the characters as if they had appeared in the input.
1081 It is not recommended to do this as part of normal parsing, and most
1082 uses of this facility run the risk of the inserted characters being
1083 interpreted in an unintended manner.
1084
1085 The string to be inserted is the string value of I<sv>.  The characters
1086 are recoded for the lexer buffer, according to how the buffer is currently
1087 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1088 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1089 need to construct a scalar.
1090
1091 =cut
1092 */
1093
1094 void
1095 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1096 {
1097     char *pv;
1098     STRLEN len;
1099     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1100     if (flags)
1101         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1102     pv = SvPV(sv, len);
1103     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1104 }
1105
1106 /*
1107 =for apidoc Amx|void|lex_unstuff|char *ptr
1108
1109 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1110 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1111 This hides the discarded text from any lexing code that runs later,
1112 as if the text had never appeared.
1113
1114 This is not the normal way to consume lexed text.  For that, use
1115 L</lex_read_to>.
1116
1117 =cut
1118 */
1119
1120 void
1121 Perl_lex_unstuff(pTHX_ char *ptr)
1122 {
1123     char *buf, *bufend;
1124     STRLEN unstuff_len;
1125     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1126     buf = PL_parser->bufptr;
1127     if (ptr < buf)
1128         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1129     if (ptr == buf)
1130         return;
1131     bufend = PL_parser->bufend;
1132     if (ptr > bufend)
1133         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1134     unstuff_len = ptr - buf;
1135     Move(ptr, buf, bufend+1-ptr, char);
1136     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1137     PL_parser->bufend = bufend - unstuff_len;
1138 }
1139
1140 /*
1141 =for apidoc Amx|void|lex_read_to|char *ptr
1142
1143 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1144 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1145 performing the correct bookkeeping whenever a newline character is passed.
1146 This is the normal way to consume lexed text.
1147
1148 Interpretation of the buffer's octets can be abstracted out by
1149 using the slightly higher-level functions L</lex_peek_unichar> and
1150 L</lex_read_unichar>.
1151
1152 =cut
1153 */
1154
1155 void
1156 Perl_lex_read_to(pTHX_ char *ptr)
1157 {
1158     char *s;
1159     PERL_ARGS_ASSERT_LEX_READ_TO;
1160     s = PL_parser->bufptr;
1161     if (ptr < s || ptr > PL_parser->bufend)
1162         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1163     for (; s != ptr; s++)
1164         if (*s == '\n') {
1165             CopLINE_inc(PL_curcop);
1166             PL_parser->linestart = s+1;
1167         }
1168     PL_parser->bufptr = ptr;
1169 }
1170
1171 /*
1172 =for apidoc Amx|void|lex_discard_to|char *ptr
1173
1174 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1175 up to I<ptr>.  The remaining content of the buffer will be moved, and
1176 all pointers into the buffer updated appropriately.  I<ptr> must not
1177 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1178 it is not permitted to discard text that has yet to be lexed.
1179
1180 Normally it is not necessarily to do this directly, because it suffices to
1181 use the implicit discarding behaviour of L</lex_next_chunk> and things
1182 based on it.  However, if a token stretches across multiple lines,
1183 and the lexing code has kept multiple lines of text in the buffer for
1184 that purpose, then after completion of the token it would be wise to
1185 explicitly discard the now-unneeded earlier lines, to avoid future
1186 multi-line tokens growing the buffer without bound.
1187
1188 =cut
1189 */
1190
1191 void
1192 Perl_lex_discard_to(pTHX_ char *ptr)
1193 {
1194     char *buf;
1195     STRLEN discard_len;
1196     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1197     buf = SvPVX(PL_parser->linestr);
1198     if (ptr < buf)
1199         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1200     if (ptr == buf)
1201         return;
1202     if (ptr > PL_parser->bufptr)
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1204     discard_len = ptr - buf;
1205     if (PL_parser->oldbufptr < ptr)
1206         PL_parser->oldbufptr = ptr;
1207     if (PL_parser->oldoldbufptr < ptr)
1208         PL_parser->oldoldbufptr = ptr;
1209     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1210         PL_parser->last_uni = NULL;
1211     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1212         PL_parser->last_lop = NULL;
1213     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1214     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1215     PL_parser->bufend -= discard_len;
1216     PL_parser->bufptr -= discard_len;
1217     PL_parser->oldbufptr -= discard_len;
1218     PL_parser->oldoldbufptr -= discard_len;
1219     if (PL_parser->last_uni)
1220         PL_parser->last_uni -= discard_len;
1221     if (PL_parser->last_lop)
1222         PL_parser->last_lop -= discard_len;
1223 }
1224
1225 /*
1226 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1227
1228 Reads in the next chunk of text to be lexed, appending it to
1229 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1230 looked to the end of the current chunk and wants to know more.  It is
1231 usual, but not necessary, for lexing to have consumed the entirety of
1232 the current chunk at this time.
1233
1234 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1235 chunk (i.e., the current chunk has been entirely consumed), normally the
1236 current chunk will be discarded at the same time that the new chunk is
1237 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1238 will not be discarded.  If the current chunk has not been entirely
1239 consumed, then it will not be discarded regardless of the flag.
1240
1241 Returns true if some new text was added to the buffer, or false if the
1242 buffer has reached the end of the input text.
1243
1244 =cut
1245 */
1246
1247 #define LEX_FAKE_EOF 0x80000000
1248
1249 bool
1250 Perl_lex_next_chunk(pTHX_ U32 flags)
1251 {
1252     SV *linestr;
1253     char *buf;
1254     STRLEN old_bufend_pos, new_bufend_pos;
1255     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1256     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1257     bool got_some_for_debugger = 0;
1258     bool got_some;
1259     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1260         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1261     linestr = PL_parser->linestr;
1262     buf = SvPVX(linestr);
1263     if (!(flags & LEX_KEEP_PREVIOUS) &&
1264             PL_parser->bufptr == PL_parser->bufend) {
1265         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1266         linestart_pos = 0;
1267         if (PL_parser->last_uni != PL_parser->bufend)
1268             PL_parser->last_uni = NULL;
1269         if (PL_parser->last_lop != PL_parser->bufend)
1270             PL_parser->last_lop = NULL;
1271         last_uni_pos = last_lop_pos = 0;
1272         *buf = 0;
1273         SvCUR(linestr) = 0;
1274     } else {
1275         old_bufend_pos = PL_parser->bufend - buf;
1276         bufptr_pos = PL_parser->bufptr - buf;
1277         oldbufptr_pos = PL_parser->oldbufptr - buf;
1278         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1279         linestart_pos = PL_parser->linestart - buf;
1280         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1281         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1282     }
1283     if (flags & LEX_FAKE_EOF) {
1284         goto eof;
1285     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1286         got_some = 0;
1287     } else if (filter_gets(linestr, old_bufend_pos)) {
1288         got_some = 1;
1289         got_some_for_debugger = 1;
1290     } else {
1291         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1292             sv_setpvs(linestr, "");
1293         eof:
1294         /* End of real input.  Close filehandle (unless it was STDIN),
1295          * then add implicit termination.
1296          */
1297         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1298             PerlIO_clearerr(PL_parser->rsfp);
1299         else if (PL_parser->rsfp)
1300             (void)PerlIO_close(PL_parser->rsfp);
1301         PL_parser->rsfp = NULL;
1302         PL_parser->in_pod = PL_parser->filtered = 0;
1303 #ifdef PERL_MAD
1304         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1305             PL_faketokens = 1;
1306 #endif
1307         if (!PL_in_eval && PL_minus_p) {
1308             sv_catpvs(linestr,
1309                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1310             PL_minus_n = PL_minus_p = 0;
1311         } else if (!PL_in_eval && PL_minus_n) {
1312             sv_catpvs(linestr, /*{*/";}");
1313             PL_minus_n = 0;
1314         } else
1315             sv_catpvs(linestr, ";");
1316         got_some = 1;
1317     }
1318     buf = SvPVX(linestr);
1319     new_bufend_pos = SvCUR(linestr);
1320     PL_parser->bufend = buf + new_bufend_pos;
1321     PL_parser->bufptr = buf + bufptr_pos;
1322     PL_parser->oldbufptr = buf + oldbufptr_pos;
1323     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1324     PL_parser->linestart = buf + linestart_pos;
1325     if (PL_parser->last_uni)
1326         PL_parser->last_uni = buf + last_uni_pos;
1327     if (PL_parser->last_lop)
1328         PL_parser->last_lop = buf + last_lop_pos;
1329     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1330             PL_curstash != PL_debstash) {
1331         /* debugger active and we're not compiling the debugger code,
1332          * so store the line into the debugger's array of lines
1333          */
1334         update_debugger_info(NULL, buf+old_bufend_pos,
1335             new_bufend_pos-old_bufend_pos);
1336     }
1337     return got_some;
1338 }
1339
1340 /*
1341 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1342
1343 Looks ahead one (Unicode) character in the text currently being lexed.
1344 Returns the codepoint (unsigned integer value) of the next character,
1345 or -1 if lexing has reached the end of the input text.  To consume the
1346 peeked character, use L</lex_read_unichar>.
1347
1348 If the next character is in (or extends into) the next chunk of input
1349 text, the next chunk will be read in.  Normally the current chunk will be
1350 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1351 then the current chunk will not be discarded.
1352
1353 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1354 is encountered, an exception is generated.
1355
1356 =cut
1357 */
1358
1359 I32
1360 Perl_lex_peek_unichar(pTHX_ U32 flags)
1361 {
1362     dVAR;
1363     char *s, *bufend;
1364     if (flags & ~(LEX_KEEP_PREVIOUS))
1365         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1366     s = PL_parser->bufptr;
1367     bufend = PL_parser->bufend;
1368     if (UTF) {
1369         U8 head;
1370         I32 unichar;
1371         STRLEN len, retlen;
1372         if (s == bufend) {
1373             if (!lex_next_chunk(flags))
1374                 return -1;
1375             s = PL_parser->bufptr;
1376             bufend = PL_parser->bufend;
1377         }
1378         head = (U8)*s;
1379         if (!(head & 0x80))
1380             return head;
1381         if (head & 0x40) {
1382             len = PL_utf8skip[head];
1383             while ((STRLEN)(bufend-s) < len) {
1384                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1385                     break;
1386                 s = PL_parser->bufptr;
1387                 bufend = PL_parser->bufend;
1388             }
1389         }
1390         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1391         if (retlen == (STRLEN)-1) {
1392             /* malformed UTF-8 */
1393             ENTER;
1394             SAVESPTR(PL_warnhook);
1395             PL_warnhook = PERL_WARNHOOK_FATAL;
1396             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1397             LEAVE;
1398         }
1399         return unichar;
1400     } else {
1401         if (s == bufend) {
1402             if (!lex_next_chunk(flags))
1403                 return -1;
1404             s = PL_parser->bufptr;
1405         }
1406         return (U8)*s;
1407     }
1408 }
1409
1410 /*
1411 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1412
1413 Reads the next (Unicode) character in the text currently being lexed.
1414 Returns the codepoint (unsigned integer value) of the character read,
1415 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1416 if lexing has reached the end of the input text.  To non-destructively
1417 examine the next character, use L</lex_peek_unichar> instead.
1418
1419 If the next character is in (or extends into) the next chunk of input
1420 text, the next chunk will be read in.  Normally the current chunk will be
1421 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1422 then the current chunk will not be discarded.
1423
1424 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1425 is encountered, an exception is generated.
1426
1427 =cut
1428 */
1429
1430 I32
1431 Perl_lex_read_unichar(pTHX_ U32 flags)
1432 {
1433     I32 c;
1434     if (flags & ~(LEX_KEEP_PREVIOUS))
1435         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1436     c = lex_peek_unichar(flags);
1437     if (c != -1) {
1438         if (c == '\n')
1439             CopLINE_inc(PL_curcop);
1440         if (UTF)
1441             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1442         else
1443             ++(PL_parser->bufptr);
1444     }
1445     return c;
1446 }
1447
1448 /*
1449 =for apidoc Amx|void|lex_read_space|U32 flags
1450
1451 Reads optional spaces, in Perl style, in the text currently being
1452 lexed.  The spaces may include ordinary whitespace characters and
1453 Perl-style comments.  C<#line> directives are processed if encountered.
1454 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1455 at a non-space character (or the end of the input text).
1456
1457 If spaces extend into the next chunk of input text, the next chunk will
1458 be read in.  Normally the current chunk will be discarded at the same
1459 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1460 chunk will not be discarded.
1461
1462 =cut
1463 */
1464
1465 #define LEX_NO_NEXT_CHUNK 0x80000000
1466
1467 void
1468 Perl_lex_read_space(pTHX_ U32 flags)
1469 {
1470     char *s, *bufend;
1471     bool need_incline = 0;
1472     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1473         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1474 #ifdef PERL_MAD
1475     if (PL_skipwhite) {
1476         sv_free(PL_skipwhite);
1477         PL_skipwhite = NULL;
1478     }
1479     if (PL_madskills)
1480         PL_skipwhite = newSVpvs("");
1481 #endif /* PERL_MAD */
1482     s = PL_parser->bufptr;
1483     bufend = PL_parser->bufend;
1484     while (1) {
1485         char c = *s;
1486         if (c == '#') {
1487             do {
1488                 c = *++s;
1489             } while (!(c == '\n' || (c == 0 && s == bufend)));
1490         } else if (c == '\n') {
1491             s++;
1492             PL_parser->linestart = s;
1493             if (s == bufend)
1494                 need_incline = 1;
1495             else
1496                 incline(s);
1497         } else if (isSPACE(c)) {
1498             s++;
1499         } else if (c == 0 && s == bufend) {
1500             bool got_more;
1501 #ifdef PERL_MAD
1502             if (PL_madskills)
1503                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1504 #endif /* PERL_MAD */
1505             if (flags & LEX_NO_NEXT_CHUNK)
1506                 break;
1507             PL_parser->bufptr = s;
1508             CopLINE_inc(PL_curcop);
1509             got_more = lex_next_chunk(flags);
1510             CopLINE_dec(PL_curcop);
1511             s = PL_parser->bufptr;
1512             bufend = PL_parser->bufend;
1513             if (!got_more)
1514                 break;
1515             if (need_incline && PL_parser->rsfp) {
1516                 incline(s);
1517                 need_incline = 0;
1518             }
1519         } else {
1520             break;
1521         }
1522     }
1523 #ifdef PERL_MAD
1524     if (PL_madskills)
1525         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1526 #endif /* PERL_MAD */
1527     PL_parser->bufptr = s;
1528 }
1529
1530 /*
1531  * S_incline
1532  * This subroutine has nothing to do with tilting, whether at windmills
1533  * or pinball tables.  Its name is short for "increment line".  It
1534  * increments the current line number in CopLINE(PL_curcop) and checks
1535  * to see whether the line starts with a comment of the form
1536  *    # line 500 "foo.pm"
1537  * If so, it sets the current line number and file to the values in the comment.
1538  */
1539
1540 STATIC void
1541 S_incline(pTHX_ const char *s)
1542 {
1543     dVAR;
1544     const char *t;
1545     const char *n;
1546     const char *e;
1547     line_t line_num;
1548
1549     PERL_ARGS_ASSERT_INCLINE;
1550
1551     CopLINE_inc(PL_curcop);
1552     if (*s++ != '#')
1553         return;
1554     while (SPACE_OR_TAB(*s))
1555         s++;
1556     if (strnEQ(s, "line", 4))
1557         s += 4;
1558     else
1559         return;
1560     if (SPACE_OR_TAB(*s))
1561         s++;
1562     else
1563         return;
1564     while (SPACE_OR_TAB(*s))
1565         s++;
1566     if (!isDIGIT(*s))
1567         return;
1568
1569     n = s;
1570     while (isDIGIT(*s))
1571         s++;
1572     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1573         return;
1574     while (SPACE_OR_TAB(*s))
1575         s++;
1576     if (*s == '"' && (t = strchr(s+1, '"'))) {
1577         s++;
1578         e = t + 1;
1579     }
1580     else {
1581         t = s;
1582         while (!isSPACE(*t))
1583             t++;
1584         e = t;
1585     }
1586     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1587         e++;
1588     if (*e != '\n' && *e != '\0')
1589         return;         /* false alarm */
1590
1591     line_num = atoi(n)-1;
1592
1593     if (t - s > 0) {
1594         const STRLEN len = t - s;
1595         SV *const temp_sv = CopFILESV(PL_curcop);
1596         const char *cf;
1597         STRLEN tmplen;
1598
1599         if (temp_sv) {
1600             cf = SvPVX(temp_sv);
1601             tmplen = SvCUR(temp_sv);
1602         } else {
1603             cf = NULL;
1604             tmplen = 0;
1605         }
1606
1607         if (!PL_rsfp && !PL_parser->filtered) {
1608             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1609              * to *{"::_<newfilename"} */
1610             /* However, the long form of evals is only turned on by the
1611                debugger - usually they're "(eval %lu)" */
1612             char smallbuf[128];
1613             char *tmpbuf;
1614             GV **gvp;
1615             STRLEN tmplen2 = len;
1616             if (tmplen + 2 <= sizeof smallbuf)
1617                 tmpbuf = smallbuf;
1618             else
1619                 Newx(tmpbuf, tmplen + 2, char);
1620             tmpbuf[0] = '_';
1621             tmpbuf[1] = '<';
1622             memcpy(tmpbuf + 2, cf, tmplen);
1623             tmplen += 2;
1624             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1625             if (gvp) {
1626                 char *tmpbuf2;
1627                 GV *gv2;
1628
1629                 if (tmplen2 + 2 <= sizeof smallbuf)
1630                     tmpbuf2 = smallbuf;
1631                 else
1632                     Newx(tmpbuf2, tmplen2 + 2, char);
1633
1634                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1635                     /* Either they malloc'd it, or we malloc'd it,
1636                        so no prefix is present in ours.  */
1637                     tmpbuf2[0] = '_';
1638                     tmpbuf2[1] = '<';
1639                 }
1640
1641                 memcpy(tmpbuf2 + 2, s, tmplen2);
1642                 tmplen2 += 2;
1643
1644                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1645                 if (!isGV(gv2)) {
1646                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1647                     /* adjust ${"::_<newfilename"} to store the new file name */
1648                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1649                     /* The line number may differ. If that is the case,
1650                        alias the saved lines that are in the array.
1651                        Otherwise alias the whole array. */
1652                     if (CopLINE(PL_curcop) == line_num) {
1653                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1654                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1655                     }
1656                     else if (GvAV(*gvp)) {
1657                         AV * const av = GvAV(*gvp);
1658                         const I32 start = CopLINE(PL_curcop)+1;
1659                         I32 items = AvFILLp(av) - start;
1660                         if (items > 0) {
1661                             AV * const av2 = GvAVn(gv2);
1662                             SV **svp = AvARRAY(av) + start;
1663                             I32 l = (I32)line_num+1;
1664                             while (items--)
1665                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1666                         }
1667                     }
1668                 }
1669
1670                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1671             }
1672             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1673         }
1674         CopFILE_free(PL_curcop);
1675         CopFILE_setn(PL_curcop, s, len);
1676     }
1677     CopLINE_set(PL_curcop, line_num);
1678 }
1679
1680 #ifdef PERL_MAD
1681 /* skip space before PL_thistoken */
1682
1683 STATIC char *
1684 S_skipspace0(pTHX_ register char *s)
1685 {
1686     PERL_ARGS_ASSERT_SKIPSPACE0;
1687
1688     s = skipspace(s);
1689     if (!PL_madskills)
1690         return s;
1691     if (PL_skipwhite) {
1692         if (!PL_thiswhite)
1693             PL_thiswhite = newSVpvs("");
1694         sv_catsv(PL_thiswhite, PL_skipwhite);
1695         sv_free(PL_skipwhite);
1696         PL_skipwhite = 0;
1697     }
1698     PL_realtokenstart = s - SvPVX(PL_linestr);
1699     return s;
1700 }
1701
1702 /* skip space after PL_thistoken */
1703
1704 STATIC char *
1705 S_skipspace1(pTHX_ register char *s)
1706 {
1707     const char *start = s;
1708     I32 startoff = start - SvPVX(PL_linestr);
1709
1710     PERL_ARGS_ASSERT_SKIPSPACE1;
1711
1712     s = skipspace(s);
1713     if (!PL_madskills)
1714         return s;
1715     start = SvPVX(PL_linestr) + startoff;
1716     if (!PL_thistoken && PL_realtokenstart >= 0) {
1717         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1718         PL_thistoken = newSVpvn(tstart, start - tstart);
1719     }
1720     PL_realtokenstart = -1;
1721     if (PL_skipwhite) {
1722         if (!PL_nextwhite)
1723             PL_nextwhite = newSVpvs("");
1724         sv_catsv(PL_nextwhite, PL_skipwhite);
1725         sv_free(PL_skipwhite);
1726         PL_skipwhite = 0;
1727     }
1728     return s;
1729 }
1730
1731 STATIC char *
1732 S_skipspace2(pTHX_ register char *s, SV **svp)
1733 {
1734     char *start;
1735     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1736     const I32 startoff = s - SvPVX(PL_linestr);
1737
1738     PERL_ARGS_ASSERT_SKIPSPACE2;
1739
1740     s = skipspace(s);
1741     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1742     if (!PL_madskills || !svp)
1743         return s;
1744     start = SvPVX(PL_linestr) + startoff;
1745     if (!PL_thistoken && PL_realtokenstart >= 0) {
1746         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1747         PL_thistoken = newSVpvn(tstart, start - tstart);
1748         PL_realtokenstart = -1;
1749     }
1750     if (PL_skipwhite) {
1751         if (!*svp)
1752             *svp = newSVpvs("");
1753         sv_setsv(*svp, PL_skipwhite);
1754         sv_free(PL_skipwhite);
1755         PL_skipwhite = 0;
1756     }
1757
1758     return s;
1759 }
1760 #endif
1761
1762 STATIC void
1763 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1764 {
1765     AV *av = CopFILEAVx(PL_curcop);
1766     if (av) {
1767         SV * const sv = newSV_type(SVt_PVMG);
1768         if (orig_sv)
1769             sv_setsv(sv, orig_sv);
1770         else
1771             sv_setpvn(sv, buf, len);
1772         (void)SvIOK_on(sv);
1773         SvIV_set(sv, 0);
1774         av_store(av, (I32)CopLINE(PL_curcop), sv);
1775     }
1776 }
1777
1778 /*
1779  * S_skipspace
1780  * Called to gobble the appropriate amount and type of whitespace.
1781  * Skips comments as well.
1782  */
1783
1784 STATIC char *
1785 S_skipspace(pTHX_ register char *s)
1786 {
1787 #ifdef PERL_MAD
1788     char *start = s;
1789 #endif /* PERL_MAD */
1790     PERL_ARGS_ASSERT_SKIPSPACE;
1791 #ifdef PERL_MAD
1792     if (PL_skipwhite) {
1793         sv_free(PL_skipwhite);
1794         PL_skipwhite = NULL;
1795     }
1796 #endif /* PERL_MAD */
1797     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1798         while (s < PL_bufend && SPACE_OR_TAB(*s))
1799             s++;
1800     } else {
1801         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1802         PL_bufptr = s;
1803         lex_read_space(LEX_KEEP_PREVIOUS |
1804                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1805                     LEX_NO_NEXT_CHUNK : 0));
1806         s = PL_bufptr;
1807         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1808         if (PL_linestart > PL_bufptr)
1809             PL_bufptr = PL_linestart;
1810         return s;
1811     }
1812 #ifdef PERL_MAD
1813     if (PL_madskills)
1814         PL_skipwhite = newSVpvn(start, s-start);
1815 #endif /* PERL_MAD */
1816     return s;
1817 }
1818
1819 /*
1820  * S_check_uni
1821  * Check the unary operators to ensure there's no ambiguity in how they're
1822  * used.  An ambiguous piece of code would be:
1823  *     rand + 5
1824  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1825  * the +5 is its argument.
1826  */
1827
1828 STATIC void
1829 S_check_uni(pTHX)
1830 {
1831     dVAR;
1832     const char *s;
1833     const char *t;
1834
1835     if (PL_oldoldbufptr != PL_last_uni)
1836         return;
1837     while (isSPACE(*PL_last_uni))
1838         PL_last_uni++;
1839     s = PL_last_uni;
1840     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1841         s++;
1842     if ((t = strchr(s, '(')) && t < PL_bufptr)
1843         return;
1844
1845     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1846                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1847                      (int)(s - PL_last_uni), PL_last_uni);
1848 }
1849
1850 /*
1851  * LOP : macro to build a list operator.  Its behaviour has been replaced
1852  * with a subroutine, S_lop() for which LOP is just another name.
1853  */
1854
1855 #define LOP(f,x) return lop(f,x,s)
1856
1857 /*
1858  * S_lop
1859  * Build a list operator (or something that might be one).  The rules:
1860  *  - if we have a next token, then it's a list operator [why?]
1861  *  - if the next thing is an opening paren, then it's a function
1862  *  - else it's a list operator
1863  */
1864
1865 STATIC I32
1866 S_lop(pTHX_ I32 f, int x, char *s)
1867 {
1868     dVAR;
1869
1870     PERL_ARGS_ASSERT_LOP;
1871
1872     pl_yylval.ival = f;
1873     CLINE;
1874     PL_expect = x;
1875     PL_bufptr = s;
1876     PL_last_lop = PL_oldbufptr;
1877     PL_last_lop_op = (OPCODE)f;
1878 #ifdef PERL_MAD
1879     if (PL_lasttoke)
1880         goto lstop;
1881 #else
1882     if (PL_nexttoke)
1883         goto lstop;
1884 #endif
1885     if (*s == '(')
1886         return REPORT(FUNC);
1887     s = PEEKSPACE(s);
1888     if (*s == '(')
1889         return REPORT(FUNC);
1890     else {
1891         lstop:
1892         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1893             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1894         return REPORT(LSTOP);
1895     }
1896 }
1897
1898 #ifdef PERL_MAD
1899  /*
1900  * S_start_force
1901  * Sets up for an eventual force_next().  start_force(0) basically does
1902  * an unshift, while start_force(-1) does a push.  yylex removes items
1903  * on the "pop" end.
1904  */
1905
1906 STATIC void
1907 S_start_force(pTHX_ int where)
1908 {
1909     int i;
1910
1911     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1912         where = PL_lasttoke;
1913     assert(PL_curforce < 0 || PL_curforce == where);
1914     if (PL_curforce != where) {
1915         for (i = PL_lasttoke; i > where; --i) {
1916             PL_nexttoke[i] = PL_nexttoke[i-1];
1917         }
1918         PL_lasttoke++;
1919     }
1920     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1921         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1922     PL_curforce = where;
1923     if (PL_nextwhite) {
1924         if (PL_madskills)
1925             curmad('^', newSVpvs(""));
1926         CURMAD('_', PL_nextwhite);
1927     }
1928 }
1929
1930 STATIC void
1931 S_curmad(pTHX_ char slot, SV *sv)
1932 {
1933     MADPROP **where;
1934
1935     if (!sv)
1936         return;
1937     if (PL_curforce < 0)
1938         where = &PL_thismad;
1939     else
1940         where = &PL_nexttoke[PL_curforce].next_mad;
1941
1942     if (PL_faketokens)
1943         sv_setpvs(sv, "");
1944     else {
1945         if (!IN_BYTES) {
1946             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1947                 SvUTF8_on(sv);
1948             else if (PL_encoding) {
1949                 sv_recode_to_utf8(sv, PL_encoding);
1950             }
1951         }
1952     }
1953
1954     /* keep a slot open for the head of the list? */
1955     if (slot != '_' && *where && (*where)->mad_key == '^') {
1956         (*where)->mad_key = slot;
1957         sv_free(MUTABLE_SV(((*where)->mad_val)));
1958         (*where)->mad_val = (void*)sv;
1959     }
1960     else
1961         addmad(newMADsv(slot, sv), where, 0);
1962 }
1963 #else
1964 #  define start_force(where)    NOOP
1965 #  define curmad(slot, sv)      NOOP
1966 #endif
1967
1968 /*
1969  * S_force_next
1970  * When the lexer realizes it knows the next token (for instance,
1971  * it is reordering tokens for the parser) then it can call S_force_next
1972  * to know what token to return the next time the lexer is called.  Caller
1973  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1974  * and possibly PL_expect to ensure the lexer handles the token correctly.
1975  */
1976
1977 STATIC void
1978 S_force_next(pTHX_ I32 type)
1979 {
1980     dVAR;
1981 #ifdef DEBUGGING
1982     if (DEBUG_T_TEST) {
1983         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1984         tokereport(type, &NEXTVAL_NEXTTOKE);
1985     }
1986 #endif
1987 #ifdef PERL_MAD
1988     if (PL_curforce < 0)
1989         start_force(PL_lasttoke);
1990     PL_nexttoke[PL_curforce].next_type = type;
1991     if (PL_lex_state != LEX_KNOWNEXT)
1992         PL_lex_defer = PL_lex_state;
1993     PL_lex_state = LEX_KNOWNEXT;
1994     PL_lex_expect = PL_expect;
1995     PL_curforce = -1;
1996 #else
1997     PL_nexttype[PL_nexttoke] = type;
1998     PL_nexttoke++;
1999     if (PL_lex_state != LEX_KNOWNEXT) {
2000         PL_lex_defer = PL_lex_state;
2001         PL_lex_expect = PL_expect;
2002         PL_lex_state = LEX_KNOWNEXT;
2003     }
2004 #endif
2005 }
2006
2007 void
2008 Perl_yyunlex(pTHX)
2009 {
2010     int yyc = PL_parser->yychar;
2011     if (yyc != YYEMPTY) {
2012         if (yyc) {
2013             start_force(-1);
2014             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2015             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2016                 PL_lex_allbrackets--;
2017                 PL_lex_brackets--;
2018                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2019             } else if (yyc == '('/*)*/) {
2020                 PL_lex_allbrackets--;
2021                 yyc |= (2<<24);
2022             }
2023             force_next(yyc);
2024         }
2025         PL_parser->yychar = YYEMPTY;
2026     }
2027 }
2028
2029 STATIC SV *
2030 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2031 {
2032     dVAR;
2033     SV * const sv = newSVpvn_utf8(start, len,
2034                                   !IN_BYTES
2035                                   && UTF
2036                                   && !is_ascii_string((const U8*)start, len)
2037                                   && is_utf8_string((const U8*)start, len));
2038     return sv;
2039 }
2040
2041 /*
2042  * S_force_word
2043  * When the lexer knows the next thing is a word (for instance, it has
2044  * just seen -> and it knows that the next char is a word char, then
2045  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2046  * lookahead.
2047  *
2048  * Arguments:
2049  *   char *start : buffer position (must be within PL_linestr)
2050  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2051  *   int check_keyword : if true, Perl checks to make sure the word isn't
2052  *       a keyword (do this if the word is a label, e.g. goto FOO)
2053  *   int allow_pack : if true, : characters will also be allowed (require,
2054  *       use, etc. do this)
2055  *   int allow_initial_tick : used by the "sub" lexer only.
2056  */
2057
2058 STATIC char *
2059 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2060 {
2061     dVAR;
2062     register char *s;
2063     STRLEN len;
2064
2065     PERL_ARGS_ASSERT_FORCE_WORD;
2066
2067     start = SKIPSPACE1(start);
2068     s = start;
2069     if (isIDFIRST_lazy_if(s,UTF) ||
2070         (allow_pack && *s == ':') ||
2071         (allow_initial_tick && *s == '\'') )
2072     {
2073         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2074         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2075             return start;
2076         start_force(PL_curforce);
2077         if (PL_madskills)
2078             curmad('X', newSVpvn(start,s-start));
2079         if (token == METHOD) {
2080             s = SKIPSPACE1(s);
2081             if (*s == '(')
2082                 PL_expect = XTERM;
2083             else {
2084                 PL_expect = XOPERATOR;
2085             }
2086         }
2087         if (PL_madskills)
2088             curmad('g', newSVpvs( "forced" ));
2089         NEXTVAL_NEXTTOKE.opval
2090             = (OP*)newSVOP(OP_CONST,0,
2091                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2092         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2093         force_next(token);
2094     }
2095     return s;
2096 }
2097
2098 /*
2099  * S_force_ident
2100  * Called when the lexer wants $foo *foo &foo etc, but the program
2101  * text only contains the "foo" portion.  The first argument is a pointer
2102  * to the "foo", and the second argument is the type symbol to prefix.
2103  * Forces the next token to be a "WORD".
2104  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2105  */
2106
2107 STATIC void
2108 S_force_ident(pTHX_ register const char *s, int kind)
2109 {
2110     dVAR;
2111
2112     PERL_ARGS_ASSERT_FORCE_IDENT;
2113
2114     if (*s) {
2115         const STRLEN len = strlen(s);
2116         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2117                                                                 UTF ? SVf_UTF8 : 0));
2118         start_force(PL_curforce);
2119         NEXTVAL_NEXTTOKE.opval = o;
2120         force_next(WORD);
2121         if (kind) {
2122             o->op_private = OPpCONST_ENTERED;
2123             /* XXX see note in pp_entereval() for why we forgo typo
2124                warnings if the symbol must be introduced in an eval.
2125                GSAR 96-10-12 */
2126             gv_fetchpvn_flags(s, len,
2127                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2128                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2129                               kind == '$' ? SVt_PV :
2130                               kind == '@' ? SVt_PVAV :
2131                               kind == '%' ? SVt_PVHV :
2132                               SVt_PVGV
2133                               );
2134         }
2135     }
2136 }
2137
2138 NV
2139 Perl_str_to_version(pTHX_ SV *sv)
2140 {
2141     NV retval = 0.0;
2142     NV nshift = 1.0;
2143     STRLEN len;
2144     const char *start = SvPV_const(sv,len);
2145     const char * const end = start + len;
2146     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2147
2148     PERL_ARGS_ASSERT_STR_TO_VERSION;
2149
2150     while (start < end) {
2151         STRLEN skip;
2152         UV n;
2153         if (utf)
2154             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2155         else {
2156             n = *(U8*)start;
2157             skip = 1;
2158         }
2159         retval += ((NV)n)/nshift;
2160         start += skip;
2161         nshift *= 1000;
2162     }
2163     return retval;
2164 }
2165
2166 /*
2167  * S_force_version
2168  * Forces the next token to be a version number.
2169  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2170  * and if "guessing" is TRUE, then no new token is created (and the caller
2171  * must use an alternative parsing method).
2172  */
2173
2174 STATIC char *
2175 S_force_version(pTHX_ char *s, int guessing)
2176 {
2177     dVAR;
2178     OP *version = NULL;
2179     char *d;
2180 #ifdef PERL_MAD
2181     I32 startoff = s - SvPVX(PL_linestr);
2182 #endif
2183
2184     PERL_ARGS_ASSERT_FORCE_VERSION;
2185
2186     s = SKIPSPACE1(s);
2187
2188     d = s;
2189     if (*d == 'v')
2190         d++;
2191     if (isDIGIT(*d)) {
2192         while (isDIGIT(*d) || *d == '_' || *d == '.')
2193             d++;
2194 #ifdef PERL_MAD
2195         if (PL_madskills) {
2196             start_force(PL_curforce);
2197             curmad('X', newSVpvn(s,d-s));
2198         }
2199 #endif
2200         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2201             SV *ver;
2202 #ifdef USE_LOCALE_NUMERIC
2203             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2204             setlocale(LC_NUMERIC, "C");
2205 #endif
2206             s = scan_num(s, &pl_yylval);
2207 #ifdef USE_LOCALE_NUMERIC
2208             setlocale(LC_NUMERIC, loc);
2209             Safefree(loc);
2210 #endif
2211             version = pl_yylval.opval;
2212             ver = cSVOPx(version)->op_sv;
2213             if (SvPOK(ver) && !SvNIOK(ver)) {
2214                 SvUPGRADE(ver, SVt_PVNV);
2215                 SvNV_set(ver, str_to_version(ver));
2216                 SvNOK_on(ver);          /* hint that it is a version */
2217             }
2218         }
2219         else if (guessing) {
2220 #ifdef PERL_MAD
2221             if (PL_madskills) {
2222                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2223                 PL_nextwhite = 0;
2224                 s = SvPVX(PL_linestr) + startoff;
2225             }
2226 #endif
2227             return s;
2228         }
2229     }
2230
2231 #ifdef PERL_MAD
2232     if (PL_madskills && !version) {
2233         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2234         PL_nextwhite = 0;
2235         s = SvPVX(PL_linestr) + startoff;
2236     }
2237 #endif
2238     /* NOTE: The parser sees the package name and the VERSION swapped */
2239     start_force(PL_curforce);
2240     NEXTVAL_NEXTTOKE.opval = version;
2241     force_next(WORD);
2242
2243     return s;
2244 }
2245
2246 /*
2247  * S_force_strict_version
2248  * Forces the next token to be a version number using strict syntax rules.
2249  */
2250
2251 STATIC char *
2252 S_force_strict_version(pTHX_ char *s)
2253 {
2254     dVAR;
2255     OP *version = NULL;
2256 #ifdef PERL_MAD
2257     I32 startoff = s - SvPVX(PL_linestr);
2258 #endif
2259     const char *errstr = NULL;
2260
2261     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2262
2263     while (isSPACE(*s)) /* leading whitespace */
2264         s++;
2265
2266     if (is_STRICT_VERSION(s,&errstr)) {
2267         SV *ver = newSV(0);
2268         s = (char *)scan_version(s, ver, 0);
2269         version = newSVOP(OP_CONST, 0, ver);
2270     }
2271     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2272             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2273     {
2274         PL_bufptr = s;
2275         if (errstr)
2276             yyerror(errstr); /* version required */
2277         return s;
2278     }
2279
2280 #ifdef PERL_MAD
2281     if (PL_madskills && !version) {
2282         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2283         PL_nextwhite = 0;
2284         s = SvPVX(PL_linestr) + startoff;
2285     }
2286 #endif
2287     /* NOTE: The parser sees the package name and the VERSION swapped */
2288     start_force(PL_curforce);
2289     NEXTVAL_NEXTTOKE.opval = version;
2290     force_next(WORD);
2291
2292     return s;
2293 }
2294
2295 /*
2296  * S_tokeq
2297  * Tokenize a quoted string passed in as an SV.  It finds the next
2298  * chunk, up to end of string or a backslash.  It may make a new
2299  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2300  * turns \\ into \.
2301  */
2302
2303 STATIC SV *
2304 S_tokeq(pTHX_ SV *sv)
2305 {
2306     dVAR;
2307     register char *s;
2308     register char *send;
2309     register char *d;
2310     STRLEN len = 0;
2311     SV *pv = sv;
2312
2313     PERL_ARGS_ASSERT_TOKEQ;
2314
2315     if (!SvLEN(sv))
2316         goto finish;
2317
2318     s = SvPV_force(sv, len);
2319     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2320         goto finish;
2321     send = s + len;
2322     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2323     while (s < send && !(*s == '\\' && s[1] == '\\'))
2324         s++;
2325     if (s == send)
2326         goto finish;
2327     d = s;
2328     if ( PL_hints & HINT_NEW_STRING ) {
2329         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2330     }
2331     while (s < send) {
2332         if (*s == '\\') {
2333             if (s + 1 < send && (s[1] == '\\'))
2334                 s++;            /* all that, just for this */
2335         }
2336         *d++ = *s++;
2337     }
2338     *d = '\0';
2339     SvCUR_set(sv, d - SvPVX_const(sv));
2340   finish:
2341     if ( PL_hints & HINT_NEW_STRING )
2342        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2343     return sv;
2344 }
2345
2346 /*
2347  * Now come three functions related to double-quote context,
2348  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2349  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2350  * interact with PL_lex_state, and create fake ( ... ) argument lists
2351  * to handle functions and concatenation.
2352  * They assume that whoever calls them will be setting up a fake
2353  * join call, because each subthing puts a ',' after it.  This lets
2354  *   "lower \luPpEr"
2355  * become
2356  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2357  *
2358  * (I'm not sure whether the spurious commas at the end of lcfirst's
2359  * arguments and join's arguments are created or not).
2360  */
2361
2362 /*
2363  * S_sublex_start
2364  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2365  *
2366  * Pattern matching will set PL_lex_op to the pattern-matching op to
2367  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2368  *
2369  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2370  *
2371  * Everything else becomes a FUNC.
2372  *
2373  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2374  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2375  * call to S_sublex_push().
2376  */
2377
2378 STATIC I32
2379 S_sublex_start(pTHX)
2380 {
2381     dVAR;
2382     register const I32 op_type = pl_yylval.ival;
2383
2384     if (op_type == OP_NULL) {
2385         pl_yylval.opval = PL_lex_op;
2386         PL_lex_op = NULL;
2387         return THING;
2388     }
2389     if (op_type == OP_CONST || op_type == OP_READLINE) {
2390         SV *sv = tokeq(PL_lex_stuff);
2391
2392         if (SvTYPE(sv) == SVt_PVIV) {
2393             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2394             STRLEN len;
2395             const char * const p = SvPV_const(sv, len);
2396             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2397             SvREFCNT_dec(sv);
2398             sv = nsv;
2399         }
2400         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2401         PL_lex_stuff = NULL;
2402         /* Allow <FH> // "foo" */
2403         if (op_type == OP_READLINE)
2404             PL_expect = XTERMORDORDOR;
2405         return THING;
2406     }
2407     else if (op_type == OP_BACKTICK && PL_lex_op) {
2408         /* readpipe() vas overriden */
2409         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2410         pl_yylval.opval = PL_lex_op;
2411         PL_lex_op = NULL;
2412         PL_lex_stuff = NULL;
2413         return THING;
2414     }
2415
2416     PL_sublex_info.super_state = PL_lex_state;
2417     PL_sublex_info.sub_inwhat = (U16)op_type;
2418     PL_sublex_info.sub_op = PL_lex_op;
2419     PL_lex_state = LEX_INTERPPUSH;
2420
2421     PL_expect = XTERM;
2422     if (PL_lex_op) {
2423         pl_yylval.opval = PL_lex_op;
2424         PL_lex_op = NULL;
2425         return PMFUNC;
2426     }
2427     else
2428         return FUNC;
2429 }
2430
2431 /*
2432  * S_sublex_push
2433  * Create a new scope to save the lexing state.  The scope will be
2434  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2435  * to the uc, lc, etc. found before.
2436  * Sets PL_lex_state to LEX_INTERPCONCAT.
2437  */
2438
2439 STATIC I32
2440 S_sublex_push(pTHX)
2441 {
2442     dVAR;
2443     ENTER;
2444
2445     PL_lex_state = PL_sublex_info.super_state;
2446     SAVEBOOL(PL_lex_dojoin);
2447     SAVEI32(PL_lex_brackets);
2448     SAVEI32(PL_lex_allbrackets);
2449     SAVEI8(PL_lex_fakeeof);
2450     SAVEI32(PL_lex_casemods);
2451     SAVEI32(PL_lex_starts);
2452     SAVEI8(PL_lex_state);
2453     SAVEVPTR(PL_lex_inpat);
2454     SAVEI16(PL_lex_inwhat);
2455     SAVECOPLINE(PL_curcop);
2456     SAVEPPTR(PL_bufptr);
2457     SAVEPPTR(PL_bufend);
2458     SAVEPPTR(PL_oldbufptr);
2459     SAVEPPTR(PL_oldoldbufptr);
2460     SAVEPPTR(PL_last_lop);
2461     SAVEPPTR(PL_last_uni);
2462     SAVEPPTR(PL_linestart);
2463     SAVESPTR(PL_linestr);
2464     SAVEGENERICPV(PL_lex_brackstack);
2465     SAVEGENERICPV(PL_lex_casestack);
2466
2467     PL_linestr = PL_lex_stuff;
2468     PL_lex_stuff = NULL;
2469
2470     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2471         = SvPVX(PL_linestr);
2472     PL_bufend += SvCUR(PL_linestr);
2473     PL_last_lop = PL_last_uni = NULL;
2474     SAVEFREESV(PL_linestr);
2475
2476     PL_lex_dojoin = FALSE;
2477     PL_lex_brackets = 0;
2478     PL_lex_allbrackets = 0;
2479     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2480     Newx(PL_lex_brackstack, 120, char);
2481     Newx(PL_lex_casestack, 12, char);
2482     PL_lex_casemods = 0;
2483     *PL_lex_casestack = '\0';
2484     PL_lex_starts = 0;
2485     PL_lex_state = LEX_INTERPCONCAT;
2486     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2487
2488     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2489     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2490     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2491         PL_lex_inpat = PL_sublex_info.sub_op;
2492     else
2493         PL_lex_inpat = NULL;
2494
2495     return '(';
2496 }
2497
2498 /*
2499  * S_sublex_done
2500  * Restores lexer state after a S_sublex_push.
2501  */
2502
2503 STATIC I32
2504 S_sublex_done(pTHX)
2505 {
2506     dVAR;
2507     if (!PL_lex_starts++) {
2508         SV * const sv = newSVpvs("");
2509         if (SvUTF8(PL_linestr))
2510             SvUTF8_on(sv);
2511         PL_expect = XOPERATOR;
2512         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2513         return THING;
2514     }
2515
2516     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2517         PL_lex_state = LEX_INTERPCASEMOD;
2518         return yylex();
2519     }
2520
2521     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2522     assert(PL_lex_inwhat != OP_TRANSR);
2523     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2524         PL_linestr = PL_lex_repl;
2525         PL_lex_inpat = 0;
2526         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2527         PL_bufend += SvCUR(PL_linestr);
2528         PL_last_lop = PL_last_uni = NULL;
2529         SAVEFREESV(PL_linestr);
2530         PL_lex_dojoin = FALSE;
2531         PL_lex_brackets = 0;
2532         PL_lex_allbrackets = 0;
2533         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2534         PL_lex_casemods = 0;
2535         *PL_lex_casestack = '\0';
2536         PL_lex_starts = 0;
2537         if (SvEVALED(PL_lex_repl)) {
2538             PL_lex_state = LEX_INTERPNORMAL;
2539             PL_lex_starts++;
2540             /*  we don't clear PL_lex_repl here, so that we can check later
2541                 whether this is an evalled subst; that means we rely on the
2542                 logic to ensure sublex_done() is called again only via the
2543                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2544         }
2545         else {
2546             PL_lex_state = LEX_INTERPCONCAT;
2547             PL_lex_repl = NULL;
2548         }
2549         return ',';
2550     }
2551     else {
2552 #ifdef PERL_MAD
2553         if (PL_madskills) {
2554             if (PL_thiswhite) {
2555                 if (!PL_endwhite)
2556                     PL_endwhite = newSVpvs("");
2557                 sv_catsv(PL_endwhite, PL_thiswhite);
2558                 PL_thiswhite = 0;
2559             }
2560             if (PL_thistoken)
2561                 sv_setpvs(PL_thistoken,"");
2562             else
2563                 PL_realtokenstart = -1;
2564         }
2565 #endif
2566         LEAVE;
2567         PL_bufend = SvPVX(PL_linestr);
2568         PL_bufend += SvCUR(PL_linestr);
2569         PL_expect = XOPERATOR;
2570         PL_sublex_info.sub_inwhat = 0;
2571         return ')';
2572     }
2573 }
2574
2575 /*
2576   scan_const
2577
2578   Extracts a pattern, double-quoted string, or transliteration.  This
2579   is terrifying code.
2580
2581   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2582   processing a pattern (PL_lex_inpat is true), a transliteration
2583   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2584
2585   Returns a pointer to the character scanned up to. If this is
2586   advanced from the start pointer supplied (i.e. if anything was
2587   successfully parsed), will leave an OP for the substring scanned
2588   in pl_yylval. Caller must intuit reason for not parsing further
2589   by looking at the next characters herself.
2590
2591   In patterns:
2592     backslashes:
2593       constants: \N{NAME} only
2594       case and quoting: \U \Q \E
2595     stops on @ and $, but not for $ as tail anchor
2596
2597   In transliterations:
2598     characters are VERY literal, except for - not at the start or end
2599     of the string, which indicates a range. If the range is in bytes,
2600     scan_const expands the range to the full set of intermediate
2601     characters. If the range is in utf8, the hyphen is replaced with
2602     a certain range mark which will be handled by pmtrans() in op.c.
2603
2604   In double-quoted strings:
2605     backslashes:
2606       double-quoted style: \r and \n
2607       constants: \x31, etc.
2608       deprecated backrefs: \1 (in substitution replacements)
2609       case and quoting: \U \Q \E
2610     stops on @ and $
2611
2612   scan_const does *not* construct ops to handle interpolated strings.
2613   It stops processing as soon as it finds an embedded $ or @ variable
2614   and leaves it to the caller to work out what's going on.
2615
2616   embedded arrays (whether in pattern or not) could be:
2617       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2618
2619   $ in double-quoted strings must be the symbol of an embedded scalar.
2620
2621   $ in pattern could be $foo or could be tail anchor.  Assumption:
2622   it's a tail anchor if $ is the last thing in the string, or if it's
2623   followed by one of "()| \r\n\t"
2624
2625   \1 (backreferences) are turned into $1
2626
2627   The structure of the code is
2628       while (there's a character to process) {
2629           handle transliteration ranges
2630           skip regexp comments /(?#comment)/ and codes /(?{code})/
2631           skip #-initiated comments in //x patterns
2632           check for embedded arrays
2633           check for embedded scalars
2634           if (backslash) {
2635               deprecate \1 in substitution replacements
2636               handle string-changing backslashes \l \U \Q \E, etc.
2637               switch (what was escaped) {
2638                   handle \- in a transliteration (becomes a literal -)
2639                   if a pattern and not \N{, go treat as regular character
2640                   handle \132 (octal characters)
2641                   handle \x15 and \x{1234} (hex characters)
2642                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2643                   handle \cV (control characters)
2644                   handle printf-style backslashes (\f, \r, \n, etc)
2645               } (end switch)
2646               continue
2647           } (end if backslash)
2648           handle regular character
2649     } (end while character to read)
2650
2651 */
2652
2653 STATIC char *
2654 S_scan_const(pTHX_ char *start)
2655 {
2656     dVAR;
2657     register char *send = PL_bufend;            /* end of the constant */
2658     SV *sv = newSV(send - start);               /* sv for the constant.  See
2659                                                    note below on sizing. */
2660     register char *s = start;                   /* start of the constant */
2661     register char *d = SvPVX(sv);               /* destination for copies */
2662     bool dorange = FALSE;                       /* are we in a translit range? */
2663     bool didrange = FALSE;                      /* did we just finish a range? */
2664     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2665     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2666                                                    to be UTF8?  But, this can
2667                                                    show as true when the source
2668                                                    isn't utf8, as for example
2669                                                    when it is entirely composed
2670                                                    of hex constants */
2671
2672     /* Note on sizing:  The scanned constant is placed into sv, which is
2673      * initialized by newSV() assuming one byte of output for every byte of
2674      * input.  This routine expects newSV() to allocate an extra byte for a
2675      * trailing NUL, which this routine will append if it gets to the end of
2676      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2677      * CAPITAL LETTER A}), or more output than input if the constant ends up
2678      * recoded to utf8, but each time a construct is found that might increase
2679      * the needed size, SvGROW() is called.  Its size parameter each time is
2680      * based on the best guess estimate at the time, namely the length used so
2681      * far, plus the length the current construct will occupy, plus room for
2682      * the trailing NUL, plus one byte for every input byte still unscanned */
2683
2684     UV uv;
2685 #ifdef EBCDIC
2686     UV literal_endpoint = 0;
2687     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2688 #endif
2689
2690     PERL_ARGS_ASSERT_SCAN_CONST;
2691
2692     assert(PL_lex_inwhat != OP_TRANSR);
2693     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2694         /* If we are doing a trans and we know we want UTF8 set expectation */
2695         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2696         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2697     }
2698
2699
2700     while (s < send || dorange) {
2701
2702         /* get transliterations out of the way (they're most literal) */
2703         if (PL_lex_inwhat == OP_TRANS) {
2704             /* expand a range A-Z to the full set of characters.  AIE! */
2705             if (dorange) {
2706                 I32 i;                          /* current expanded character */
2707                 I32 min;                        /* first character in range */
2708                 I32 max;                        /* last character in range */
2709
2710 #ifdef EBCDIC
2711                 UV uvmax = 0;
2712 #endif
2713
2714                 if (has_utf8
2715 #ifdef EBCDIC
2716                     && !native_range
2717 #endif
2718                     ) {
2719                     char * const c = (char*)utf8_hop((U8*)d, -1);
2720                     char *e = d++;
2721                     while (e-- > c)
2722                         *(e + 1) = *e;
2723                     *c = (char)UTF_TO_NATIVE(0xff);
2724                     /* mark the range as done, and continue */
2725                     dorange = FALSE;
2726                     didrange = TRUE;
2727                     continue;
2728                 }
2729
2730                 i = d - SvPVX_const(sv);                /* remember current offset */
2731 #ifdef EBCDIC
2732                 SvGROW(sv,
2733                        SvLEN(sv) + (has_utf8 ?
2734                                     (512 - UTF_CONTINUATION_MARK +
2735                                      UNISKIP(0x100))
2736                                     : 256));
2737                 /* How many two-byte within 0..255: 128 in UTF-8,
2738                  * 96 in UTF-8-mod. */
2739 #else
2740                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2741 #endif
2742                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2743 #ifdef EBCDIC
2744                 if (has_utf8) {
2745                     int j;
2746                     for (j = 0; j <= 1; j++) {
2747                         char * const c = (char*)utf8_hop((U8*)d, -1);
2748                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2749                         if (j)
2750                             min = (U8)uv;
2751                         else if (uv < 256)
2752                             max = (U8)uv;
2753                         else {
2754                             max = (U8)0xff; /* only to \xff */
2755                             uvmax = uv; /* \x{100} to uvmax */
2756                         }
2757                         d = c; /* eat endpoint chars */
2758                      }
2759                 }
2760                else {
2761 #endif
2762                    d -= 2;              /* eat the first char and the - */
2763                    min = (U8)*d;        /* first char in range */
2764                    max = (U8)d[1];      /* last char in range  */
2765 #ifdef EBCDIC
2766                }
2767 #endif
2768
2769                 if (min > max) {
2770                     Perl_croak(aTHX_
2771                                "Invalid range \"%c-%c\" in transliteration operator",
2772                                (char)min, (char)max);
2773                 }
2774
2775 #ifdef EBCDIC
2776                 if (literal_endpoint == 2 &&
2777                     ((isLOWER(min) && isLOWER(max)) ||
2778                      (isUPPER(min) && isUPPER(max)))) {
2779                     if (isLOWER(min)) {
2780                         for (i = min; i <= max; i++)
2781                             if (isLOWER(i))
2782                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2783                     } else {
2784                         for (i = min; i <= max; i++)
2785                             if (isUPPER(i))
2786                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2787                     }
2788                 }
2789                 else
2790 #endif
2791                     for (i = min; i <= max; i++)
2792 #ifdef EBCDIC
2793                         if (has_utf8) {
2794                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2795                             if (UNI_IS_INVARIANT(ch))
2796                                 *d++ = (U8)i;
2797                             else {
2798                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2799                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2800                             }
2801                         }
2802                         else
2803 #endif
2804                             *d++ = (char)i;
2805
2806 #ifdef EBCDIC
2807                 if (uvmax) {
2808                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2809                     if (uvmax > 0x101)
2810                         *d++ = (char)UTF_TO_NATIVE(0xff);
2811                     if (uvmax > 0x100)
2812                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2813                 }
2814 #endif
2815
2816                 /* mark the range as done, and continue */
2817                 dorange = FALSE;
2818                 didrange = TRUE;
2819 #ifdef EBCDIC
2820                 literal_endpoint = 0;
2821 #endif
2822                 continue;
2823             }
2824
2825             /* range begins (ignore - as first or last char) */
2826             else if (*s == '-' && s+1 < send  && s != start) {
2827                 if (didrange) {
2828                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2829                 }
2830                 if (has_utf8
2831 #ifdef EBCDIC
2832                     && !native_range
2833 #endif
2834                     ) {
2835                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2836                     s++;
2837                     continue;
2838                 }
2839                 dorange = TRUE;
2840                 s++;
2841             }
2842             else {
2843                 didrange = FALSE;
2844 #ifdef EBCDIC
2845                 literal_endpoint = 0;
2846                 native_range = TRUE;
2847 #endif
2848             }
2849         }
2850
2851         /* if we get here, we're not doing a transliteration */
2852
2853         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2854            except for the last char, which will be done separately. */
2855         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2856             if (s[2] == '#') {
2857                 while (s+1 < send && *s != ')')
2858                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2859             }
2860             else if (s[2] == '{' /* This should match regcomp.c */
2861                     || (s[2] == '?' && s[3] == '{'))
2862             {
2863                 I32 count = 1;
2864                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2865                 char c;
2866
2867                 while (count && (c = *regparse)) {
2868                     if (c == '\\' && regparse[1])
2869                         regparse++;
2870                     else if (c == '{')
2871                         count++;
2872                     else if (c == '}')
2873                         count--;
2874                     regparse++;
2875                 }
2876                 if (*regparse != ')')
2877                     regparse--;         /* Leave one char for continuation. */
2878                 while (s < regparse)
2879                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2880             }
2881         }
2882
2883         /* likewise skip #-initiated comments in //x patterns */
2884         else if (*s == '#' && PL_lex_inpat &&
2885           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2886             while (s+1 < send && *s != '\n')
2887                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2888         }
2889
2890         /* check for embedded arrays
2891            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2892            */
2893         else if (*s == '@' && s[1]) {
2894             if (isALNUM_lazy_if(s+1,UTF))
2895                 break;
2896             if (strchr(":'{$", s[1]))
2897                 break;
2898             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2899                 break; /* in regexp, neither @+ nor @- are interpolated */
2900         }
2901
2902         /* check for embedded scalars.  only stop if we're sure it's a
2903            variable.
2904         */
2905         else if (*s == '$') {
2906             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2907                 break;
2908             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2909                 if (s[1] == '\\') {
2910                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2911                                    "Possible unintended interpolation of $\\ in regex");
2912                 }
2913                 break;          /* in regexp, $ might be tail anchor */
2914             }
2915         }
2916
2917         /* End of else if chain - OP_TRANS rejoin rest */
2918
2919         /* backslashes */
2920         if (*s == '\\' && s+1 < send) {
2921             char* e;    /* Can be used for ending '}', etc. */
2922
2923             s++;
2924
2925             /* warn on \1 - \9 in substitution replacements, but note that \11
2926              * is an octal; and \19 is \1 followed by '9' */
2927             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2928                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2929             {
2930                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2931                 *--s = '$';
2932                 break;
2933             }
2934
2935             /* string-change backslash escapes */
2936             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
2937                 --s;
2938                 break;
2939             }
2940             /* In a pattern, process \N, but skip any other backslash escapes.
2941              * This is because we don't want to translate an escape sequence
2942              * into a meta symbol and have the regex compiler use the meta
2943              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2944              * in spite of this, we do have to process \N here while the proper
2945              * charnames handler is in scope.  See bugs #56444 and #62056.
2946              * There is a complication because \N in a pattern may also stand
2947              * for 'match a non-nl', and not mean a charname, in which case its
2948              * processing should be deferred to the regex compiler.  To be a
2949              * charname it must be followed immediately by a '{', and not look
2950              * like \N followed by a curly quantifier, i.e., not something like
2951              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2952              * quantifier */
2953             else if (PL_lex_inpat
2954                     && (*s != 'N'
2955                         || s[1] != '{'
2956                         || regcurly(s + 1)))
2957             {
2958                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2959                 goto default_action;
2960             }
2961
2962             switch (*s) {
2963
2964             /* quoted - in transliterations */
2965             case '-':
2966                 if (PL_lex_inwhat == OP_TRANS) {
2967                     *d++ = *s++;
2968                     continue;
2969                 }
2970                 /* FALL THROUGH */
2971             default:
2972                 {
2973                     if ((isALPHA(*s) || isDIGIT(*s)))
2974                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2975                                        "Unrecognized escape \\%c passed through",
2976                                        *s);
2977                     /* default action is to copy the quoted character */
2978                     goto default_action;
2979                 }
2980
2981             /* eg. \132 indicates the octal constant 0132 */
2982             case '0': case '1': case '2': case '3':
2983             case '4': case '5': case '6': case '7':
2984                 {
2985                     I32 flags = 0;
2986                     STRLEN len = 3;
2987                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2988                     s += len;
2989                 }
2990                 goto NUM_ESCAPE_INSERT;
2991
2992             /* eg. \o{24} indicates the octal constant \024 */
2993             case 'o':
2994                 {
2995                     STRLEN len;
2996                     const char* error;
2997
2998                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2999                     s += len;
3000                     if (! valid) {
3001                         yyerror(error);
3002                         continue;
3003                     }
3004                     goto NUM_ESCAPE_INSERT;
3005                 }
3006
3007             /* eg. \x24 indicates the hex constant 0x24 */
3008             case 'x':
3009                 ++s;
3010                 if (*s == '{') {
3011                     char* const e = strchr(s, '}');
3012                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
3013                       PERL_SCAN_DISALLOW_PREFIX;
3014                     STRLEN len;
3015
3016                     ++s;
3017                     if (!e) {
3018                         yyerror("Missing right brace on \\x{}");
3019                         continue;
3020                     }
3021                     len = e - s;
3022                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3023                     s = e + 1;
3024                 }
3025                 else {
3026                     {
3027                         STRLEN len = 2;
3028                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3029                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3030                         s += len;
3031                     }
3032                 }
3033
3034               NUM_ESCAPE_INSERT:
3035                 /* Insert oct or hex escaped character.  There will always be
3036                  * enough room in sv since such escapes will be longer than any
3037                  * UTF-8 sequence they can end up as, except if they force us
3038                  * to recode the rest of the string into utf8 */
3039
3040                 /* Here uv is the ordinal of the next character being added in
3041                  * unicode (converted from native). */
3042                 if (!UNI_IS_INVARIANT(uv)) {
3043                     if (!has_utf8 && uv > 255) {
3044                         /* Might need to recode whatever we have accumulated so
3045                          * far if it contains any chars variant in utf8 or
3046                          * utf-ebcdic. */
3047
3048                         SvCUR_set(sv, d - SvPVX_const(sv));
3049                         SvPOK_on(sv);
3050                         *d = '\0';
3051                         /* See Note on sizing above.  */
3052                         sv_utf8_upgrade_flags_grow(sv,
3053                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3054                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3055                         d = SvPVX(sv) + SvCUR(sv);
3056                         has_utf8 = TRUE;
3057                     }
3058
3059                     if (has_utf8) {
3060                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3061                         if (PL_lex_inwhat == OP_TRANS &&
3062                             PL_sublex_info.sub_op) {
3063                             PL_sublex_info.sub_op->op_private |=
3064                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3065                                              : OPpTRANS_TO_UTF);
3066                         }
3067 #ifdef EBCDIC
3068                         if (uv > 255 && !dorange)
3069                             native_range = FALSE;
3070 #endif
3071                     }
3072                     else {
3073                         *d++ = (char)uv;
3074                     }
3075                 }
3076                 else {
3077                     *d++ = (char) uv;
3078                 }
3079                 continue;
3080
3081             case 'N':
3082                 /* In a non-pattern \N must be a named character, like \N{LATIN
3083                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3084                  * mean to match a non-newline.  For non-patterns, named
3085                  * characters are converted to their string equivalents. In
3086                  * patterns, named characters are not converted to their
3087                  * ultimate forms for the same reasons that other escapes
3088                  * aren't.  Instead, they are converted to the \N{U+...} form
3089                  * to get the value from the charnames that is in effect right
3090                  * now, while preserving the fact that it was a named character
3091                  * so that the regex compiler knows this */
3092
3093                 /* This section of code doesn't generally use the
3094                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3095                  * a close examination of this macro and determined it is a
3096                  * no-op except on utfebcdic variant characters.  Every
3097                  * character generated by this that would normally need to be
3098                  * enclosed by this macro is invariant, so the macro is not
3099                  * needed, and would complicate use of copy().  XXX There are
3100                  * other parts of this file where the macro is used
3101                  * inconsistently, but are saved by it being a no-op */
3102
3103                 /* The structure of this section of code (besides checking for
3104                  * errors and upgrading to utf8) is:
3105                  *  Further disambiguate between the two meanings of \N, and if
3106                  *      not a charname, go process it elsewhere
3107                  *  If of form \N{U+...}, pass it through if a pattern;
3108                  *      otherwise convert to utf8
3109                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3110                  *  pattern; otherwise convert to utf8 */
3111
3112                 /* Here, s points to the 'N'; the test below is guaranteed to
3113                  * succeed if we are being called on a pattern as we already
3114                  * know from a test above that the next character is a '{'.
3115                  * On a non-pattern \N must mean 'named sequence, which
3116                  * requires braces */
3117                 s++;
3118                 if (*s != '{') {
3119                     yyerror("Missing braces on \\N{}");
3120                     continue;
3121                 }
3122                 s++;
3123
3124                 /* If there is no matching '}', it is an error. */
3125                 if (! (e = strchr(s, '}'))) {
3126                     if (! PL_lex_inpat) {
3127                         yyerror("Missing right brace on \\N{}");
3128                     } else {
3129                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3130                     }
3131                     continue;
3132                 }
3133
3134                 /* Here it looks like a named character */
3135
3136                 if (PL_lex_inpat) {
3137
3138                     /* XXX This block is temporary code.  \N{} implies that the
3139                      * pattern is to have Unicode semantics, and therefore
3140                      * currently has to be encoded in utf8.  By putting it in
3141                      * utf8 now, we save a whole pass in the regular expression
3142                      * compiler.  Once that code is changed so Unicode
3143                      * semantics doesn't necessarily have to be in utf8, this
3144                      * block should be removed.  However, the code that parses
3145                      * the output of this would have to be changed to not
3146                      * necessarily expect utf8 */
3147                     if (!has_utf8) {
3148                         SvCUR_set(sv, d - SvPVX_const(sv));
3149                         SvPOK_on(sv);
3150                         *d = '\0';
3151                         /* See Note on sizing above.  */
3152                         sv_utf8_upgrade_flags_grow(sv,
3153                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3154                                         /* 5 = '\N{' + cur char + NUL */
3155                                         (STRLEN)(send - s) + 5);
3156                         d = SvPVX(sv) + SvCUR(sv);
3157                         has_utf8 = TRUE;
3158                     }
3159                 }
3160
3161                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3162                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3163                                 | PERL_SCAN_DISALLOW_PREFIX;
3164                     STRLEN len;
3165
3166                     /* For \N{U+...}, the '...' is a unicode value even on
3167                      * EBCDIC machines */
3168                     s += 2;         /* Skip to next char after the 'U+' */
3169                     len = e - s;
3170                     uv = grok_hex(s, &len, &flags, NULL);
3171                     if (len == 0 || len != (STRLEN)(e - s)) {
3172                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3173                         s = e + 1;
3174                         continue;
3175                     }
3176
3177                     if (PL_lex_inpat) {
3178
3179                         /* On non-EBCDIC platforms, pass through to the regex
3180                          * compiler unchanged.  The reason we evaluated the
3181                          * number above is to make sure there wasn't a syntax
3182                          * error.  But on EBCDIC we convert to native so
3183                          * downstream code can continue to assume it's native
3184                          */
3185                         s -= 5;     /* Include the '\N{U+' */
3186 #ifdef EBCDIC
3187                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3188                                                                and the \0 */
3189                                     "\\N{U+%X}",
3190                                     (unsigned int) UNI_TO_NATIVE(uv));
3191 #else
3192                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3193                         d += e - s + 1;
3194 #endif
3195                     }
3196                     else {  /* Not a pattern: convert the hex to string */
3197
3198                          /* If destination is not in utf8, unconditionally
3199                           * recode it to be so.  This is because \N{} implies
3200                           * Unicode semantics, and scalars have to be in utf8
3201                           * to guarantee those semantics */
3202                         if (! has_utf8) {
3203                             SvCUR_set(sv, d - SvPVX_const(sv));
3204                             SvPOK_on(sv);
3205                             *d = '\0';
3206                             /* See Note on sizing above.  */
3207                             sv_utf8_upgrade_flags_grow(
3208                                         sv,
3209                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3210                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3211                             d = SvPVX(sv) + SvCUR(sv);
3212                             has_utf8 = TRUE;
3213                         }
3214
3215                         /* Add the string to the output */
3216                         if (UNI_IS_INVARIANT(uv)) {
3217                             *d++ = (char) uv;
3218                         }
3219                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3220                     }
3221                 }
3222                 else { /* Here is \N{NAME} but not \N{U+...}. */
3223
3224                     SV *res;            /* result from charnames */
3225                     const char *str;    /* the string in 'res' */
3226                     STRLEN len;         /* its length */
3227
3228                     /* Get the value for NAME */
3229                     res = newSVpvn(s, e - s);
3230                     res = new_constant( NULL, 0, "charnames",
3231                                         /* includes all of: \N{...} */
3232                                         res, NULL, s - 3, e - s + 4 );
3233
3234                     /* Most likely res will be in utf8 already since the
3235                      * standard charnames uses pack U, but a custom translator
3236                      * can leave it otherwise, so make sure.  XXX This can be
3237                      * revisited to not have charnames use utf8 for characters
3238                      * that don't need it when regexes don't have to be in utf8
3239                      * for Unicode semantics.  If doing so, remember EBCDIC */
3240                     sv_utf8_upgrade(res);
3241                     str = SvPV_const(res, len);
3242
3243                     /* Don't accept malformed input */
3244                     if (! is_utf8_string((U8 *) str, len)) {
3245                         yyerror("Malformed UTF-8 returned by \\N");
3246                     }
3247                     else if (PL_lex_inpat) {
3248
3249                         if (! len) { /* The name resolved to an empty string */
3250                             Copy("\\N{}", d, 4, char);
3251                             d += 4;
3252                         }
3253                         else {
3254                             /* In order to not lose information for the regex
3255                             * compiler, pass the result in the specially made
3256                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3257                             * the code points in hex of each character
3258                             * returned by charnames */
3259
3260                             const char *str_end = str + len;
3261                             STRLEN char_length;     /* cur char's byte length */
3262                             STRLEN output_length;   /* and the number of bytes
3263                                                        after this is translated
3264                                                        into hex digits */
3265                             const STRLEN off = d - SvPVX_const(sv);
3266
3267                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3268                              * max('U+', '.'); and 1 for NUL */
3269                             char hex_string[2 * UTF8_MAXBYTES + 5];
3270
3271                             /* Get the first character of the result. */
3272                             U32 uv = utf8n_to_uvuni((U8 *) str,
3273                                                     len,
3274                                                     &char_length,
3275                                                     UTF8_ALLOW_ANYUV);
3276
3277                             /* The call to is_utf8_string() above hopefully
3278                              * guarantees that there won't be an error.  But
3279                              * it's easy here to make sure.  The function just
3280                              * above warns and returns 0 if invalid utf8, but
3281                              * it can also return 0 if the input is validly a
3282                              * NUL. Disambiguate */
3283                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3284                                 uv = UNICODE_REPLACEMENT;
3285                             }
3286
3287                             /* Convert first code point to hex, including the
3288                              * boiler plate before it.  For all these, we
3289                              * convert to native format so that downstream code
3290                              * can continue to assume the input is native */
3291                             output_length =
3292                                 my_snprintf(hex_string, sizeof(hex_string),
3293                                             "\\N{U+%X",
3294                                             (unsigned int) UNI_TO_NATIVE(uv));
3295
3296                             /* Make sure there is enough space to hold it */
3297                             d = off + SvGROW(sv, off
3298                                                  + output_length
3299                                                  + (STRLEN)(send - e)
3300                                                  + 2);  /* '}' + NUL */
3301                             /* And output it */
3302                             Copy(hex_string, d, output_length, char);
3303                             d += output_length;
3304
3305                             /* For each subsequent character, append dot and
3306                              * its ordinal in hex */
3307                             while ((str += char_length) < str_end) {
3308                                 const STRLEN off = d - SvPVX_const(sv);
3309                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3310                                                         str_end - str,
3311                                                         &char_length,
3312                                                         UTF8_ALLOW_ANYUV);
3313                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3314                                     uv = UNICODE_REPLACEMENT;
3315                                 }
3316
3317                                 output_length =
3318                                     my_snprintf(hex_string, sizeof(hex_string),
3319                                             ".%X",
3320                                             (unsigned int) UNI_TO_NATIVE(uv));
3321
3322                                 d = off + SvGROW(sv, off
3323                                                      + output_length
3324                                                      + (STRLEN)(send - e)
3325                                                      + 2);      /* '}' +  NUL */
3326                                 Copy(hex_string, d, output_length, char);
3327                                 d += output_length;
3328                             }
3329
3330                             *d++ = '}'; /* Done.  Add the trailing brace */
3331                         }
3332                     }
3333                     else { /* Here, not in a pattern.  Convert the name to a
3334                             * string. */
3335
3336                          /* If destination is not in utf8, unconditionally
3337                           * recode it to be so.  This is because \N{} implies
3338                           * Unicode semantics, and scalars have to be in utf8
3339                           * to guarantee those semantics */
3340                         if (! has_utf8) {
3341                             SvCUR_set(sv, d - SvPVX_const(sv));
3342                             SvPOK_on(sv);
3343                             *d = '\0';
3344                             /* See Note on sizing above.  */
3345                             sv_utf8_upgrade_flags_grow(sv,
3346                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3347                                                 len + (STRLEN)(send - s) + 1);
3348                             d = SvPVX(sv) + SvCUR(sv);
3349                             has_utf8 = TRUE;
3350                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3351
3352                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3353                              * set correctly here). */
3354                             const STRLEN off = d - SvPVX_const(sv);
3355                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3356                         }
3357                         Copy(str, d, len, char);
3358                         d += len;
3359                     }
3360                     SvREFCNT_dec(res);
3361
3362                     /* Deprecate non-approved name syntax */
3363                     if (ckWARN_d(WARN_DEPRECATED)) {
3364                         bool problematic = FALSE;
3365                         char* i = s;
3366
3367                         /* For non-ut8 input, look to see that the first
3368                          * character is an alpha, then loop through the rest
3369                          * checking that each is a continuation */
3370                         if (! this_utf8) {
3371                             if (! isALPHAU(*i)) problematic = TRUE;
3372                             else for (i = s + 1; i < e; i++) {
3373                                 if (isCHARNAME_CONT(*i)) continue;
3374                                 problematic = TRUE;
3375                                 break;
3376                             }
3377                         }
3378                         else {
3379                             /* Similarly for utf8.  For invariants can check
3380                              * directly.  We accept anything above the latin1
3381                              * range because it is immaterial to Perl if it is
3382                              * correct or not, and is expensive to check.  But
3383                              * it is fairly easy in the latin1 range to convert
3384                              * the variants into a single character and check
3385                              * those */
3386                             if (UTF8_IS_INVARIANT(*i)) {
3387                                 if (! isALPHAU(*i)) problematic = TRUE;
3388                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3389                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3390                                                                             *(i+1)))))
3391                                 {
3392                                     problematic = TRUE;
3393                                 }
3394                             }
3395                             if (! problematic) for (i = s + UTF8SKIP(s);
3396                                                     i < e;
3397                                                     i+= UTF8SKIP(i))
3398                             {
3399                                 if (UTF8_IS_INVARIANT(*i)) {
3400                                     if (isCHARNAME_CONT(*i)) continue;
3401                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3402                                     continue;
3403                                 } else if (isCHARNAME_CONT(
3404                                             UNI_TO_NATIVE(
3405                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3406                                 {
3407                                     continue;
3408                                 }
3409                                 problematic = TRUE;
3410                                 break;
3411                             }
3412                         }
3413                         if (problematic) {
3414                             /* The e-i passed to the final %.*s makes sure that
3415                              * should the trailing NUL be missing that this
3416                              * print won't run off the end of the string */
3417                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3418                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3419                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3420                         }
3421                     }
3422                 } /* End \N{NAME} */
3423 #ifdef EBCDIC
3424                 if (!dorange)
3425                     native_range = FALSE; /* \N{} is defined to be Unicode */
3426 #endif
3427                 s = e + 1;  /* Point to just after the '}' */
3428                 continue;
3429
3430             /* \c is a control character */
3431             case 'c':
3432                 s++;
3433                 if (s < send) {
3434                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3435                 }
3436                 else {
3437                     yyerror("Missing control char name in \\c");
3438                 }
3439                 continue;
3440
3441             /* printf-style backslashes, formfeeds, newlines, etc */
3442             case 'b':
3443                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3444                 break;
3445             case 'n':
3446                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3447                 break;
3448             case 'r':
3449                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3450                 break;
3451             case 'f':
3452                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3453                 break;
3454             case 't':
3455                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3456                 break;
3457             case 'e':
3458                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3459                 break;
3460             case 'a':
3461                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3462                 break;
3463             } /* end switch */
3464
3465             s++;
3466             continue;
3467         } /* end if (backslash) */
3468 #ifdef EBCDIC
3469         else
3470             literal_endpoint++;
3471 #endif
3472
3473     default_action:
3474         /* If we started with encoded form, or already know we want it,
3475            then encode the next character */
3476         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3477             STRLEN len  = 1;
3478
3479
3480             /* One might think that it is wasted effort in the case of the
3481              * source being utf8 (this_utf8 == TRUE) to take the next character
3482              * in the source, convert it to an unsigned value, and then convert
3483              * it back again.  But the source has not been validated here.  The
3484              * routine that does the conversion checks for errors like
3485              * malformed utf8 */
3486
3487             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3488             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3489             if (!has_utf8) {
3490                 SvCUR_set(sv, d - SvPVX_const(sv));
3491                 SvPOK_on(sv);
3492                 *d = '\0';
3493                 /* See Note on sizing above.  */
3494                 sv_utf8_upgrade_flags_grow(sv,
3495                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3496                                         need + (STRLEN)(send - s) + 1);
3497                 d = SvPVX(sv) + SvCUR(sv);
3498                 has_utf8 = TRUE;
3499             } else if (need > len) {
3500                 /* encoded value larger than old, may need extra space (NOTE:
3501                  * SvCUR() is not set correctly here).   See Note on sizing
3502                  * above.  */
3503                 const STRLEN off = d - SvPVX_const(sv);
3504                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3505             }
3506             s += len;
3507
3508             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3509 #ifdef EBCDIC
3510             if (uv > 255 && !dorange)
3511                 native_range = FALSE;
3512 #endif
3513         }
3514         else {
3515             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3516         }
3517     } /* while loop to process each character */
3518
3519     /* terminate the string and set up the sv */
3520     *d = '\0';
3521     SvCUR_set(sv, d - SvPVX_const(sv));
3522     if (SvCUR(sv) >= SvLEN(sv))
3523         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3524                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3525
3526     SvPOK_on(sv);
3527     if (PL_encoding && !has_utf8) {
3528         sv_recode_to_utf8(sv, PL_encoding);
3529         if (SvUTF8(sv))
3530             has_utf8 = TRUE;
3531     }
3532     if (has_utf8) {
3533         SvUTF8_on(sv);
3534         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3535             PL_sublex_info.sub_op->op_private |=
3536                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3537         }
3538     }
3539
3540     /* shrink the sv if we allocated more than we used */
3541     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3542         SvPV_shrink_to_cur(sv);
3543     }
3544
3545     /* return the substring (via pl_yylval) only if we parsed anything */
3546     if (s > PL_bufptr) {
3547         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3548             const char *const key = PL_lex_inpat ? "qr" : "q";
3549             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3550             const char *type;
3551             STRLEN typelen;
3552
3553             if (PL_lex_inwhat == OP_TRANS) {
3554                 type = "tr";
3555                 typelen = 2;
3556             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3557                 type = "s";
3558                 typelen = 1;
3559             } else  {
3560                 type = "qq";
3561                 typelen = 2;
3562             }
3563
3564             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3565                                 type, typelen);
3566         }
3567         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3568     } else
3569         SvREFCNT_dec(sv);
3570     return s;
3571 }
3572
3573 /* S_intuit_more
3574  * Returns TRUE if there's more to the expression (e.g., a subscript),
3575  * FALSE otherwise.
3576  *
3577  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3578  *
3579  * ->[ and ->{ return TRUE
3580  * { and [ outside a pattern are always subscripts, so return TRUE
3581  * if we're outside a pattern and it's not { or [, then return FALSE
3582  * if we're in a pattern and the first char is a {
3583  *   {4,5} (any digits around the comma) returns FALSE
3584  * if we're in a pattern and the first char is a [
3585  *   [] returns FALSE
3586  *   [SOMETHING] has a funky algorithm to decide whether it's a
3587  *      character class or not.  It has to deal with things like
3588  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3589  * anything else returns TRUE
3590  */
3591
3592 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3593
3594 STATIC int
3595 S_intuit_more(pTHX_ register char *s)
3596 {
3597     dVAR;
3598
3599     PERL_ARGS_ASSERT_INTUIT_MORE;
3600
3601     if (PL_lex_brackets)
3602         return TRUE;
3603     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3604         return TRUE;
3605     if (*s != '{' && *s != '[')
3606         return FALSE;
3607     if (!PL_lex_inpat)
3608         return TRUE;
3609
3610     /* In a pattern, so maybe we have {n,m}. */
3611     if (*s == '{') {
3612         if (regcurly(s)) {
3613             return FALSE;
3614         }
3615         return TRUE;
3616     }
3617
3618     /* On the other hand, maybe we have a character class */
3619
3620     s++;
3621     if (*s == ']' || *s == '^')
3622         return FALSE;
3623     else {
3624         /* this is terrifying, and it works */
3625         int weight = 2;         /* let's weigh the evidence */
3626         char seen[256];
3627         unsigned char un_char = 255, last_un_char;
3628         const char * const send = strchr(s,']');
3629         char tmpbuf[sizeof PL_tokenbuf * 4];
3630
3631         if (!send)              /* has to be an expression */
3632             return TRUE;
3633
3634         Zero(seen,256,char);
3635         if (*s == '$')
3636             weight -= 3;
3637         else if (isDIGIT(*s)) {
3638             if (s[1] != ']') {
3639                 if (isDIGIT(s[1]) && s[2] == ']')
3640                     weight -= 10;
3641             }
3642             else
3643                 weight -= 100;
3644         }
3645         for (; s < send; s++) {
3646             last_un_char = un_char;
3647             un_char = (unsigned char)*s;
3648             switch (*s) {
3649             case '@':
3650             case '&':
3651             case '$':
3652                 weight -= seen[un_char] * 10;
3653                 if (isALNUM_lazy_if(s+1,UTF)) {
3654                     int len;
3655                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3656                     len = (int)strlen(tmpbuf);
3657                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3658                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3659                         weight -= 100;
3660                     else
3661                         weight -= 10;
3662                 }
3663                 else if (*s == '$' && s[1] &&
3664                   strchr("[#!%*<>()-=",s[1])) {
3665                     if (/*{*/ strchr("])} =",s[2]))
3666                         weight -= 10;
3667                     else
3668                         weight -= 1;
3669                 }
3670                 break;
3671             case '\\':
3672                 un_char = 254;
3673                 if (s[1]) {
3674                     if (strchr("wds]",s[1]))
3675                         weight += 100;
3676                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3677                         weight += 1;
3678                     else if (strchr("rnftbxcav",s[1]))
3679                         weight += 40;
3680                     else if (isDIGIT(s[1])) {
3681                         weight += 40;
3682                         while (s[1] && isDIGIT(s[1]))
3683                             s++;
3684                     }
3685                 }
3686                 else
3687                     weight += 100;
3688                 break;
3689             case '-':
3690                 if (s[1] == '\\')
3691                     weight += 50;
3692                 if (strchr("aA01! ",last_un_char))
3693                     weight += 30;
3694                 if (strchr("zZ79~",s[1]))
3695                     weight += 30;
3696                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3697                     weight -= 5;        /* cope with negative subscript */
3698                 break;
3699             default:
3700                 if (!isALNUM(last_un_char)
3701                     && !(last_un_char == '$' || last_un_char == '@'
3702                          || last_un_char == '&')
3703                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3704                     char *d = tmpbuf;
3705                     while (isALPHA(*s))
3706                         *d++ = *s++;
3707                     *d = '\0';
3708                     if (keyword(tmpbuf, d - tmpbuf, 0))
3709                         weight -= 150;
3710                 }
3711                 if (un_char == last_un_char + 1)
3712                     weight += 5;
3713                 weight -= seen[un_char];
3714                 break;
3715             }
3716             seen[un_char]++;
3717         }
3718         if (weight >= 0)        /* probably a character class */
3719             return FALSE;
3720     }
3721
3722     return TRUE;
3723 }
3724
3725 /*
3726  * S_intuit_method
3727  *
3728  * Does all the checking to disambiguate
3729  *   foo bar
3730  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3731  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3732  *
3733  * First argument is the stuff after the first token, e.g. "bar".
3734  *
3735  * Not a method if bar is a filehandle.
3736  * Not a method if foo is a subroutine prototyped to take a filehandle.
3737  * Not a method if it's really "Foo $bar"
3738  * Method if it's "foo $bar"
3739  * Not a method if it's really "print foo $bar"
3740  * Method if it's really "foo package::" (interpreted as package->foo)
3741  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3742  * Not a method if bar is a filehandle or package, but is quoted with
3743  *   =>
3744  */
3745
3746 STATIC int
3747 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3748 {
3749     dVAR;
3750     char *s = start + (*start == '$');
3751     char tmpbuf[sizeof PL_tokenbuf];
3752     STRLEN len;
3753     GV* indirgv;
3754 #ifdef PERL_MAD
3755     int soff;
3756 #endif
3757
3758     PERL_ARGS_ASSERT_INTUIT_METHOD;
3759
3760     if (gv) {
3761         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3762             return 0;
3763         if (cv) {
3764             if (SvPOK(cv)) {
3765                 const char *proto = CvPROTO(cv);
3766                 if (proto) {
3767                     if (*proto == ';')
3768                         proto++;
3769                     if (*proto == '*')
3770                         return 0;
3771                 }
3772             }
3773         } else
3774             gv = NULL;
3775     }
3776     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3777     /* start is the beginning of the possible filehandle/object,
3778      * and s is the end of it
3779      * tmpbuf is a copy of it
3780      */
3781
3782     if (*start == '$') {
3783         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3784                 isUPPER(*PL_tokenbuf))
3785             return 0;
3786 #ifdef PERL_MAD
3787         len = start - SvPVX(PL_linestr);
3788 #endif
3789         s = PEEKSPACE(s);
3790 #ifdef PERL_MAD
3791         start = SvPVX(PL_linestr) + len;
3792 #endif
3793         PL_bufptr = start;
3794         PL_expect = XREF;
3795         return *s == '(' ? FUNCMETH : METHOD;
3796     }
3797     if (!keyword(tmpbuf, len, 0)) {
3798         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3799             len -= 2;
3800             tmpbuf[len] = '\0';
3801 #ifdef PERL_MAD
3802             soff = s - SvPVX(PL_linestr);
3803 #endif
3804             goto bare_package;
3805         }
3806         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3807         if (indirgv && GvCVu(indirgv))
3808             return 0;
3809         /* filehandle or package name makes it a method */
3810         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3811 #ifdef PERL_MAD
3812             soff = s - SvPVX(PL_linestr);
3813 #endif
3814             s = PEEKSPACE(s);
3815             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3816                 return 0;       /* no assumptions -- "=>" quotes bareword */
3817       bare_package:
3818             start_force(PL_curforce);
3819             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3820                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3821             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3822             if (PL_madskills)
3823                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3824                                                             ( UTF ? SVf_UTF8 : 0 )));
3825             PL_expect = XTERM;
3826             force_next(WORD);
3827             PL_bufptr = s;
3828 #ifdef PERL_MAD
3829             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3830 #endif
3831             return *s == '(' ? FUNCMETH : METHOD;
3832         }
3833     }
3834     return 0;
3835 }
3836
3837 /* Encoded script support. filter_add() effectively inserts a
3838  * 'pre-processing' function into the current source input stream.
3839  * Note that the filter function only applies to the current source file
3840  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3841  *
3842  * The datasv parameter (which may be NULL) can be used to pass
3843  * private data to this instance of the filter. The filter function
3844  * can recover the SV using the FILTER_DATA macro and use it to
3845  * store private buffers and state information.
3846  *
3847  * The supplied datasv parameter is upgraded to a PVIO type
3848  * and the IoDIRP/IoANY field is used to store the function pointer,
3849  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3850  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3851  * private use must be set using malloc'd pointers.
3852  */
3853
3854 SV *
3855 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3856 {
3857     dVAR;
3858     if (!funcp)
3859         return NULL;
3860
3861     if (!PL_parser)
3862         return NULL;
3863
3864     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3865         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3866
3867     if (!PL_rsfp_filters)
3868         PL_rsfp_filters = newAV();
3869     if (!datasv)
3870         datasv = newSV(0);
3871     SvUPGRADE(datasv, SVt_PVIO);
3872     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3873     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3874     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3875                           FPTR2DPTR(void *, IoANY(datasv)),
3876                           SvPV_nolen(datasv)));
3877     av_unshift(PL_rsfp_filters, 1);
3878     av_store(PL_rsfp_filters, 0, datasv) ;
3879     if (
3880         !PL_parser->filtered
3881      && PL_parser->lex_flags & LEX_EVALBYTES
3882      && PL_bufptr < PL_bufend
3883     ) {
3884         const char *s = PL_bufptr;
3885         while (s < PL_bufend) {
3886             if (*s == '\n') {
3887                 SV *linestr = PL_parser->linestr;
3888                 char *buf = SvPVX(linestr);
3889                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3890                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3891                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3892                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3893                 STRLEN const last_uni_pos =
3894                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3895                 STRLEN const last_lop_pos =
3896                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3897                 av_push(PL_rsfp_filters, linestr);
3898                 PL_parser->linestr =
3899                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3900                 buf = SvPVX(PL_parser->linestr);
3901                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3902                 PL_parser->bufptr = buf + bufptr_pos;
3903                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3904                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3905                 PL_parser->linestart = buf + linestart_pos;
3906                 if (PL_parser->last_uni)
3907                     PL_parser->last_uni = buf + last_uni_pos;
3908                 if (PL_parser->last_lop)
3909                     PL_parser->last_lop = buf + last_lop_pos;
3910                 SvLEN(linestr) = SvCUR(linestr);
3911                 SvCUR(linestr) = s-SvPVX(linestr);
3912                 PL_parser->filtered = 1;
3913                 break;
3914             }
3915             s++;
3916         }
3917     }
3918     return(datasv);
3919 }
3920
3921
3922 /* Delete most recently added instance of this filter function. */
3923 void
3924 Perl_filter_del(pTHX_ filter_t funcp)
3925 {
3926     dVAR;
3927     SV *datasv;
3928
3929     PERL_ARGS_ASSERT_FILTER_DEL;
3930
3931 #ifdef DEBUGGING
3932     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3933                           FPTR2DPTR(void*, funcp)));
3934 #endif
3935     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3936         return;
3937     /* if filter is on top of stack (usual case) just pop it off */
3938     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3939     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3940         sv_free(av_pop(PL_rsfp_filters));
3941
3942         return;
3943     }
3944     /* we need to search for the correct entry and clear it     */
3945     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3946 }
3947
3948
3949 /* Invoke the idxth filter function for the current rsfp.        */
3950 /* maxlen 0 = read one text line */
3951 I32
3952 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3953 {
3954     dVAR;
3955     filter_t funcp;
3956     SV *datasv = NULL;
3957     /* This API is bad. It should have been using unsigned int for maxlen.
3958        Not sure if we want to change the API, but if not we should sanity
3959        check the value here.  */
3960     unsigned int correct_length
3961         = maxlen < 0 ?
3962 #ifdef PERL_MICRO
3963         0x7FFFFFFF
3964 #else
3965         INT_MAX
3966 #endif
3967         : maxlen;
3968
3969     PERL_ARGS_ASSERT_FILTER_READ;
3970
3971     if (!PL_parser || !PL_rsfp_filters)
3972         return -1;
3973     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3974         /* Provide a default input filter to make life easy.    */
3975         /* Note that we append to the line. This is handy.      */
3976         DEBUG_P(PerlIO_printf(Perl_debug_log,
3977                               "filter_read %d: from rsfp\n", idx));
3978         if (correct_length) {
3979             /* Want a block */
3980             int len ;
3981             const int old_len = SvCUR(buf_sv);
3982
3983             /* ensure buf_sv is large enough */
3984             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3985             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3986                                    correct_length)) <= 0) {
3987                 if (PerlIO_error(PL_rsfp))
3988                     return -1;          /* error */
3989                 else
3990                     return 0 ;          /* end of file */
3991             }
3992             SvCUR_set(buf_sv, old_len + len) ;
3993             SvPVX(buf_sv)[old_len + len] = '\0';
3994         } else {
3995             /* Want a line */
3996             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3997                 if (PerlIO_error(PL_rsfp))
3998                     return -1;          /* error */
3999                 else
4000                     return 0 ;          /* end of file */
4001             }
4002         }
4003         return SvCUR(buf_sv);
4004     }
4005     /* Skip this filter slot if filter has been deleted */
4006     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4007         DEBUG_P(PerlIO_printf(Perl_debug_log,
4008                               "filter_read %d: skipped (filter deleted)\n",
4009                               idx));
4010         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4011     }
4012     if (SvTYPE(datasv) != SVt_PVIO) {
4013         if (correct_length) {
4014             /* Want a block */
4015             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4016             if (!remainder) return 0; /* eof */
4017             if (correct_length > remainder) correct_length = remainder;
4018             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4019             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4020         } else {
4021             /* Want a line */
4022             const char *s = SvEND(datasv);
4023             const char *send = SvPVX(datasv) + SvLEN(datasv);
4024             while (s < send) {
4025                 if (*s == '\n') {
4026                     s++;
4027                     break;
4028                 }
4029                 s++;
4030             }
4031             if (s == send) return 0; /* eof */
4032             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4033             SvCUR_set(datasv, s-SvPVX(datasv));
4034         }
4035         return SvCUR(buf_sv);
4036     }
4037     /* Get function pointer hidden within datasv        */
4038     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4039     DEBUG_P(PerlIO_printf(Perl_debug_log,
4040                           "filter_read %d: via function %p (%s)\n",
4041                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4042     /* Call function. The function is expected to       */
4043     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4044     /* Return: <0:error, =0:eof, >0:not eof             */
4045     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4046 }
4047
4048 STATIC char *
4049 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4050 {
4051     dVAR;
4052
4053     PERL_ARGS_ASSERT_FILTER_GETS;
4054
4055 #ifdef PERL_CR_FILTER
4056     if (!PL_rsfp_filters) {
4057         filter_add(S_cr_textfilter,NULL);
4058     }
4059 #endif
4060     if (PL_rsfp_filters) {
4061         if (!append)
4062             SvCUR_set(sv, 0);   /* start with empty line        */
4063         if (FILTER_READ(0, sv, 0) > 0)
4064             return ( SvPVX(sv) ) ;
4065         else
4066             return NULL ;
4067     }
4068     else
4069         return (sv_gets(sv, PL_rsfp, append));
4070 }
4071
4072 STATIC HV *
4073 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4074 {
4075     dVAR;
4076     GV *gv;
4077
4078     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4079
4080     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4081         return PL_curstash;
4082
4083     if (len > 2 &&
4084         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4085         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4086     {
4087         return GvHV(gv);                        /* Foo:: */
4088     }
4089
4090     /* use constant CLASS => 'MyClass' */
4091     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4092     if (gv && GvCV(gv)) {
4093         SV * const sv = cv_const_sv(GvCV(gv));
4094         if (sv)
4095             pkgname = SvPV_const(sv, len);
4096     }
4097
4098     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4099 }
4100
4101 /*
4102  * S_readpipe_override
4103  * Check whether readpipe() is overridden, and generates the appropriate
4104  * optree, provided sublex_start() is called afterwards.
4105  */
4106 STATIC void
4107 S_readpipe_override(pTHX)
4108 {
4109     GV **gvp;
4110     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4111     pl_yylval.ival = OP_BACKTICK;
4112     if ((gv_readpipe
4113                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4114             ||
4115             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4116              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4117              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4118     {
4119         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4120             op_append_elem(OP_LIST,
4121                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4122                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4123     }
4124 }
4125
4126 #ifdef PERL_MAD
4127  /*
4128  * Perl_madlex
4129  * The intent of this yylex wrapper is to minimize the changes to the
4130  * tokener when we aren't interested in collecting madprops.  It remains
4131  * to be seen how successful this strategy will be...
4132  */
4133
4134 int
4135 Perl_madlex(pTHX)
4136 {
4137     int optype;
4138     char *s = PL_bufptr;
4139
4140     /* make sure PL_thiswhite is initialized */
4141     PL_thiswhite = 0;
4142     PL_thismad = 0;
4143
4144     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4145     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4146         return S_pending_ident(aTHX);
4147
4148     /* previous token ate up our whitespace? */
4149     if (!PL_lasttoke && PL_nextwhite) {
4150         PL_thiswhite = PL_nextwhite;
4151         PL_nextwhite = 0;
4152     }
4153
4154     /* isolate the token, and figure out where it is without whitespace */
4155     PL_realtokenstart = -1;
4156     PL_thistoken = 0;
4157     optype = yylex();
4158     s = PL_bufptr;
4159     assert(PL_curforce < 0);
4160
4161     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4162         if (!PL_thistoken) {
4163             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4164                 PL_thistoken = newSVpvs("");
4165             else {
4166                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4167                 PL_thistoken = newSVpvn(tstart, s - tstart);
4168             }
4169         }
4170         if (PL_thismad) /* install head */
4171             CURMAD('X', PL_thistoken);
4172     }
4173
4174     /* last whitespace of a sublex? */
4175     if (optype == ')' && PL_endwhite) {
4176         CURMAD('X', PL_endwhite);
4177     }
4178
4179     if (!PL_thismad) {
4180
4181         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4182         if (!PL_thiswhite && !PL_endwhite && !optype) {
4183             sv_free(PL_thistoken);
4184             PL_thistoken = 0;
4185             return 0;
4186         }
4187
4188         /* put off final whitespace till peg */
4189         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4190             PL_nextwhite = PL_thiswhite;
4191             PL_thiswhite = 0;
4192         }
4193         else if (PL_thisopen) {
4194             CURMAD('q', PL_thisopen);
4195             if (PL_thistoken)
4196                 sv_free(PL_thistoken);
4197             PL_thistoken = 0;
4198         }
4199         else {
4200             /* Store actual token text as madprop X */
4201             CURMAD('X', PL_thistoken);
4202         }
4203
4204         if (PL_thiswhite) {
4205             /* add preceding whitespace as madprop _ */
4206             CURMAD('_', PL_thiswhite);
4207         }
4208
4209         if (PL_thisstuff) {
4210             /* add quoted material as madprop = */
4211             CURMAD('=', PL_thisstuff);
4212         }
4213
4214         if (PL_thisclose) {
4215             /* add terminating quote as madprop Q */
4216             CURMAD('Q', PL_thisclose);
4217         }
4218     }
4219
4220     /* special processing based on optype */
4221
4222     switch (optype) {
4223
4224     /* opval doesn't need a TOKEN since it can already store mp */
4225     case WORD:
4226     case METHOD:
4227     case FUNCMETH:
4228     case THING:
4229     case PMFUNC:
4230     case PRIVATEREF:
4231     case FUNC0SUB:
4232     case UNIOPSUB:
4233     case LSTOPSUB:
4234         if (pl_yylval.opval)
4235             append_madprops(PL_thismad, pl_yylval.opval, 0);
4236         PL_thismad = 0;
4237         return optype;
4238
4239     /* fake EOF */
4240     case 0:
4241         optype = PEG;
4242         if (PL_endwhite) {
4243             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4244             PL_endwhite = 0;
4245         }
4246         break;
4247
4248     case ']':
4249     case '}':
4250         if (PL_faketokens)
4251             break;
4252         /* remember any fake bracket that lexer is about to discard */
4253         if (PL_lex_brackets == 1 &&
4254             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4255         {
4256             s = PL_bufptr;
4257             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4258                 s++;
4259             if (*s == '}') {
4260                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4261                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4262                 PL_thiswhite = 0;
4263                 PL_bufptr = s - 1;
4264                 break;  /* don't bother looking for trailing comment */
4265             }
4266             else
4267                 s = PL_bufptr;
4268         }
4269         if (optype == ']')
4270             break;
4271         /* FALLTHROUGH */
4272
4273     /* attach a trailing comment to its statement instead of next token */
4274     case ';':
4275         if (PL_faketokens)
4276             break;
4277         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4278             s = PL_bufptr;
4279             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4280                 s++;
4281             if (*s == '\n' || *s == '#') {
4282                 while (s < PL_bufend && *s != '\n')
4283                     s++;
4284                 if (s < PL_bufend)
4285                     s++;
4286                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4287                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4288                 PL_thiswhite = 0;
4289                 PL_bufptr = s;
4290             }
4291         }
4292         break;
4293
4294     /* pval */
4295     case LABEL:
4296         break;
4297
4298     /* ival */
4299     default:
4300         break;
4301
4302     }
4303
4304     /* Create new token struct.  Note: opvals return early above. */
4305     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4306     PL_thismad = 0;
4307     return optype;
4308 }
4309 #endif
4310
4311 STATIC char *
4312 S_tokenize_use(pTHX_ int is_use, char *s) {
4313     dVAR;
4314
4315     PERL_ARGS_ASSERT_TOKENIZE_USE;
4316
4317     if (PL_expect != XSTATE)
4318         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4319                     is_use ? "use" : "no"));
4320     s = SKIPSPACE1(s);
4321     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4322         s = force_version(s, TRUE);
4323         if (*s == ';' || *s == '}'
4324                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4325             start_force(PL_curforce);
4326             NEXTVAL_NEXTTOKE.opval = NULL;
4327             force_next(WORD);
4328         }
4329         else if (*s == 'v') {
4330             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4331             s = force_version(s, FALSE);
4332         }
4333     }
4334     else {
4335         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4336         s = force_version(s, FALSE);
4337     }
4338     pl_yylval.ival = is_use;
4339     return s;
4340 }
4341 #ifdef DEBUGGING
4342     static const char* const exp_name[] =
4343         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4344           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4345         };
4346 #endif
4347
4348 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4349 STATIC bool
4350 S_word_takes_any_delimeter(char *p, STRLEN len)
4351 {
4352     return (len == 1 && strchr("msyq", p[0])) ||
4353            (len == 2 && (
4354             (p[0] == 't' && p[1] == 'r') ||
4355             (p[0] == 'q' && strchr("qwxr", p[1]))));
4356 }
4357
4358 /*
4359   yylex
4360
4361   Works out what to call the token just pulled out of the input
4362   stream.  The yacc parser takes care of taking the ops we return and
4363   stitching them into a tree.
4364
4365   Returns:
4366     PRIVATEREF
4367
4368   Structure:
4369       if read an identifier
4370           if we're in a my declaration
4371               croak if they tried to say my($foo::bar)
4372               build the ops for a my() declaration
4373           if it's an access to a my() variable
4374               are we in a sort block?
4375                   croak if my($a); $a <=> $b
4376               build ops for access to a my() variable
4377           if in a dq string, and they've said @foo and we can't find @foo
4378               croak
4379           build ops for a bareword
4380       if we already built the token before, use it.
4381 */
4382
4383
4384 #ifdef __SC__
4385 #pragma segment Perl_yylex
4386 #endif
4387 int
4388 Perl_yylex(pTHX)
4389 {
4390     dVAR;
4391     register char *s = PL_bufptr;
4392     register char *d;
4393     STRLEN len;
4394     bool bof = FALSE;
4395     U32 fake_eof = 0;
4396
4397     /* orig_keyword, gvp, and gv are initialized here because
4398      * jump to the label just_a_word_zero can bypass their
4399      * initialization later. */
4400     I32 orig_keyword = 0;
4401     GV *gv = NULL;
4402     GV **gvp = NULL;
4403
4404     DEBUG_T( {
4405         SV* tmp = newSVpvs("");
4406         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4407             (IV)CopLINE(PL_curcop),
4408             lex_state_names[PL_lex_state],
4409             exp_name[PL_expect],
4410             pv_display(tmp, s, strlen(s), 0, 60));
4411         SvREFCNT_dec(tmp);
4412     } );
4413     /* check if there's an identifier for us to look at */
4414     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4415         return REPORT(S_pending_ident(aTHX));
4416
4417     /* no identifier pending identification */
4418
4419     switch (PL_lex_state) {
4420 #ifdef COMMENTARY
4421     case LEX_NORMAL:            /* Some compilers will produce faster */
4422     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4423         break;
4424 #endif
4425
4426     /* when we've already built the next token, just pull it out of the queue */
4427     case LEX_KNOWNEXT:
4428 #ifdef PERL_MAD
4429         PL_lasttoke--;
4430         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4431         if (PL_madskills) {
4432             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4433             PL_nexttoke[PL_lasttoke].next_mad = 0;
4434             if (PL_thismad && PL_thismad->mad_key == '_') {
4435                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4436                 PL_thismad->mad_val = 0;
4437                 mad_free(PL_thismad);
4438                 PL_thismad = 0;
4439             }
4440         }
4441         if (!PL_lasttoke) {
4442             PL_lex_state = PL_lex_defer;
4443             PL_expect = PL_lex_expect;
4444             PL_lex_defer = LEX_NORMAL;
4445             if (!PL_nexttoke[PL_lasttoke].next_type)
4446                 return yylex();
4447         }
4448 #else
4449         PL_nexttoke--;
4450         pl_yylval = PL_nextval[PL_nexttoke];
4451         if (!PL_nexttoke) {
4452             PL_lex_state = PL_lex_defer;
4453             PL_expect = PL_lex_expect;
4454             PL_lex_defer = LEX_NORMAL;
4455         }
4456 #endif
4457         {
4458             I32 next_type;
4459 #ifdef PERL_MAD
4460             next_type = PL_nexttoke[PL_lasttoke].next_type;
4461 #else
4462             next_type = PL_nexttype[PL_nexttoke];
4463 #endif
4464             if (next_type & (7<<24)) {
4465                 if (next_type & (1<<24)) {
4466                     if (PL_lex_brackets > 100)
4467                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4468                     PL_lex_brackstack[PL_lex_brackets++] =
4469                         (char) ((next_type >> 16) & 0xff);
4470                 }
4471                 if (next_type & (2<<24))
4472                     PL_lex_allbrackets++;
4473                 if (next_type & (4<<24))
4474                     PL_lex_allbrackets--;
4475                 next_type &= 0xffff;
4476             }
4477 #ifdef PERL_MAD
4478             /* FIXME - can these be merged?  */
4479             return next_type;
4480 #else
4481             return REPORT(next_type);
4482 #endif
4483         }
4484
4485     /* interpolated case modifiers like \L \U, including \Q and \E.
4486        when we get here, PL_bufptr is at the \
4487     */
4488     case LEX_INTERPCASEMOD:
4489 #ifdef DEBUGGING
4490         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4491             Perl_croak(aTHX_
4492                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4493                        PL_bufptr, PL_bufend, *PL_bufptr);
4494 #endif
4495         /* handle \E or end of string */
4496         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4497             /* if at a \E */
4498             if (PL_lex_casemods) {
4499                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4500                 PL_lex_casestack[PL_lex_casemods] = '\0';
4501
4502                 if (PL_bufptr != PL_bufend
4503                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4504                         || oldmod == 'F')) {
4505                     PL_bufptr += 2;
4506                     PL_lex_state = LEX_INTERPCONCAT;
4507 #ifdef PERL_MAD
4508                     if (PL_madskills)
4509                         PL_thistoken = newSVpvs("\\E");
4510 #endif
4511                 }
4512                 PL_lex_allbrackets--;
4513                 return REPORT(')');
4514             }
4515             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4516                /* Got an unpaired \E */
4517                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4518                         "Useless use of \\E");
4519             }
4520 #ifdef PERL_MAD
4521             while (PL_bufptr != PL_bufend &&
4522               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4523                 if (!PL_thiswhite)
4524                     PL_thiswhite = newSVpvs("");
4525                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4526                 PL_bufptr += 2;
4527             }
4528 #else
4529             if (PL_bufptr != PL_bufend)
4530                 PL_bufptr += 2;
4531 #endif
4532             PL_lex_state = LEX_INTERPCONCAT;
4533             return yylex();
4534         }
4535         else {
4536             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4537               "### Saw case modifier\n"); });
4538             s = PL_bufptr + 1;
4539             if (s[1] == '\\' && s[2] == 'E') {
4540 #ifdef PERL_MAD
4541                 if (!PL_thiswhite)
4542                     PL_thiswhite = newSVpvs("");
4543                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4544 #endif
4545                 PL_bufptr = s + 3;
4546                 PL_lex_state = LEX_INTERPCONCAT;
4547                 return yylex();
4548             }
4549             else {
4550                 I32 tmp;
4551                 if (!PL_madskills) /* when just compiling don't need correct */
4552                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4553                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4554                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4555                     (strchr(PL_lex_casestack, 'L')
4556                         || strchr(PL_lex_casestack, 'U')
4557                         || strchr(PL_lex_casestack, 'F'))) {
4558                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4559                     PL_lex_allbrackets--;
4560                     return REPORT(')');
4561                 }
4562                 if (PL_lex_casemods > 10)
4563                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4564                 PL_lex_casestack[PL_lex_casemods++] = *s;
4565                 PL_lex_casestack[PL_lex_casemods] = '\0';
4566                 PL_lex_state = LEX_INTERPCONCAT;
4567                 start_force(PL_curforce);
4568                 NEXTVAL_NEXTTOKE.ival = 0;
4569                 force_next((2<<24)|'(');
4570                 start_force(PL_curforce);
4571                 if (*s == 'l')
4572                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4573                 else if (*s == 'u')
4574                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4575                 else if (*s == 'L')
4576                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4577                 else if (*s == 'U')
4578                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4579                 else if (*s == 'Q')
4580                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4581                 else if (*s == 'F')
4582                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4583                 else
4584                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4585                 if (PL_madskills) {
4586                     SV* const tmpsv = newSVpvs("\\ ");
4587                     /* replace the space with the character we want to escape
4588                      */
4589                     SvPVX(tmpsv)[1] = *s;
4590                     curmad('_', tmpsv);
4591                 }
4592                 PL_bufptr = s + 1;
4593             }
4594             force_next(FUNC);
4595             if (PL_lex_starts) {
4596                 s = PL_bufptr;
4597                 PL_lex_starts = 0;
4598 #ifdef PERL_MAD
4599                 if (PL_madskills) {
4600                     if (PL_thistoken)
4601                         sv_free(PL_thistoken);
4602                     PL_thistoken = newSVpvs("");
4603                 }
4604 #endif
4605                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4606                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4607                     OPERATOR(',');
4608                 else
4609                     Aop(OP_CONCAT);
4610             }
4611             else
4612                 return yylex();
4613         }
4614
4615     case LEX_INTERPPUSH:
4616         return REPORT(sublex_push());
4617
4618     case LEX_INTERPSTART:
4619         if (PL_bufptr == PL_bufend)
4620             return REPORT(sublex_done());
4621         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4622               "### Interpolated variable\n"); });
4623         PL_expect = XTERM;
4624         PL_lex_dojoin = (*PL_bufptr == '@');
4625         PL_lex_state = LEX_INTERPNORMAL;
4626         if (PL_lex_dojoin) {
4627             start_force(PL_curforce);
4628             NEXTVAL_NEXTTOKE.ival = 0;
4629             force_next(',');
4630             start_force(PL_curforce);
4631             force_ident("\"", '$');
4632             start_force(PL_curforce);
4633             NEXTVAL_NEXTTOKE.ival = 0;
4634             force_next('$');
4635             start_force(PL_curforce);
4636             NEXTVAL_NEXTTOKE.ival = 0;
4637             force_next((2<<24)|'(');
4638             start_force(PL_curforce);
4639             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4640             force_next(FUNC);
4641         }
4642         if (PL_lex_starts++) {
4643             s = PL_bufptr;
4644 #ifdef PERL_MAD
4645             if (PL_madskills) {
4646                 if (PL_thistoken)
4647                     sv_free(PL_thistoken);
4648                 PL_thistoken = newSVpvs("");
4649             }
4650 #endif
4651             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4652             if (!PL_lex_casemods && PL_lex_inpat)
4653                 OPERATOR(',');
4654             else
4655                 Aop(OP_CONCAT);
4656         }
4657         return yylex();
4658
4659     case LEX_INTERPENDMAYBE:
4660         if (intuit_more(PL_bufptr)) {
4661             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4662             break;
4663         }
4664         /* FALL THROUGH */
4665
4666     case LEX_INTERPEND:
4667         if (PL_lex_dojoin) {
4668             PL_lex_dojoin = FALSE;
4669             PL_lex_state = LEX_INTERPCONCAT;
4670 #ifdef PERL_MAD
4671             if (PL_madskills) {
4672                 if (PL_thistoken)
4673                     sv_free(PL_thistoken);
4674                 PL_thistoken = newSVpvs("");
4675             }
4676 #endif
4677             PL_lex_allbrackets--;
4678             return REPORT(')');
4679         }
4680         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4681             && SvEVALED(PL_lex_repl))
4682         {
4683             if (PL_bufptr != PL_bufend)
4684                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4685             PL_lex_repl = NULL;
4686         }
4687         /* FALLTHROUGH */
4688     case LEX_INTERPCONCAT:
4689 #ifdef DEBUGGING
4690         if (PL_lex_brackets)
4691             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4692                        (long) PL_lex_brackets);
4693 #endif
4694         if (PL_bufptr == PL_bufend)
4695             return REPORT(sublex_done());
4696
4697         if (SvIVX(PL_linestr) == '\'') {
4698             SV *sv = newSVsv(PL_linestr);
4699             if (!PL_lex_inpat)
4700                 sv = tokeq(sv);
4701             else if ( PL_hints & HINT_NEW_RE )
4702                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4703             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4704             s = PL_bufend;
4705         }
4706         else {
4707             s = scan_const(PL_bufptr);
4708             if (*s == '\\')
4709                 PL_lex_state = LEX_INTERPCASEMOD;
4710             else
4711                 PL_lex_state = LEX_INTERPSTART;
4712         }
4713
4714         if (s != PL_bufptr) {
4715             start_force(PL_curforce);
4716             if (PL_madskills) {
4717                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4718             }
4719             NEXTVAL_NEXTTOKE = pl_yylval;
4720             PL_expect = XTERM;
4721             force_next(THING);
4722             if (PL_lex_starts++) {
4723 #ifdef PERL_MAD
4724                 if (PL_madskills) {
4725                     if (PL_thistoken)
4726                         sv_free(PL_thistoken);
4727                     PL_thistoken = newSVpvs("");
4728                 }
4729 #endif
4730                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4731                 if (!PL_lex_casemods && PL_lex_inpat)
4732                     OPERATOR(',');
4733                 else
4734                     Aop(OP_CONCAT);
4735             }
4736             else {
4737                 PL_bufptr = s;
4738                 return yylex();
4739             }
4740         }
4741
4742         return yylex();
4743     case LEX_FORMLINE:
4744         PL_lex_state = LEX_NORMAL;
4745         s = scan_formline(PL_bufptr);
4746         if (!PL_lex_formbrack)
4747             goto rightbracket;
4748         OPERATOR(';');
4749     }
4750
4751     s = PL_bufptr;
4752     PL_oldoldbufptr = PL_oldbufptr;
4753     PL_oldbufptr = s;
4754
4755   retry:
4756 #ifdef PERL_MAD
4757     if (PL_thistoken) {
4758         sv_free(PL_thistoken);
4759         PL_thistoken = 0;
4760     }
4761     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4762 #endif
4763     switch (*s) {
4764     default:
4765         if (isIDFIRST_lazy_if(s,UTF))
4766             goto keylookup;
4767         {
4768         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4769         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4770                                                     UTF8SKIP(s),
4771                                                     SVs_TEMP | SVf_UTF8),
4772                                             10, UNI_DISPLAY_ISPRINT))
4773                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4774         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4775         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4776             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4777         } else {
4778             d = PL_linestart;
4779         }
4780         *s = '\0';
4781         sv_setpv(dsv, d);
4782         if (UTF)
4783             SvUTF8_on(dsv);
4784         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
4785     }
4786     case 4:
4787     case 26:
4788         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4789     case 0:
4790 #ifdef PERL_MAD
4791         if (PL_madskills)
4792             PL_faketokens = 0;
4793 #endif
4794         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4795             PL_last_uni = 0;
4796             PL_last_lop = 0;
4797             if (PL_lex_brackets &&
4798                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4799                 yyerror((const char *)
4800                         (PL_lex_formbrack
4801                          ? "Format not terminated"
4802                          : "Missing right curly or square bracket"));
4803             }
4804             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4805                         "### Tokener got EOF\n");
4806             } );
4807             TOKEN(0);
4808         }
4809         if (s++ < PL_bufend)
4810             goto retry;                 /* ignore stray nulls */
4811         PL_last_uni = 0;
4812         PL_last_lop = 0;
4813         if (!PL_in_eval && !PL_preambled) {
4814             PL_preambled = TRUE;
4815 #ifdef PERL_MAD
4816             if (PL_madskills)
4817                 PL_faketokens = 1;
4818 #endif
4819             if (PL_perldb) {
4820                 /* Generate a string of Perl code to load the debugger.
4821                  * If PERL5DB is set, it will return the contents of that,
4822                  * otherwise a compile-time require of perl5db.pl.  */
4823
4824                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4825
4826                 if (pdb) {
4827                     sv_setpv(PL_linestr, pdb);
4828                     sv_catpvs(PL_linestr,";");
4829                 } else {
4830                     SETERRNO(0,SS_NORMAL);
4831                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4832                 }
4833             } else
4834                 sv_setpvs(PL_linestr,"");
4835             if (PL_preambleav) {
4836                 SV **svp = AvARRAY(PL_preambleav);
4837                 SV **const end = svp + AvFILLp(PL_preambleav);
4838                 while(svp <= end) {
4839                     sv_catsv(PL_linestr, *svp);
4840                     ++svp;
4841                     sv_catpvs(PL_linestr, ";");
4842                 }
4843                 sv_free(MUTABLE_SV(PL_preambleav));
4844                 PL_preambleav = NULL;
4845             }
4846             if (PL_minus_E)
4847                 sv_catpvs(PL_linestr,
4848                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4849             if (PL_minus_n || PL_minus_p) {
4850                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4851                 if (PL_minus_l)
4852                     sv_catpvs(PL_linestr,"chomp;");
4853                 if (PL_minus_a) {
4854                     if (PL_minus_F) {
4855                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4856                              || *PL_splitstr == '"')
4857                               && strchr(PL_splitstr + 1, *PL_splitstr))
4858                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4859                         else {
4860                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4861                                bytes can be used as quoting characters.  :-) */
4862                             const char *splits = PL_splitstr;
4863                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4864                             do {
4865                                 /* Need to \ \s  */
4866                                 if (*splits == '\\')
4867                                     sv_catpvn(PL_linestr, splits, 1);
4868                                 sv_catpvn(PL_linestr, splits, 1);
4869                             } while (*splits++);
4870                             /* This loop will embed the trailing NUL of
4871                                PL_linestr as the last thing it does before
4872                                terminating.  */
4873                             sv_catpvs(PL_linestr, ");");
4874                         }
4875                     }
4876                     else
4877                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4878                 }
4879             }
4880             sv_catpvs(PL_linestr, "\n");
4881             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4882             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4883             PL_last_lop = PL_last_uni = NULL;
4884             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4885                 update_debugger_info(PL_linestr, NULL, 0);
4886             goto retry;
4887         }
4888         do {
4889             fake_eof = 0;
4890             bof = PL_rsfp ? TRUE : FALSE;
4891             if (0) {
4892               fake_eof:
4893                 fake_eof = LEX_FAKE_EOF;
4894             }
4895             PL_bufptr = PL_bufend;
4896             CopLINE_inc(PL_curcop);
4897             if (!lex_next_chunk(fake_eof)) {
4898                 CopLINE_dec(PL_curcop);
4899                 s = PL_bufptr;
4900                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4901             }
4902             CopLINE_dec(PL_curcop);
4903 #ifdef PERL_MAD
4904             if (!PL_rsfp)
4905                 PL_realtokenstart = -1;
4906 #endif
4907             s = PL_bufptr;
4908             /* If it looks like the start of a BOM or raw UTF-16,
4909              * check if it in fact is. */
4910             if (bof && PL_rsfp &&
4911                      (*s == 0 ||
4912                       *(U8*)s == 0xEF ||
4913                       *(U8*)s >= 0xFE ||
4914                       s[1] == 0)) {
4915                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4916                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4917 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4918                 /* offset may include swallowed CR */
4919                 if (!bof)
4920                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4921 #endif
4922                 if (bof) {
4923                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4924                     s = swallow_bom((U8*)s);
4925                 }
4926             }
4927             if (PL_parser->in_pod) {
4928                 /* Incest with pod. */
4929 #ifdef PERL_MAD
4930                 if (PL_madskills)
4931                     sv_catsv(PL_thiswhite, PL_linestr);
4932 #endif
4933                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4934                     sv_setpvs(PL_linestr, "");
4935                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4936                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4937                     PL_last_lop = PL_last_uni = NULL;
4938                     PL_parser->in_pod = 0;
4939                 }
4940             }
4941             if (PL_rsfp || PL_parser->filtered)
4942                 incline(s);
4943         } while (PL_parser->in_pod);
4944         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4945         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4946         PL_last_lop = PL_last_uni = NULL;
4947         if (CopLINE(PL_curcop) == 1) {
4948             while (s < PL_bufend && isSPACE(*s))
4949                 s++;
4950             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4951                 s++;
4952 #ifdef PERL_MAD
4953             if (PL_madskills)
4954                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4955 #endif
4956             d = NULL;
4957             if (!PL_in_eval) {
4958                 if (*s == '#' && *(s+1) == '!')
4959                     d = s + 2;
4960 #ifdef ALTERNATE_SHEBANG
4961                 else {
4962                     static char const as[] = ALTERNATE_SHEBANG;
4963                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4964                         d = s + (sizeof(as) - 1);
4965                 }
4966 #endif /* ALTERNATE_SHEBANG */
4967             }
4968             if (d) {
4969                 char *ipath;
4970                 char *ipathend;
4971
4972                 while (isSPACE(*d))
4973                     d++;
4974                 ipath = d;
4975                 while (*d && !isSPACE(*d))
4976                     d++;
4977                 ipathend = d;
4978
4979 #ifdef ARG_ZERO_IS_SCRIPT
4980                 if (ipathend > ipath) {
4981                     /*
4982                      * HP-UX (at least) sets argv[0] to the script name,
4983                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4984                      * at least, set argv[0] to the basename of the Perl
4985                      * interpreter. So, having found "#!", we'll set it right.
4986                      */
4987                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4988                                                     SVt_PV)); /* $^X */
4989                     assert(SvPOK(x) || SvGMAGICAL(x));
4990                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4991                         sv_setpvn(x, ipath, ipathend - ipath);
4992                         SvSETMAGIC(x);
4993                     }
4994                     else {
4995                         STRLEN blen;
4996                         STRLEN llen;
4997                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4998                         const char * const lstart = SvPV_const(x,llen);
4999                         if (llen < blen) {
5000                             bstart += blen - llen;
5001                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5002                                 sv_setpvn(x, ipath, ipathend - ipath);
5003                                 SvSETMAGIC(x);
5004                             }
5005                         }
5006                     }
5007                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5008                 }
5009 #endif /* ARG_ZERO_IS_SCRIPT */
5010
5011                 /*
5012                  * Look for options.
5013                  */
5014                 d = instr(s,"perl -");
5015                 if (!d) {
5016                     d = instr(s,"perl");
5017 #if defined(DOSISH)
5018                     /* avoid getting into infinite loops when shebang
5019                      * line contains "Perl" rather than "perl" */
5020                     if (!d) {
5021                         for (d = ipathend-4; d >= ipath; --d) {
5022                             if ((*d == 'p' || *d == 'P')
5023                                 && !ibcmp(d, "perl", 4))
5024                             {
5025                                 break;
5026                             }
5027                         }
5028                         if (d < ipath)
5029                             d = NULL;
5030                     }
5031 #endif
5032                 }
5033 #ifdef ALTERNATE_SHEBANG
5034                 /*
5035                  * If the ALTERNATE_SHEBANG on this system starts with a
5036                  * character that can be part of a Perl expression, then if
5037                  * we see it but not "perl", we're probably looking at the
5038                  * start of Perl code, not a request to hand off to some
5039                  * other interpreter.  Similarly, if "perl" is there, but
5040                  * not in the first 'word' of the line, we assume the line
5041                  * contains the start of the Perl program.
5042                  */
5043                 if (d && *s != '#') {
5044                     const char *c = ipath;
5045                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5046                         c++;
5047                     if (c < d)
5048                         d = NULL;       /* "perl" not in first word; ignore */
5049                     else
5050                         *s = '#';       /* Don't try to parse shebang line */
5051                 }
5052 #endif /* ALTERNATE_SHEBANG */
5053                 if (!d &&
5054                     *s == '#' &&
5055                     ipathend > ipath &&
5056                     !PL_minus_c &&
5057                     !instr(s,"indir") &&
5058                     instr(PL_origargv[0],"perl"))
5059                 {
5060                     dVAR;
5061                     char **newargv;
5062
5063                     *ipathend = '\0';
5064                     s = ipathend + 1;
5065                     while (s < PL_bufend && isSPACE(*s))
5066                         s++;
5067                     if (s < PL_bufend) {
5068                         Newx(newargv,PL_origargc+3,char*);
5069                         newargv[1] = s;
5070                         while (s < PL_bufend && !isSPACE(*s))
5071                             s++;
5072                         *s = '\0';
5073                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5074                     }
5075                     else
5076                         newargv = PL_origargv;
5077                     newargv[0] = ipath;
5078                     PERL_FPU_PRE_EXEC
5079                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5080                     PERL_FPU_POST_EXEC
5081                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5082                 }
5083                 if (d) {
5084                     while (*d && !isSPACE(*d))
5085                         d++;
5086                     while (SPACE_OR_TAB(*d))
5087                         d++;
5088
5089                     if (*d++ == '-') {
5090                         const bool switches_done = PL_doswitches;
5091                         const U32 oldpdb = PL_perldb;
5092                         const bool oldn = PL_minus_n;
5093                         const bool oldp = PL_minus_p;
5094                         const char *d1 = d;
5095
5096                         do {
5097                             bool baduni = FALSE;
5098                             if (*d1 == 'C') {
5099                                 const char *d2 = d1 + 1;
5100                                 if (parse_unicode_opts((const char **)&d2)
5101                                     != PL_unicode)
5102                                     baduni = TRUE;
5103                             }
5104                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5105                                 const char * const m = d1;
5106                                 while (*d1 && !isSPACE(*d1))
5107                                     d1++;
5108                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5109                                       (int)(d1 - m), m);
5110                             }
5111                             d1 = moreswitches(d1);
5112                         } while (d1);
5113                         if (PL_doswitches && !switches_done) {
5114                             int argc = PL_origargc;
5115                             char **argv = PL_origargv;
5116                             do {
5117                                 argc--,argv++;
5118                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5119                             init_argv_symbols(argc,argv);
5120                         }
5121                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5122                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5123                               /* if we have already added "LINE: while (<>) {",
5124                                  we must not do it again */
5125                         {
5126                             sv_setpvs(PL_linestr, "");
5127                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5128                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5129                             PL_last_lop = PL_last_uni = NULL;
5130                             PL_preambled = FALSE;
5131                             if (PERLDB_LINE || PERLDB_SAVESRC)
5132                                 (void)gv_fetchfile(PL_origfilename);
5133                             goto retry;
5134                         }
5135                     }
5136                 }
5137             }
5138         }
5139         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5140             PL_bufptr = s;
5141             PL_lex_state = LEX_FORMLINE;
5142             return yylex();
5143         }
5144         goto retry;
5145     case '\r':
5146 #ifdef PERL_STRICT_CR
5147         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5148         Perl_croak(aTHX_
5149       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5150 #endif
5151     case ' ': case '\t': case '\f': case 013:
5152 #ifdef PERL_MAD
5153         PL_realtokenstart = -1;
5154         if (!PL_thiswhite)
5155             PL_thiswhite = newSVpvs("");
5156         sv_catpvn(PL_thiswhite, s, 1);
5157 #endif
5158         s++;
5159         goto retry;
5160     case '#':
5161     case '\n':
5162 #ifdef PERL_MAD
5163         PL_realtokenstart = -1;
5164         if (PL_madskills)
5165             PL_faketokens = 0;
5166 #endif
5167         if (PL_lex_state != LEX_NORMAL ||
5168              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5169             if (*s == '#' && s == PL_linestart && PL_in_eval
5170              && !PL_rsfp && !PL_parser->filtered) {
5171                 /* handle eval qq[#line 1 "foo"\n ...] */
5172                 CopLINE_dec(PL_curcop);
5173                 incline(s);
5174             }
5175             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5176                 s = SKIPSPACE0(s);
5177                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5178                     incline(s);
5179             }
5180             else {
5181                 d = s;
5182                 while (d < PL_bufend && *d != '\n')
5183                     d++;
5184                 if (d < PL_bufend)
5185                     d++;
5186                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5187                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5188                                d, PL_bufend);
5189 #ifdef PERL_MAD
5190                 if (PL_madskills)
5191                     PL_thiswhite = newSVpvn(s, d - s);
5192 #endif
5193                 s = d;
5194                 incline(s);
5195             }
5196             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5197                 PL_bufptr = s;
5198                 PL_lex_state = LEX_FORMLINE;
5199                 return yylex();
5200             }
5201         }
5202         else {
5203 #ifdef PERL_MAD
5204             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5205                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5206                     PL_faketokens = 0;
5207                     s = SKIPSPACE0(s);
5208                     TOKEN(PEG); /* make sure any #! line is accessible */
5209                 }
5210                 s = SKIPSPACE0(s);
5211             }
5212             else {
5213 /*              if (PL_madskills && PL_lex_formbrack) { */
5214                     d = s;
5215                     while (d < PL_bufend && *d != '\n')
5216                         d++;
5217                     if (d < PL_bufend)
5218                         d++;
5219                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5220                       Perl_croak(aTHX_ "panic: input overflow");
5221                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5222                         if (!PL_thiswhite)
5223                             PL_thiswhite = newSVpvs("");
5224                         if (CopLINE(PL_curcop) == 1) {
5225                             sv_setpvs(PL_thiswhite, "");
5226                             PL_faketokens = 0;
5227                         }
5228                         sv_catpvn(PL_thiswhite, s, d - s);
5229                     }
5230                     s = d;
5231 /*              }
5232                 *s = '\0';
5233                 PL_bufend = s; */
5234             }
5235 #else
5236             *s = '\0';
5237             PL_bufend = s;
5238 #endif
5239         }
5240         goto retry;
5241     case '-':
5242         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5243             I32 ftst = 0;
5244             char tmp;
5245
5246             s++;
5247             PL_bufptr = s;
5248             tmp = *s++;
5249
5250             while (s < PL_bufend && SPACE_OR_TAB(*s))
5251                 s++;
5252
5253             if (strnEQ(s,"=>",2)) {
5254                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5255                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5256                 OPERATOR('-');          /* unary minus */
5257             }
5258             PL_last_uni = PL_oldbufptr;
5259             switch (tmp) {
5260             case 'r': ftst = OP_FTEREAD;        break;
5261             case 'w': ftst = OP_FTEWRITE;       break;
5262             case 'x': ftst = OP_FTEEXEC;        break;
5263             case 'o': ftst = OP_FTEOWNED;       break;
5264             case 'R': ftst = OP_FTRREAD;        break;
5265             case 'W': ftst = OP_FTRWRITE;       break;
5266             case 'X': ftst = OP_FTREXEC;        break;
5267             case 'O': ftst = OP_FTROWNED;       break;
5268             case 'e': ftst = OP_FTIS;           break;
5269             case 'z': ftst = OP_FTZERO;         break;
5270             case 's': ftst = OP_FTSIZE;         break;
5271             case 'f': ftst = OP_FTFILE;         break;
5272             case 'd': ftst = OP_FTDIR;          break;
5273             case 'l': ftst = OP_FTLINK;         break;
5274             case 'p': ftst = OP_FTPIPE;         break;
5275             case 'S': ftst = OP_FTSOCK;         break;
5276             case 'u': ftst = OP_FTSUID;         break;
5277             case 'g': ftst = OP_FTSGID;         break;
5278             case 'k': ftst = OP_FTSVTX;         break;
5279             case 'b': ftst = OP_FTBLK;          break;
5280             case 'c': ftst = OP_FTCHR;          break;
5281             case 't': ftst = OP_FTTTY;          break;
5282             case 'T': ftst = OP_FTTEXT;         break;
5283             case 'B': ftst = OP_FTBINARY;       break;
5284             case 'M': case 'A': case 'C':
5285                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5286                 switch (tmp) {
5287                 case 'M': ftst = OP_FTMTIME;    break;
5288                 case 'A': ftst = OP_FTATIME;    break;
5289                 case 'C': ftst = OP_FTCTIME;    break;
5290                 default:                        break;
5291                 }
5292                 break;
5293             default:
5294                 break;
5295             }
5296             if (ftst) {
5297                 PL_last_lop_op = (OPCODE)ftst;
5298                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5299                         "### Saw file test %c\n", (int)tmp);
5300                 } );
5301                 FTST(ftst);
5302             }
5303             else {
5304                 /* Assume it was a minus followed by a one-letter named
5305                  * subroutine call (or a -bareword), then. */
5306                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5307                         "### '-%c' looked like a file test but was not\n",
5308                         (int) tmp);
5309                 } );
5310                 s = --PL_bufptr;
5311             }
5312         }
5313         {
5314             const char tmp = *s++;
5315             if (*s == tmp) {
5316                 s++;
5317                 if (PL_expect == XOPERATOR)
5318                     TERM(POSTDEC);
5319                 else
5320                     OPERATOR(PREDEC);
5321             }
5322             else if (*s == '>') {
5323                 s++;
5324                 s = SKIPSPACE1(s);
5325                 if (isIDFIRST_lazy_if(s,UTF)) {
5326                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5327                     TOKEN(ARROW);
5328                 }
5329                 else if (*s == '$')
5330                     OPERATOR(ARROW);
5331                 else
5332                     TERM(ARROW);
5333             }
5334             if (PL_expect == XOPERATOR) {
5335                 if (*s == '=' && !PL_lex_allbrackets &&
5336                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5337                     s--;
5338                     TOKEN(0);
5339                 }
5340                 Aop(OP_SUBTRACT);
5341             }
5342             else {
5343                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5344                     check_uni();
5345                 OPERATOR('-');          /* unary minus */
5346             }
5347         }
5348
5349     case '+':
5350         {
5351             const char tmp = *s++;
5352             if (*s == tmp) {
5353                 s++;
5354                 if (PL_expect == XOPERATOR)
5355                     TERM(POSTINC);
5356                 else
5357                     OPERATOR(PREINC);
5358             }
5359             if (PL_expect == XOPERATOR) {
5360                 if (*s == '=' && !PL_lex_allbrackets &&
5361                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5362                     s--;
5363                     TOKEN(0);
5364                 }
5365                 Aop(OP_ADD);
5366             }
5367             else {
5368                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5369                     check_uni();
5370                 OPERATOR('+');
5371             }
5372         }
5373
5374     case '*':
5375         if (PL_expect != XOPERATOR) {
5376             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5377             PL_expect = XOPERATOR;
5378             force_ident(PL_tokenbuf, '*');
5379             if (!*PL_tokenbuf)
5380                 PREREF('*');
5381             TERM('*');
5382         }
5383         s++;
5384         if (*s == '*') {
5385             s++;
5386             if (*s == '=' && !PL_lex_allbrackets &&
5387                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5388                 s -= 2;
5389                 TOKEN(0);
5390             }
5391             PWop(OP_POW);
5392         }
5393         if (*s == '=' && !PL_lex_allbrackets &&
5394                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5395             s--;
5396             TOKEN(0);
5397         }
5398         Mop(OP_MULTIPLY);
5399
5400     case '%':
5401         if (PL_expect == XOPERATOR) {
5402             if (s[1] == '=' && !PL_lex_allbrackets &&
5403                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5404                 TOKEN(0);
5405             ++s;
5406             Mop(OP_MODULO);
5407         }
5408         PL_tokenbuf[0] = '%';
5409         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5410                 sizeof PL_tokenbuf - 1, FALSE);
5411         if (!PL_tokenbuf[1]) {
5412             PREREF('%');
5413         }
5414         PL_pending_ident = '%';
5415         TERM('%');
5416
5417     case '^':
5418         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5419                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5420             TOKEN(0);
5421         s++;
5422         BOop(OP_BIT_XOR);
5423     case '[':
5424         if (PL_lex_brackets > 100)
5425             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5426         PL_lex_brackstack[PL_lex_brackets++] = 0;
5427         PL_lex_allbrackets++;
5428         {
5429             const char tmp = *s++;
5430             OPERATOR(tmp);
5431         }
5432     case '~':
5433         if (s[1] == '~'
5434             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5435         {
5436             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5437                 TOKEN(0);
5438             s += 2;
5439             Eop(OP_SMARTMATCH);
5440         }
5441         s++;
5442         OPERATOR('~');
5443     case ',':
5444         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5445             TOKEN(0);
5446         s++;
5447         OPERATOR(',');
5448     case ':':
5449         if (s[1] == ':') {
5450             len = 0;
5451             goto just_a_word_zero_gv;
5452         }
5453         s++;
5454         switch (PL_expect) {
5455             OP *attrs;
5456 #ifdef PERL_MAD
5457             I32 stuffstart;
5458 #endif
5459         case XOPERATOR:
5460             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5461                 break;
5462             PL_bufptr = s;      /* update in case we back off */
5463             if (*s == '=') {
5464                 Perl_croak(aTHX_
5465                            "Use of := for an empty attribute list is not allowed");
5466             }
5467             goto grabattrs;
5468         case XATTRBLOCK:
5469             PL_expect = XBLOCK;
5470             goto grabattrs;
5471         case XATTRTERM:
5472             PL_expect = XTERMBLOCK;
5473          grabattrs:
5474 #ifdef PERL_MAD
5475             stuffstart = s - SvPVX(PL_linestr) - 1;
5476 #endif
5477             s = PEEKSPACE(s);
5478             attrs = NULL;
5479             while (isIDFIRST_lazy_if(s,UTF)) {
5480                 I32 tmp;
5481                 SV *sv;
5482                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5483                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5484                     if (tmp < 0) tmp = -tmp;
5485                     switch (tmp) {
5486                     case KEY_or:
5487                     case KEY_and:
5488                     case KEY_for:
5489                     case KEY_foreach:
5490                     case KEY_unless:
5491                     case KEY_if:
5492                     case KEY_while:
5493                     case KEY_until:
5494                         goto got_attrs;
5495                     default:
5496                         break;
5497                     }
5498                 }
5499                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5500                 if (*d == '(') {
5501                     d = scan_str(d,TRUE,TRUE);
5502                     if (!d) {
5503                         /* MUST advance bufptr here to avoid bogus
5504                            "at end of line" context messages from yyerror().
5505                          */
5506                         PL_bufptr = s + len;
5507                         yyerror("Unterminated attribute parameter in attribute list");
5508                         if (attrs)
5509                             op_free(attrs);
5510                         sv_free(sv);
5511                         return REPORT(0);       /* EOF indicator */
5512                     }
5513                 }
5514                 if (PL_lex_stuff) {
5515                     sv_catsv(sv, PL_lex_stuff);
5516                     attrs = op_append_elem(OP_LIST, attrs,
5517                                         newSVOP(OP_CONST, 0, sv));
5518                     SvREFCNT_dec(PL_lex_stuff);
5519                     PL_lex_stuff = NULL;
5520                 }
5521                 else {
5522                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5523                         sv_free(sv);
5524                         if (PL_in_my == KEY_our) {
5525                             deprecate(":unique");
5526                         }
5527                         else
5528                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5529                     }
5530
5531                     /* NOTE: any CV attrs applied here need to be part of
5532                        the CVf_BUILTIN_ATTRS define in cv.h! */
5533                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5534                         sv_free(sv);
5535                         CvLVALUE_on(PL_compcv);
5536                     }
5537                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5538                         sv_free(sv);
5539                         deprecate(":locked");
5540                     }
5541                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5542                         sv_free(sv);
5543                         CvMETHOD_on(PL_compcv);
5544                     }
5545                     /* After we've set the flags, it could be argued that
5546                        we don't need to do the attributes.pm-based setting
5547                        process, and shouldn't bother appending recognized
5548                        flags.  To experiment with that, uncomment the
5549                        following "else".  (Note that's already been
5550                        uncommented.  That keeps the above-applied built-in
5551                        attributes from being intercepted (and possibly
5552                        rejected) by a package's attribute routines, but is
5553                        justified by the performance win for the common case
5554                        of applying only built-in attributes.) */
5555                     else
5556                         attrs = op_append_elem(OP_LIST, attrs,
5557                                             newSVOP(OP_CONST, 0,
5558                                                     sv));
5559                 }
5560                 s = PEEKSPACE(d);
5561                 if (*s == ':' && s[1] != ':')
5562                     s = PEEKSPACE(s+1);
5563                 else if (s == d)
5564                     break;      /* require real whitespace or :'s */
5565                 /* XXX losing whitespace on sequential attributes here */
5566             }
5567             {
5568                 const char tmp
5569                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5570                 if (*s != ';' && *s != '}' && *s != tmp
5571                     && (tmp != '=' || *s != ')')) {
5572                     const char q = ((*s == '\'') ? '"' : '\'');
5573                     /* If here for an expression, and parsed no attrs, back
5574                        off. */
5575                     if (tmp == '=' && !attrs) {
5576                         s = PL_bufptr;
5577                         break;
5578                     }
5579                     /* MUST advance bufptr here to avoid bogus "at end of line"
5580                        context messages from yyerror().
5581                     */
5582                     PL_bufptr = s;
5583                     yyerror( (const char *)
5584                              (*s
5585                               ? Perl_form(aTHX_ "Invalid separator character "
5586                                           "%c%c%c in attribute list", q, *s, q)
5587                               : "Unterminated attribute list" ) );
5588                     if (attrs)
5589                         op_free(attrs);
5590                     OPERATOR(':');
5591                 }
5592             }
5593         got_attrs:
5594             if (attrs) {
5595                 start_force(PL_curforce);
5596                 NEXTVAL_NEXTTOKE.opval = attrs;
5597                 CURMAD('_', PL_nextwhite);
5598                 force_next(THING);
5599             }
5600 #ifdef PERL_MAD
5601             if (PL_madskills) {
5602                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5603                                      (s - SvPVX(PL_linestr)) - stuffstart);
5604             }
5605 #endif
5606             TOKEN(COLONATTR);
5607         }
5608         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5609             s--;
5610             TOKEN(0);
5611         }
5612         PL_lex_allbrackets--;
5613         OPERATOR(':');
5614     case '(':
5615         s++;
5616         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5617             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5618         else
5619             PL_expect = XTERM;
5620         s = SKIPSPACE1(s);
5621         PL_lex_allbrackets++;
5622         TOKEN('(');
5623     case ';':
5624         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5625             TOKEN(0);
5626         CLINE;
5627         s++;
5628         OPERATOR(';');
5629     case ')':
5630         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5631             TOKEN(0);
5632         s++;
5633         PL_lex_allbrackets--;
5634         s = SKIPSPACE1(s);
5635         if (*s == '{')
5636             PREBLOCK(')');
5637         TERM(')');
5638     case ']':
5639         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5640             TOKEN(0);
5641         s++;
5642         if (PL_lex_brackets <= 0)
5643             yyerror("Unmatched right square bracket");
5644         else
5645             --PL_lex_brackets;
5646         PL_lex_allbrackets--;
5647         if (PL_lex_state == LEX_INTERPNORMAL) {
5648             if (PL_lex_brackets == 0) {
5649                 if (*s == '-' && s[1] == '>')
5650                     PL_lex_state = LEX_INTERPENDMAYBE;
5651                 else if (*s != '[' && *s != '{')
5652                     PL_lex_state = LEX_INTERPEND;
5653             }
5654         }
5655         TERM(']');
5656     case '{':
5657       leftbracket:
5658         s++;
5659         if (PL_lex_brackets > 100) {
5660             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5661         }
5662         switch (PL_expect) {
5663         case XTERM:
5664             if (PL_lex_formbrack) {
5665                 s--;
5666                 PRETERMBLOCK(DO);
5667             }
5668             if (PL_oldoldbufptr == PL_last_lop)
5669                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5670             else
5671                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5672             PL_lex_allbrackets++;
5673             OPERATOR(HASHBRACK);
5674         case XOPERATOR:
5675             while (s < PL_bufend && SPACE_OR_TAB(*s))
5676                 s++;
5677             d = s;
5678             PL_tokenbuf[0] = '\0';
5679             if (d < PL_bufend && *d == '-') {
5680                 PL_tokenbuf[0] = '-';
5681                 d++;
5682                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5683                     d++;
5684             }
5685             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5686                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5687                               FALSE, &len);
5688                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5689                     d++;
5690                 if (*d == '}') {
5691                     const char minus = (PL_tokenbuf[0] == '-');
5692                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5693                     if (minus)
5694                         force_next('-');
5695                 }
5696             }
5697             /* FALL THROUGH */
5698         case XATTRBLOCK:
5699         case XBLOCK:
5700             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5701             PL_lex_allbrackets++;
5702             PL_expect = XSTATE;
5703             break;
5704         case XATTRTERM:
5705         case XTERMBLOCK:
5706             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5707             PL_lex_allbrackets++;
5708             PL_expect = XSTATE;
5709             break;
5710         default: {
5711                 const char *t;
5712                 if (PL_oldoldbufptr == PL_last_lop)
5713                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5714                 else
5715                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5716                 PL_lex_allbrackets++;
5717                 s = SKIPSPACE1(s);
5718                 if (*s == '}') {
5719                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5720                         PL_expect = XTERM;
5721                         /* This hack is to get the ${} in the message. */
5722                         PL_bufptr = s+1;
5723                         yyerror("syntax error");
5724                         break;
5725                     }
5726                     OPERATOR(HASHBRACK);
5727                 }
5728                 /* This hack serves to disambiguate a pair of curlies
5729                  * as being a block or an anon hash.  Normally, expectation
5730                  * determines that, but in cases where we're not in a
5731                  * position to expect anything in particular (like inside
5732                  * eval"") we have to resolve the ambiguity.  This code
5733                  * covers the case where the first term in the curlies is a
5734                  * quoted string.  Most other cases need to be explicitly
5735                  * disambiguated by prepending a "+" before the opening
5736                  * curly in order to force resolution as an anon hash.
5737                  *
5738                  * XXX should probably propagate the outer expectation
5739                  * into eval"" to rely less on this hack, but that could
5740                  * potentially break current behavior of eval"".
5741                  * GSAR 97-07-21
5742                  */
5743                 t = s;
5744                 if (*s == '\'' || *s == '"' || *s == '`') {
5745                     /* common case: get past first string, handling escapes */
5746                     for (t++; t < PL_bufend && *t != *s;)
5747                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5748                             t++;
5749                     t++;
5750                 }
5751                 else if (*s == 'q') {
5752                     if (++t < PL_bufend
5753                         && (!isALNUM(*t)
5754                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5755                                 && !isALNUM(*t))))
5756                     {
5757                         /* skip q//-like construct */
5758                         const char *tmps;
5759                         char open, close, term;
5760                         I32 brackets = 1;
5761
5762                         while (t < PL_bufend && isSPACE(*t))
5763                             t++;
5764                         /* check for q => */
5765                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5766                             OPERATOR(HASHBRACK);
5767                         }
5768                         term = *t;
5769                         open = term;
5770                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5771                             term = tmps[5];
5772                         close = term;
5773                         if (open == close)
5774                             for (t++; t < PL_bufend; t++) {
5775                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5776                                     t++;
5777                                 else if (*t == open)
5778                                     break;
5779                             }
5780                         else {
5781                             for (t++; t < PL_bufend; t++) {
5782                                 if (*t == '\\' && t+1 < PL_bufend)
5783                                     t++;
5784                                 else if (*t == close && --brackets <= 0)
5785                                     break;
5786                                 else if (*t == open)
5787                                     brackets++;
5788                             }
5789                         }
5790                         t++;
5791                     }
5792                     else
5793                         /* skip plain q word */
5794                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5795                              t += UTF8SKIP(t);
5796                 }
5797                 else if (isALNUM_lazy_if(t,UTF)) {
5798                     t += UTF8SKIP(t);
5799                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5800                          t += UTF8SKIP(t);
5801                 }
5802                 while (t < PL_bufend && isSPACE(*t))
5803                     t++;
5804                 /* if comma follows first term, call it an anon hash */
5805                 /* XXX it could be a comma expression with loop modifiers */
5806                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5807                                    || (*t == '=' && t[1] == '>')))
5808                     OPERATOR(HASHBRACK);
5809                 if (PL_expect == XREF)
5810                     PL_expect = XTERM;
5811                 else {
5812                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5813                     PL_expect = XSTATE;
5814                 }
5815             }
5816             break;
5817         }
5818         pl_yylval.ival = CopLINE(PL_curcop);
5819         if (isSPACE(*s) || *s == '#')
5820             PL_copline = NOLINE;   /* invalidate current command line number */
5821         TOKEN('{');
5822     case '}':
5823         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5824             TOKEN(0);
5825       rightbracket:
5826         s++;
5827         if (PL_lex_brackets <= 0)
5828             yyerror("Unmatched right curly bracket");
5829         else
5830             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5831         PL_lex_allbrackets--;
5832         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5833             PL_lex_formbrack = 0;
5834         if (PL_lex_state == LEX_INTERPNORMAL) {
5835             if (PL_lex_brackets == 0) {
5836                 if (PL_expect & XFAKEBRACK) {
5837                     PL_expect &= XENUMMASK;
5838                     PL_lex_state = LEX_INTERPEND;
5839                     PL_bufptr = s;
5840 #if 0
5841                     if (PL_madskills) {
5842                         if (!PL_thiswhite)
5843                             PL_thiswhite = newSVpvs("");
5844                         sv_catpvs(PL_thiswhite,"}");
5845                     }
5846 #endif
5847                     return yylex();     /* ignore fake brackets */
5848                 }
5849                 if (*s == '-' && s[1] == '>')
5850                     PL_lex_state = LEX_INTERPENDMAYBE;
5851                 else if (*s != '[' && *s != '{')
5852                     PL_lex_state = LEX_INTERPEND;
5853             }
5854         }
5855         if (PL_expect & XFAKEBRACK) {
5856             PL_expect &= XENUMMASK;
5857             PL_bufptr = s;
5858             return yylex();             /* ignore fake brackets */
5859         }
5860         start_force(PL_curforce);
5861         if (PL_madskills) {
5862             curmad('X', newSVpvn(s-1,1));
5863             CURMAD('_', PL_thiswhite);
5864         }
5865         force_next('}');
5866 #ifdef PERL_MAD
5867         if (!PL_thistoken)
5868             PL_thistoken = newSVpvs("");
5869 #endif
5870         TOKEN(';');
5871     case '&':
5872         s++;
5873         if (*s++ == '&') {
5874             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5875                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5876                 s -= 2;
5877                 TOKEN(0);
5878             }
5879             AOPERATOR(ANDAND);
5880         }
5881         s--;
5882         if (PL_expect == XOPERATOR) {
5883             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5884                 && isIDFIRST_lazy_if(s,UTF))
5885             {
5886                 CopLINE_dec(PL_curcop);
5887                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5888                 CopLINE_inc(PL_curcop);
5889             }
5890             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5891                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5892                 s--;
5893                 TOKEN(0);
5894             }
5895             BAop(OP_BIT_AND);
5896         }
5897
5898         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5899         if (*PL_tokenbuf) {
5900             PL_expect = XOPERATOR;
5901             force_ident(PL_tokenbuf, '&');
5902         }
5903         else
5904             PREREF('&');
5905         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5906         TERM('&');
5907
5908     case '|':
5909         s++;
5910         if (*s++ == '|') {
5911             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5912                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5913                 s -= 2;
5914                 TOKEN(0);
5915             }
5916             AOPERATOR(OROR);
5917         }
5918         s--;
5919         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5920                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5921             s--;
5922             TOKEN(0);
5923         }
5924         BOop(OP_BIT_OR);
5925     case '=':
5926         s++;
5927         {
5928             const char tmp = *s++;
5929             if (tmp == '=') {
5930                 if (!PL_lex_allbrackets &&
5931                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5932                     s -= 2;
5933                     TOKEN(0);
5934                 }
5935                 Eop(OP_EQ);
5936             }
5937             if (tmp == '>') {
5938                 if (!PL_lex_allbrackets &&
5939                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5940                     s -= 2;
5941                     TOKEN(0);
5942                 }
5943                 OPERATOR(',');
5944             }
5945             if (tmp == '~')
5946                 PMop(OP_MATCH);
5947             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5948                 && strchr("+-*/%.^&|<",tmp))
5949                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5950                             "Reversed %c= operator",(int)tmp);
5951             s--;
5952             if (PL_expect == XSTATE && isALPHA(tmp) &&
5953                 (s == PL_linestart+1 || s[-2] == '\n') )
5954                 {
5955                     if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
5956                         d = PL_bufend;
5957                         while (s < d) {
5958                             if (*s++ == '\n') {
5959                                 incline(s);
5960                                 if (strnEQ(s,"=cut",4)) {
5961                                     s = strchr(s,'\n');
5962                                     if (s)
5963                                         s++;
5964                                     else
5965                                         s = d;
5966                                     incline(s);
5967                                     goto retry;
5968                                 }
5969                             }
5970                         }
5971                         goto retry;
5972                     }
5973 #ifdef PERL_MAD
5974                     if (PL_madskills) {
5975                         if (!PL_thiswhite)
5976                             PL_thiswhite = newSVpvs("");
5977                         sv_catpvn(PL_thiswhite, PL_linestart,
5978                                   PL_bufend - PL_linestart);
5979                     }
5980 #endif
5981                     s = PL_bufend;
5982                     PL_parser->in_pod = 1;
5983                     goto retry;
5984                 }
5985         }
5986         if (PL_lex_brackets < PL_lex_formbrack) {
5987             const char *t = s;
5988 #ifdef PERL_STRICT_CR
5989             while (SPACE_OR_TAB(*t))
5990 #else
5991             while (SPACE_OR_TAB(*t) || *t == '\r')
5992 #endif
5993                 t++;
5994             if (*t == '\n' || *t == '#') {
5995                 s--;
5996                 PL_expect = XBLOCK;
5997                 goto leftbracket;
5998             }
5999         }
6000         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6001             s--;
6002             TOKEN(0);
6003         }
6004         pl_yylval.ival = 0;
6005         OPERATOR(ASSIGNOP);
6006     case '!':
6007         s++;
6008         {
6009             const char tmp = *s++;
6010             if (tmp == '=') {
6011                 /* was this !=~ where !~ was meant?
6012                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6013
6014                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6015                     const char *t = s+1;
6016
6017                     while (t < PL_bufend && isSPACE(*t))
6018                         ++t;
6019
6020                     if (*t == '/' || *t == '?' ||
6021                         ((*t == 'm' || *t == 's' || *t == 'y')
6022                          && !isALNUM(t[1])) ||
6023                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6024                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6025                                     "!=~ should be !~");
6026                 }
6027                 if (!PL_lex_allbrackets &&
6028                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6029                     s -= 2;
6030                     TOKEN(0);
6031                 }
6032                 Eop(OP_NE);
6033             }
6034             if (tmp == '~')
6035                 PMop(OP_NOT);
6036         }
6037         s--;
6038         OPERATOR('!');
6039     case '<':
6040         if (PL_expect != XOPERATOR) {
6041             if (s[1] != '<' && !strchr(s,'>'))
6042                 check_uni();
6043             if (s[1] == '<')
6044                 s = scan_heredoc(s);
6045             else
6046                 s = scan_inputsymbol(s);
6047             TERM(sublex_start());
6048         }
6049         s++;
6050         {
6051             char tmp = *s++;
6052             if (tmp == '<') {
6053                 if (*s == '=' && !PL_lex_allbrackets &&
6054                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6055                     s -= 2;
6056                     TOKEN(0);
6057                 }
6058                 SHop(OP_LEFT_SHIFT);
6059             }
6060             if (tmp == '=') {
6061                 tmp = *s++;
6062                 if (tmp == '>') {
6063                     if (!PL_lex_allbrackets &&
6064                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6065                         s -= 3;
6066                         TOKEN(0);
6067                     }
6068                     Eop(OP_NCMP);
6069                 }
6070                 s--;
6071                 if (!PL_lex_allbrackets &&
6072                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6073                     s -= 2;
6074                     TOKEN(0);
6075                 }
6076                 Rop(OP_LE);
6077             }
6078         }
6079         s--;
6080         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6081             s--;
6082             TOKEN(0);
6083         }
6084         Rop(OP_LT);
6085     case '>':
6086         s++;
6087         {
6088             const char tmp = *s++;
6089             if (tmp == '>') {
6090                 if (*s == '=' && !PL_lex_allbrackets &&
6091                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6092                     s -= 2;
6093                     TOKEN(0);
6094                 }
6095                 SHop(OP_RIGHT_SHIFT);
6096             }
6097             else if (tmp == '=') {
6098                 if (!PL_lex_allbrackets &&
6099                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6100                     s -= 2;
6101                     TOKEN(0);
6102                 }
6103                 Rop(OP_GE);
6104             }
6105         }
6106         s--;
6107         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6108             s--;
6109             TOKEN(0);
6110         }
6111         Rop(OP_GT);
6112
6113     case '$':
6114         CLINE;
6115
6116         if (PL_expect == XOPERATOR) {
6117             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6118                 return deprecate_commaless_var_list();
6119             }
6120         }
6121
6122         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6123             PL_tokenbuf[0] = '@';
6124             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6125                            sizeof PL_tokenbuf - 1, FALSE);
6126             if (PL_expect == XOPERATOR)
6127                 no_op("Array length", s);
6128             if (!PL_tokenbuf[1])
6129                 PREREF(DOLSHARP);
6130             PL_expect = XOPERATOR;
6131             PL_pending_ident = '#';
6132             TOKEN(DOLSHARP);
6133         }
6134
6135         PL_tokenbuf[0] = '$';
6136         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6137                        sizeof PL_tokenbuf - 1, FALSE);
6138         if (PL_expect == XOPERATOR)
6139             no_op("Scalar", s);
6140         if (!PL_tokenbuf[1]) {
6141             if (s == PL_bufend)
6142                 yyerror("Final $ should be \\$ or $name");
6143             PREREF('$');
6144         }
6145
6146         d = s;
6147         {
6148             const char tmp = *s;
6149             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6150                 s = SKIPSPACE1(s);
6151
6152             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6153                 && intuit_more(s)) {
6154                 if (*s == '[') {
6155                     PL_tokenbuf[0] = '@';
6156                     if (ckWARN(WARN_SYNTAX)) {
6157                         char *t = s+1;
6158
6159                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6160                             t++;
6161                         if (*t++ == ',') {
6162                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6163                             while (t < PL_bufend && *t != ']')
6164                                 t++;
6165                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6166                                         "Multidimensional syntax %.*s not supported",
6167                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6168                         }
6169                     }
6170                 }
6171                 else if (*s == '{') {
6172                     char *t;
6173                     PL_tokenbuf[0] = '%';
6174                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6175                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6176                         {
6177                             char tmpbuf[sizeof PL_tokenbuf];
6178                             do {
6179                                 t++;
6180                             } while (isSPACE(*t));
6181                             if (isIDFIRST_lazy_if(t,UTF)) {
6182                                 STRLEN len;
6183                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6184                                               &len);
6185                                 while (isSPACE(*t))
6186                                     t++;
6187                                 if (*t == ';'
6188                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6189                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6190                                                 "You need to quote \"%"SVf"\"",
6191                                                   SVfARG(newSVpvn_flags(tmpbuf, len,
6192                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6193                             }
6194                         }
6195                 }
6196             }
6197
6198             PL_expect = XOPERATOR;
6199             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6200                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6201                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6202                     PL_expect = XOPERATOR;
6203                 else if (strchr("$@\"'`q", *s))
6204                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6205                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6206                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6207                 else if (isIDFIRST_lazy_if(s,UTF)) {
6208                     char tmpbuf[sizeof PL_tokenbuf];
6209                     int t2;
6210                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6211                     if ((t2 = keyword(tmpbuf, len, 0))) {
6212                         /* binary operators exclude handle interpretations */
6213                         switch (t2) {
6214                         case -KEY_x:
6215                         case -KEY_eq:
6216                         case -KEY_ne:
6217                         case -KEY_gt:
6218                         case -KEY_lt:
6219                         case -KEY_ge:
6220                         case -KEY_le:
6221                         case -KEY_cmp:
6222                             break;
6223                         default:
6224                             PL_expect = XTERM;  /* e.g. print $fh length() */
6225                             break;
6226                         }
6227                     }
6228                     else {
6229                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6230                     }
6231                 }
6232                 else if (isDIGIT(*s))
6233                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6234                 else if (*s == '.' && isDIGIT(s[1]))
6235                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6236                 else if ((*s == '?' || *s == '-' || *s == '+')
6237                          && !isSPACE(s[1]) && s[1] != '=')
6238                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6239                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6240                          && s[1] != '/')
6241                     PL_expect = XTERM;          /* e.g. print $fh /.../
6242                                                    XXX except DORDOR operator
6243                                                 */
6244                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6245                          && s[2] != '=')
6246                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6247             }
6248         }
6249         PL_pending_ident = '$';
6250         TOKEN('$');
6251
6252     case '@':
6253         if (PL_expect == XOPERATOR)
6254             no_op("Array", s);
6255         PL_tokenbuf[0] = '@';
6256         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6257         if (!PL_tokenbuf[1]) {
6258             PREREF('@');
6259         }
6260         if (PL_lex_state == LEX_NORMAL)
6261             s = SKIPSPACE1(s);
6262         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6263             if (*s == '{')
6264                 PL_tokenbuf[0] = '%';
6265
6266             /* Warn about @ where they meant $. */
6267             if (*s == '[' || *s == '{') {
6268                 if (ckWARN(WARN_SYNTAX)) {
6269                     const char *t = s + 1;
6270                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6271                         t += UTF ? UTF8SKIP(t) : 1;
6272                     if (*t == '}' || *t == ']') {
6273                         t++;
6274                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6275        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6276                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6277                             "Scalar value %"SVf" better written as $%"SVf,
6278                             SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6279                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6280                             SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6281                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6282                     }
6283                 }
6284             }
6285         }
6286         PL_pending_ident = '@';
6287         TERM('@');
6288
6289      case '/':                  /* may be division, defined-or, or pattern */
6290         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6291             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6292                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6293                 TOKEN(0);
6294             s += 2;
6295             AOPERATOR(DORDOR);
6296         }
6297      case '?':                  /* may either be conditional or pattern */
6298         if (PL_expect == XOPERATOR) {
6299              char tmp = *s++;
6300              if(tmp == '?') {
6301                 if (!PL_lex_allbrackets &&
6302                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6303                     s--;
6304                     TOKEN(0);
6305                 }
6306                 PL_lex_allbrackets++;
6307                 OPERATOR('?');
6308              }
6309              else {
6310                  tmp = *s++;
6311                  if(tmp == '/') {
6312                      /* A // operator. */
6313                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6314                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6315                                             LEX_FAKEEOF_LOGIC)) {
6316                         s -= 2;
6317                         TOKEN(0);
6318                     }
6319                     AOPERATOR(DORDOR);
6320                  }
6321                  else {
6322                      s--;
6323                      if (*s == '=' && !PL_lex_allbrackets &&
6324                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6325                          s--;
6326                          TOKEN(0);
6327                      }
6328                      Mop(OP_DIVIDE);
6329                  }
6330              }
6331          }
6332          else {
6333              /* Disable warning on "study /blah/" */
6334              if (PL_oldoldbufptr == PL_last_uni
6335               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6336                   || memNE(PL_last_uni, "study", 5)
6337                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6338               ))
6339                  check_uni();
6340              if (*s == '?')
6341                  deprecate("?PATTERN? without explicit operator");
6342              s = scan_pat(s,OP_MATCH);
6343              TERM(sublex_start());
6344          }
6345
6346     case '.':
6347         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6348 #ifdef PERL_STRICT_CR
6349             && s[1] == '\n'
6350 #else
6351             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6352 #endif
6353             && (s == PL_linestart || s[-1] == '\n') )
6354         {
6355             PL_lex_formbrack = 0;
6356             PL_expect = XSTATE;
6357             goto rightbracket;
6358         }
6359         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6360             s += 3;
6361             OPERATOR(YADAYADA);
6362         }
6363         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6364             char tmp = *s++;
6365             if (*s == tmp) {
6366                 if (!PL_lex_allbrackets &&
6367                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6368                     s--;
6369                     TOKEN(0);
6370                 }
6371                 s++;
6372                 if (*s == tmp) {
6373                     s++;
6374                     pl_yylval.ival = OPf_SPECIAL;
6375                 }
6376                 else
6377                     pl_yylval.ival = 0;
6378                 OPERATOR(DOTDOT);
6379             }
6380             if (*s == '=' && !PL_lex_allbrackets &&
6381                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6382                 s--;
6383                 TOKEN(0);
6384             }
6385             Aop(OP_CONCAT);
6386         }
6387         /* FALL THROUGH */
6388     case '0': case '1': case '2': case '3': case '4':
6389     case '5': case '6': case '7': case '8': case '9':
6390         s = scan_num(s, &pl_yylval);
6391         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6392         if (PL_expect == XOPERATOR)
6393             no_op("Number",s);
6394         TERM(THING);
6395
6396     case '\'':
6397         s = scan_str(s,!!PL_madskills,FALSE);
6398         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6399         if (PL_expect == XOPERATOR) {
6400             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6401                 return deprecate_commaless_var_list();
6402             }
6403             else
6404                 no_op("String",s);
6405         }
6406         if (!s)
6407             missingterm(NULL);
6408         pl_yylval.ival = OP_CONST;
6409         TERM(sublex_start());
6410
6411     case '"':
6412         s = scan_str(s,!!PL_madskills,FALSE);
6413         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6414         if (PL_expect == XOPERATOR) {
6415             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6416                 return deprecate_commaless_var_list();
6417             }
6418             else
6419                 no_op("String",s);
6420         }
6421         if (!s)
6422             missingterm(NULL);
6423         pl_yylval.ival = OP_CONST;
6424         /* FIXME. I think that this can be const if char *d is replaced by
6425            more localised variables.  */
6426         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6427             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6428                 pl_yylval.ival = OP_STRINGIFY;
6429                 break;
6430             }
6431         }
6432         TERM(sublex_start());
6433
6434     case '`':
6435         s = scan_str(s,!!PL_madskills,FALSE);
6436         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6437         if (PL_expect == XOPERATOR)
6438             no_op("Backticks",s);
6439         if (!s)
6440             missingterm(NULL);
6441         readpipe_override();
6442         TERM(sublex_start());
6443
6444     case '\\':
6445         s++;
6446         if (PL_lex_inwhat && isDIGIT(*s))
6447             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6448                            *s, *s);
6449         if (PL_expect == XOPERATOR)
6450             no_op("Backslash",s);
6451         OPERATOR(REFGEN);
6452
6453     case 'v':
6454         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6455             char *start = s + 2;
6456             while (isDIGIT(*start) || *start == '_')
6457                 start++;
6458             if (*start == '.' && isDIGIT(start[1])) {
6459                 s = scan_num(s, &pl_yylval);
6460                 TERM(THING);
6461             }
6462             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6463             else if (!isALPHA(*start) && (PL_expect == XTERM
6464                         || PL_expect == XREF || PL_expect == XSTATE
6465                         || PL_expect == XTERMORDORDOR)) {
6466                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6467                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6468                 if (!gv) {
6469                     s = scan_num(s, &pl_yylval);
6470                     TERM(THING);
6471                 }
6472             }
6473         }
6474         goto keylookup;
6475     case 'x':
6476         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6477             s++;
6478             Mop(OP_REPEAT);
6479         }
6480         goto keylookup;
6481
6482     case '_':
6483     case 'a': case 'A':
6484     case 'b': case 'B':
6485     case 'c': case 'C':
6486     case 'd': case 'D':
6487     case 'e': case 'E':
6488     case 'f': case 'F':
6489     case 'g': case 'G':
6490     case 'h': case 'H':
6491     case 'i': case 'I':
6492     case 'j': case 'J':
6493     case 'k': case 'K':
6494     case 'l': case 'L':
6495     case 'm': case 'M':
6496     case 'n': case 'N':
6497     case 'o': case 'O':
6498     case 'p': case 'P':
6499     case 'q': case 'Q':
6500     case 'r': case 'R':
6501     case 's': case 'S':
6502     case 't': case 'T':
6503     case 'u': case 'U':
6504               case 'V':
6505     case 'w': case 'W':
6506               case 'X':
6507     case 'y': case 'Y':
6508     case 'z': case 'Z':
6509
6510       keylookup: {
6511         bool anydelim;
6512         I32 tmp;
6513
6514         orig_keyword = 0;
6515         gv = NULL;
6516         gvp = NULL;
6517
6518         PL_bufptr = s;
6519         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6520
6521         /* Some keywords can be followed by any delimiter, including ':' */
6522         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6523
6524         /* x::* is just a word, unless x is "CORE" */
6525         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6526             goto just_a_word;
6527
6528         d = s;
6529         while (d < PL_bufend && isSPACE(*d))
6530                 d++;    /* no comments skipped here, or s### is misparsed */
6531
6532         /* Is this a word before a => operator? */
6533         if (*d == '=' && d[1] == '>') {
6534             CLINE;
6535             pl_yylval.opval
6536                 = (OP*)newSVOP(OP_CONST, 0,
6537                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6538             pl_yylval.opval->op_private = OPpCONST_BARE;
6539             TERM(WORD);
6540         }
6541
6542         /* Check for plugged-in keyword */
6543         {
6544             OP *o;
6545             int result;
6546             char *saved_bufptr = PL_bufptr;
6547             PL_bufptr = s;
6548             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6549             s = PL_bufptr;
6550             if (result == KEYWORD_PLUGIN_DECLINE) {
6551                 /* not a plugged-in keyword */
6552                 PL_bufptr = saved_bufptr;
6553             } else if (result == KEYWORD_PLUGIN_STMT) {
6554                 pl_yylval.opval = o;
6555                 CLINE;
6556                 PL_expect = XSTATE;
6557                 return REPORT(PLUGSTMT);
6558             } else if (result == KEYWORD_PLUGIN_EXPR) {
6559                 pl_yylval.opval = o;
6560                 CLINE;
6561                 PL_expect = XOPERATOR;
6562                 return REPORT(PLUGEXPR);
6563             } else {
6564                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6565                                         PL_tokenbuf);
6566             }
6567         }
6568
6569         /* Check for built-in keyword */
6570         tmp = keyword(PL_tokenbuf, len, 0);
6571
6572         /* Is this a label? */
6573         if (!anydelim && PL_expect == XSTATE
6574               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6575             s = d + 1;
6576             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6577             CLINE;
6578             TOKEN(LABEL);
6579         }
6580
6581         if (tmp < 0) {                  /* second-class keyword? */
6582             GV *ogv = NULL;     /* override (winner) */
6583             GV *hgv = NULL;     /* hidden (loser) */
6584             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6585                 CV *cv;
6586                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6587                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6588                     (cv = GvCVu(gv)))
6589                 {
6590                     if (GvIMPORTED_CV(gv))
6591                         ogv = gv;
6592                     else if (! CvMETHOD(cv))
6593                         hgv = gv;
6594                 }
6595                 if (!ogv &&
6596                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6597                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
6598                     (gv = *gvp) && isGV_with_GP(gv) &&
6599                     GvCVu(gv) && GvIMPORTED_CV(gv))
6600                 {
6601                     ogv = gv;
6602                 }
6603             }
6604             if (ogv) {
6605                 orig_keyword = tmp;
6606                 tmp = 0;                /* overridden by import or by GLOBAL */
6607             }
6608             else if (gv && !gvp
6609                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6610                      && GvCVu(gv))
6611             {
6612                 tmp = 0;                /* any sub overrides "weak" keyword */
6613             }
6614             else {                      /* no override */
6615                 tmp = -tmp;
6616                 if (tmp == KEY_dump) {
6617                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6618                                    "dump() better written as CORE::dump()");
6619                 }
6620                 gv = NULL;
6621                 gvp = 0;
6622                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6623                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6624                                    "Ambiguous call resolved as CORE::%s(), "
6625                                    "qualify as such or use &",
6626                                    GvENAME(hgv));
6627             }
6628         }
6629
6630       reserved_word:
6631         switch (tmp) {
6632
6633         default:                        /* not a keyword */
6634             /* Trade off - by using this evil construction we can pull the
6635                variable gv into the block labelled keylookup. If not, then
6636                we have to give it function scope so that the goto from the
6637                earlier ':' case doesn't bypass the initialisation.  */
6638             if (0) {
6639             just_a_word_zero_gv:
6640                 gv = NULL;
6641                 gvp = NULL;
6642                 orig_keyword = 0;
6643             }
6644           just_a_word: {
6645                 SV *sv;
6646                 int pkgname = 0;
6647                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6648                 OP *rv2cv_op;
6649                 CV *cv;
6650 #ifdef PERL_MAD
6651                 SV *nextPL_nextwhite = 0;
6652 #endif
6653
6654
6655                 /* Get the rest if it looks like a package qualifier */
6656
6657                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6658                     STRLEN morelen;
6659                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6660                                   TRUE, &morelen);
6661                     if (!morelen)
6662                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6663                                 *s == '\'' ? "'" : "::");
6664                     len += morelen;
6665                     pkgname = 1;
6666                 }
6667
6668                 if (PL_expect == XOPERATOR) {
6669                     if (PL_bufptr == PL_linestart) {
6670                         CopLINE_dec(PL_curcop);
6671                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6672                         CopLINE_inc(PL_curcop);
6673                     }
6674                     else
6675                         no_op("Bareword",s);
6676                 }
6677
6678                 /* Look for a subroutine with this name in current package,
6679                    unless name is "Foo::", in which case Foo is a bareword
6680                    (and a package name). */
6681
6682                 if (len > 2 && !PL_madskills &&
6683                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6684                 {
6685                     if (ckWARN(WARN_BAREWORD)
6686                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6687                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6688                             "Bareword \"%s\" refers to nonexistent package",
6689                              PL_tokenbuf);
6690                     len -= 2;
6691                     PL_tokenbuf[len] = '\0';
6692                     gv = NULL;
6693                     gvp = 0;
6694                 }
6695                 else {
6696                     if (!gv) {
6697                         /* Mustn't actually add anything to a symbol table.
6698                            But also don't want to "initialise" any placeholder
6699                            constants that might already be there into full
6700                            blown PVGVs with attached PVCV.  */
6701                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6702                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6703                                                SVt_PVCV);
6704                     }
6705                     len = 0;
6706                 }
6707
6708                 /* if we saw a global override before, get the right name */
6709
6710                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6711                     len ? len : strlen(PL_tokenbuf));
6712                 if (gvp) {
6713                     SV * const tmp_sv = sv;
6714                     sv = newSVpvs("CORE::GLOBAL::");
6715                     sv_catsv(sv, tmp_sv);
6716                     SvREFCNT_dec(tmp_sv);
6717                 }
6718
6719 #ifdef PERL_MAD
6720                 if (PL_madskills && !PL_thistoken) {
6721                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6722                     PL_thistoken = newSVpvn(start,s - start);
6723                     PL_realtokenstart = s - SvPVX(PL_linestr);
6724                 }
6725 #endif
6726
6727                 /* Presume this is going to be a bareword of some sort. */
6728                 CLINE;
6729                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6730                 pl_yylval.opval->op_private = OPpCONST_BARE;
6731
6732                 /* And if "Foo::", then that's what it certainly is. */
6733                 if (len)
6734                     goto safe_bareword;
6735
6736                 {
6737                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6738                     const_op->op_private = OPpCONST_BARE;
6739                     rv2cv_op = newCVREF(0, const_op);
6740                 }
6741                 cv = rv2cv_op_cv(rv2cv_op, 0);
6742
6743                 /* See if it's the indirect object for a list operator. */
6744
6745                 if (PL_oldoldbufptr &&
6746                     PL_oldoldbufptr < PL_bufptr &&
6747                     (PL_oldoldbufptr == PL_last_lop
6748                      || PL_oldoldbufptr == PL_last_uni) &&
6749                     /* NO SKIPSPACE BEFORE HERE! */
6750                     (PL_expect == XREF ||
6751                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6752                 {
6753                     bool immediate_paren = *s == '(';
6754
6755                     /* (Now we can afford to cross potential line boundary.) */
6756                     s = SKIPSPACE2(s,nextPL_nextwhite);
6757 #ifdef PERL_MAD
6758                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6759 #endif
6760
6761                     /* Two barewords in a row may indicate method call. */
6762
6763                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6764                         (tmp = intuit_method(s, gv, cv))) {
6765                         op_free(rv2cv_op);
6766                         if (tmp == METHOD && !PL_lex_allbrackets &&
6767                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6768                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6769                         return REPORT(tmp);
6770                     }
6771
6772                     /* If not a declared subroutine, it's an indirect object. */
6773                     /* (But it's an indir obj regardless for sort.) */
6774                     /* Also, if "_" follows a filetest operator, it's a bareword */
6775
6776                     if (
6777                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6778                          (!cv &&
6779                         (PL_last_lop_op != OP_MAPSTART &&
6780                          PL_last_lop_op != OP_GREPSTART))))
6781                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6782                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6783                        )
6784                     {
6785                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6786                         goto bareword;
6787                     }
6788                 }
6789
6790                 PL_expect = XOPERATOR;
6791 #ifdef PERL_MAD
6792                 if (isSPACE(*s))
6793                     s = SKIPSPACE2(s,nextPL_nextwhite);
6794                 PL_nextwhite = nextPL_nextwhite;
6795 #else
6796                 s = skipspace(s);
6797 #endif
6798
6799                 /* Is this a word before a => operator? */
6800                 if (*s == '=' && s[1] == '>' && !pkgname) {
6801                     op_free(rv2cv_op);
6802                     CLINE;
6803                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6804                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6805                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6806                     TERM(WORD);
6807                 }
6808
6809                 /* If followed by a paren, it's certainly a subroutine. */
6810                 if (*s == '(') {
6811                     CLINE;
6812                     if (cv) {
6813                         d = s + 1;
6814                         while (SPACE_OR_TAB(*d))
6815                             d++;
6816                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6817                             s = d + 1;
6818                             goto its_constant;
6819                         }
6820                     }
6821 #ifdef PERL_MAD
6822                     if (PL_madskills) {
6823                         PL_nextwhite = PL_thiswhite;
6824                         PL_thiswhite = 0;
6825                     }
6826                     start_force(PL_curforce);
6827 #endif
6828                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6829                     PL_expect = XOPERATOR;
6830 #ifdef PERL_MAD
6831                     if (PL_madskills) {
6832                         PL_nextwhite = nextPL_nextwhite;
6833                         curmad('X', PL_thistoken);
6834                         PL_thistoken = newSVpvs("");
6835                     }
6836 #endif
6837                     op_free(rv2cv_op);
6838                     force_next(WORD);
6839                     pl_yylval.ival = 0;
6840                     TOKEN('&');
6841                 }
6842
6843                 /* If followed by var or block, call it a method (unless sub) */
6844
6845                 if ((*s == '$' || *s == '{') && !cv) {
6846                     op_free(rv2cv_op);
6847                     PL_last_lop = PL_oldbufptr;
6848                     PL_last_lop_op = OP_METHOD;
6849                     if (!PL_lex_allbrackets &&
6850                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6851                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6852                     PREBLOCK(METHOD);
6853                 }
6854
6855                 /* If followed by a bareword, see if it looks like indir obj. */
6856
6857                 if (!orig_keyword
6858                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6859                         && (tmp = intuit_method(s, gv, cv))) {
6860                     op_free(rv2cv_op);
6861                     if (tmp == METHOD && !PL_lex_allbrackets &&
6862                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6863                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6864                     return REPORT(tmp);
6865                 }
6866
6867                 /* Not a method, so call it a subroutine (if defined) */
6868
6869                 if (cv) {
6870                     if (lastchar == '-')
6871                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6872                                          "Ambiguous use of -%s resolved as -&%s()",
6873                                          PL_tokenbuf, PL_tokenbuf);
6874                     /* Check for a constant sub */
6875                     if ((sv = cv_const_sv(cv))) {
6876                   its_constant:
6877                         op_free(rv2cv_op);
6878                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6879                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6880                         pl_yylval.opval->op_private = 0;
6881                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6882                         TOKEN(WORD);
6883                     }
6884
6885                     op_free(pl_yylval.opval);
6886                     pl_yylval.opval = rv2cv_op;
6887                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6888                     PL_last_lop = PL_oldbufptr;
6889                     PL_last_lop_op = OP_ENTERSUB;
6890                     /* Is there a prototype? */
6891                     if (
6892 #ifdef PERL_MAD
6893                         cv &&
6894 #endif
6895                         SvPOK(cv))
6896                     {
6897                         STRLEN protolen = CvPROTOLEN(cv);
6898                         const char *proto = CvPROTO(cv);
6899                         bool optional;
6900                         if (!protolen)
6901                             TERM(FUNC0SUB);
6902                         if ((optional = *proto == ';'))
6903                           do
6904                             proto++;
6905                           while (*proto == ';');
6906                         if (
6907                             (
6908                                 (
6909                                     *proto == '$' || *proto == '_'
6910                                  || *proto == '*' || *proto == '+'
6911                                 )
6912                              && proto[1] == '\0'
6913                             )
6914                          || (
6915                              *proto == '\\' && proto[1] && proto[2] == '\0'
6916                             )
6917                         )
6918                             UNIPROTO(UNIOPSUB,optional);
6919                         if (*proto == '\\' && proto[1] == '[') {
6920                             const char *p = proto + 2;
6921                             while(*p && *p != ']')
6922                                 ++p;
6923                             if(*p == ']' && !p[1])
6924                                 UNIPROTO(UNIOPSUB,optional);
6925                         }
6926                         if (*proto == '&' && *s == '{') {
6927                             if (PL_curstash)
6928                                 sv_setpvs(PL_subname, "__ANON__");
6929                             else
6930                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6931                             if (!PL_lex_allbrackets &&
6932                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6933                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6934                             PREBLOCK(LSTOPSUB);
6935                         }
6936                     }
6937 #ifdef PERL_MAD
6938                     {
6939                         if (PL_madskills) {
6940                             PL_nextwhite = PL_thiswhite;
6941                             PL_thiswhite = 0;
6942                         }
6943                         start_force(PL_curforce);
6944                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6945                         PL_expect = XTERM;
6946                         if (PL_madskills) {
6947                             PL_nextwhite = nextPL_nextwhite;
6948                             curmad('X', PL_thistoken);
6949                             PL_thistoken = newSVpvs("");
6950                         }
6951                         force_next(WORD);
6952                         if (!PL_lex_allbrackets &&
6953                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6954                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6955                         TOKEN(NOAMP);
6956                     }
6957                 }
6958
6959                 /* Guess harder when madskills require "best effort". */
6960                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6961                     int probable_sub = 0;
6962                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6963                         probable_sub = 1;
6964                     else if (isALPHA(*s)) {
6965                         char tmpbuf[1024];
6966                         STRLEN tmplen;
6967                         d = s;
6968                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6969                         if (!keyword(tmpbuf, tmplen, 0))
6970                             probable_sub = 1;
6971                         else {
6972                             while (d < PL_bufend && isSPACE(*d))
6973                                 d++;
6974                             if (*d == '=' && d[1] == '>')
6975                                 probable_sub = 1;
6976                         }
6977                     }
6978                     if (probable_sub) {
6979                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
6980                                         SVt_PVCV);
6981                         op_free(pl_yylval.opval);
6982                         pl_yylval.opval = rv2cv_op;
6983                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6984                         PL_last_lop = PL_oldbufptr;
6985                         PL_last_lop_op = OP_ENTERSUB;
6986                         PL_nextwhite = PL_thiswhite;
6987                         PL_thiswhite = 0;
6988                         start_force(PL_curforce);
6989                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6990                         PL_expect = XTERM;
6991                         PL_nextwhite = nextPL_nextwhite;
6992                         curmad('X', PL_thistoken);
6993                         PL_thistoken = newSVpvs("");
6994                         force_next(WORD);
6995                         if (!PL_lex_allbrackets &&
6996                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6997                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6998                         TOKEN(NOAMP);
6999                     }
7000 #else
7001                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7002                     PL_expect = XTERM;
7003                     force_next(WORD);
7004                     if (!PL_lex_allbrackets &&
7005                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7006                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7007                     TOKEN(NOAMP);
7008 #endif
7009                 }
7010
7011                 /* Call it a bare word */
7012
7013                 if (PL_hints & HINT_STRICT_SUBS)
7014                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7015                 else {
7016                 bareword:
7017                     /* after "print" and similar functions (corresponding to
7018                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7019                      * a filehandle should be subject to "strict subs".
7020                      * Likewise for the optional indirect-object argument to system
7021                      * or exec, which can't be a bareword */
7022                     if ((PL_last_lop_op == OP_PRINT
7023                             || PL_last_lop_op == OP_PRTF
7024                             || PL_last_lop_op == OP_SAY
7025                             || PL_last_lop_op == OP_SYSTEM
7026                             || PL_last_lop_op == OP_EXEC)
7027                             && (PL_hints & HINT_STRICT_SUBS))
7028                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7029                     if (lastchar != '-') {
7030                         if (ckWARN(WARN_RESERVED)) {
7031                             d = PL_tokenbuf;
7032                             while (isLOWER(*d))
7033                                 d++;
7034                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7035                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7036                                        PL_tokenbuf);
7037                         }
7038                     }
7039                 }
7040                 op_free(rv2cv_op);
7041
7042             safe_bareword:
7043                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7044                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7045                                      "Operator or semicolon missing before %c%"SVf,
7046                                      lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7047                                                     strlen(PL_tokenbuf),
7048                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7049                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7050                                      "Ambiguous use of %c resolved as operator %c",
7051                                      lastchar, lastchar);
7052                 }
7053                 TOKEN(WORD);
7054             }
7055
7056         case KEY___FILE__:
7057             FUN0OP(
7058                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7059             );
7060
7061         case KEY___LINE__:
7062             FUN0OP(
7063                 (OP*)newSVOP(OP_CONST, 0,
7064                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7065             );
7066
7067         case KEY___PACKAGE__:
7068             FUN0OP(
7069                 (OP*)newSVOP(OP_CONST, 0,
7070                                         (PL_curstash
7071                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7072                                          : &PL_sv_undef))
7073             );
7074
7075         case KEY___DATA__:
7076         case KEY___END__: {
7077             GV *gv;
7078             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7079                 const char *pname = "main";
7080                 STRLEN plen = 4;
7081                 U32 putf8 = 0;
7082                 if (PL_tokenbuf[2] == 'D')
7083                 {
7084                     HV * const stash =
7085                         PL_curstash ? PL_curstash : PL_defstash;
7086                     pname = HvNAME_get(stash);
7087                     plen  = HvNAMELEN (stash);
7088                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7089                 }
7090                 gv = gv_fetchpvn_flags(
7091                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7092                         plen+6, GV_ADD|putf8, SVt_PVIO
7093                 );
7094                 GvMULTI_on(gv);
7095                 if (!GvIO(gv))
7096                     GvIOp(gv) = newIO();
7097                 IoIFP(GvIOp(gv)) = PL_rsfp;
7098 #if defined(HAS_FCNTL) && defined(F_SETFD)
7099                 {
7100                     const int fd = PerlIO_fileno(PL_rsfp);
7101                     fcntl(fd,F_SETFD,fd >= 3);
7102                 }
7103 #endif
7104                 /* Mark this internal pseudo-handle as clean */
7105                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7106                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7107                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7108                 else
7109                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7110 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7111                 /* if the script was opened in binmode, we need to revert
7112                  * it to text mode for compatibility; but only iff it has CRs
7113                  * XXX this is a questionable hack at best. */
7114                 if (PL_bufend-PL_bufptr > 2
7115                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7116                 {
7117                     Off_t loc = 0;
7118                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7119                         loc = PerlIO_tell(PL_rsfp);
7120                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7121                     }
7122 #ifdef NETWARE
7123                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7124 #else
7125                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7126 #endif  /* NETWARE */
7127                         if (loc > 0)
7128                             PerlIO_seek(PL_rsfp, loc, 0);
7129                     }
7130                 }
7131 #endif
7132 #ifdef PERLIO_LAYERS
7133                 if (!IN_BYTES) {
7134                     if (UTF)
7135                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7136                     else if (PL_encoding) {
7137                         SV *name;
7138                         dSP;
7139                         ENTER;
7140                         SAVETMPS;
7141                         PUSHMARK(sp);
7142                         EXTEND(SP, 1);
7143                         XPUSHs(PL_encoding);
7144                         PUTBACK;
7145                         call_method("name", G_SCALAR);
7146                         SPAGAIN;
7147                         name = POPs;
7148                         PUTBACK;
7149                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7150                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7151                                                       SVfARG(name)));
7152                         FREETMPS;
7153                         LEAVE;
7154                     }
7155                 }
7156 #endif
7157 #ifdef PERL_MAD
7158                 if (PL_madskills) {
7159                     if (PL_realtokenstart >= 0) {
7160                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7161                         if (!PL_endwhite)
7162                             PL_endwhite = newSVpvs("");
7163                         sv_catsv(PL_endwhite, PL_thiswhite);
7164                         PL_thiswhite = 0;
7165                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7166                         PL_realtokenstart = -1;
7167                     }
7168                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7169                            != NULL) ;
7170                 }
7171 #endif
7172                 PL_rsfp = NULL;
7173             }
7174             goto fake_eof;
7175         }
7176
7177         case KEY___SUB__:
7178             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7179
7180         case KEY_AUTOLOAD:
7181         case KEY_DESTROY:
7182         case KEY_BEGIN:
7183         case KEY_UNITCHECK:
7184         case KEY_CHECK:
7185         case KEY_INIT:
7186         case KEY_END:
7187             if (PL_expect == XSTATE) {
7188                 s = PL_bufptr;
7189                 goto really_sub;
7190             }
7191             goto just_a_word;
7192
7193         case KEY_CORE:
7194             if (*s == ':' && s[1] == ':') {
7195                 s += 2;
7196                 d = s;
7197                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7198                 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7199                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7200                 if (tmp < 0)
7201                     tmp = -tmp;
7202                 else if (tmp == KEY_require || tmp == KEY_do
7203                       || tmp == KEY_glob)
7204                     /* that's a way to remember we saw "CORE::" */
7205                     orig_keyword = tmp;
7206                 goto reserved_word;
7207             }
7208             goto just_a_word;
7209
7210         case KEY_abs:
7211             UNI(OP_ABS);
7212
7213         case KEY_alarm:
7214             UNI(OP_ALARM);
7215
7216         case KEY_accept:
7217             LOP(OP_ACCEPT,XTERM);
7218
7219         case KEY_and:
7220             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7221                 return REPORT(0);
7222             OPERATOR(ANDOP);
7223
7224         case KEY_atan2:
7225             LOP(OP_ATAN2,XTERM);
7226
7227         case KEY_bind:
7228             LOP(OP_BIND,XTERM);
7229
7230         case KEY_binmode:
7231             LOP(OP_BINMODE,XTERM);
7232
7233         case KEY_bless:
7234             LOP(OP_BLESS,XTERM);
7235
7236         case KEY_break:
7237             FUN0(OP_BREAK);
7238
7239         case KEY_chop:
7240             UNI(OP_CHOP);
7241
7242         case KEY_continue:
7243                     /* We have to disambiguate the two senses of
7244                       "continue". If the next token is a '{' then
7245                       treat it as the start of a continue block;
7246                       otherwise treat it as a control operator.
7247                      */
7248                     s = skipspace(s);
7249                     if (*s == '{')
7250             PREBLOCK(CONTINUE);
7251                     else
7252                         FUN0(OP_CONTINUE);
7253
7254         case KEY_chdir:
7255             /* may use HOME */
7256             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7257             UNI(OP_CHDIR);
7258
7259         case KEY_close:
7260             UNI(OP_CLOSE);
7261
7262         case KEY_closedir:
7263             UNI(OP_CLOSEDIR);
7264
7265         case KEY_cmp:
7266             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7267                 return REPORT(0);
7268             Eop(OP_SCMP);
7269
7270         case KEY_caller:
7271             UNI(OP_CALLER);
7272
7273         case KEY_crypt:
7274 #ifdef FCRYPT
7275             if (!PL_cryptseen) {
7276                 PL_cryptseen = TRUE;
7277                 init_des();
7278             }
7279 #endif
7280             LOP(OP_CRYPT,XTERM);
7281
7282         case KEY_chmod:
7283             LOP(OP_CHMOD,XTERM);
7284
7285         case KEY_chown:
7286             LOP(OP_CHOWN,XTERM);
7287
7288         case KEY_connect:
7289             LOP(OP_CONNECT,XTERM);
7290
7291         case KEY_chr:
7292             UNI(OP_CHR);
7293
7294         case KEY_cos:
7295             UNI(OP_COS);
7296
7297         case KEY_chroot:
7298             UNI(OP_CHROOT);
7299
7300         case KEY_default:
7301             PREBLOCK(DEFAULT);
7302
7303         case KEY_do:
7304             s = SKIPSPACE1(s);
7305             if (*s == '{')
7306                 PRETERMBLOCK(DO);
7307             if (*s != '\'')
7308                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7309             if (orig_keyword == KEY_do) {
7310                 orig_keyword = 0;
7311                 pl_yylval.ival = 1;
7312             }
7313             else
7314                 pl_yylval.ival = 0;
7315             OPERATOR(DO);
7316
7317         case KEY_die:
7318             PL_hints |= HINT_BLOCK_SCOPE;
7319             LOP(OP_DIE,XTERM);
7320
7321         case KEY_defined:
7322             UNI(OP_DEFINED);
7323
7324         case KEY_delete:
7325             UNI(OP_DELETE);
7326
7327         case KEY_dbmopen:
7328             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7329                               STR_WITH_LEN("NDBM_File::"),
7330                               STR_WITH_LEN("DB_File::"),
7331                               STR_WITH_LEN("GDBM_File::"),
7332                               STR_WITH_LEN("SDBM_File::"),
7333                               STR_WITH_LEN("ODBM_File::"),
7334                               NULL);
7335             LOP(OP_DBMOPEN,XTERM);
7336
7337         case KEY_dbmclose:
7338             UNI(OP_DBMCLOSE);
7339
7340         case KEY_dump:
7341             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7342             LOOPX(OP_DUMP);
7343
7344         case KEY_else:
7345             PREBLOCK(ELSE);
7346
7347         case KEY_elsif:
7348             pl_yylval.ival = CopLINE(PL_curcop);
7349             OPERATOR(ELSIF);
7350
7351         case KEY_eq:
7352             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7353                 return REPORT(0);
7354             Eop(OP_SEQ);
7355
7356         case KEY_exists:
7357             UNI(OP_EXISTS);
7358
7359         case KEY_exit:
7360             if (PL_madskills)
7361                 UNI(OP_INT);
7362             UNI(OP_EXIT);
7363
7364         case KEY_eval:
7365             s = SKIPSPACE1(s);
7366             if (*s == '{') { /* block eval */
7367                 PL_expect = XTERMBLOCK;
7368                 UNIBRACK(OP_ENTERTRY);
7369             }
7370             else { /* string eval */
7371                 PL_expect = XTERM;
7372                 UNIBRACK(OP_ENTEREVAL);
7373             }
7374
7375         case KEY_evalbytes:
7376             PL_expect = XTERM;
7377             UNIBRACK(-OP_ENTEREVAL);
7378
7379         case KEY_eof:
7380             UNI(OP_EOF);
7381
7382         case KEY_exp:
7383             UNI(OP_EXP);
7384
7385         case KEY_each:
7386             UNI(OP_EACH);
7387
7388         case KEY_exec:
7389             LOP(OP_EXEC,XREF);
7390
7391         case KEY_endhostent:
7392             FUN0(OP_EHOSTENT);
7393
7394         case KEY_endnetent:
7395             FUN0(OP_ENETENT);
7396
7397         case KEY_endservent:
7398             FUN0(OP_ESERVENT);
7399
7400         case KEY_endprotoent:
7401             FUN0(OP_EPROTOENT);
7402
7403         case KEY_endpwent:
7404             FUN0(OP_EPWENT);
7405
7406         case KEY_endgrent:
7407             FUN0(OP_EGRENT);
7408
7409         case KEY_for:
7410         case KEY_foreach:
7411             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7412                 return REPORT(0);
7413             pl_yylval.ival = CopLINE(PL_curcop);
7414             s = SKIPSPACE1(s);
7415             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7416                 char *p = s;
7417 #ifdef PERL_MAD
7418                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7419 #endif
7420
7421                 if ((PL_bufend - p) >= 3 &&
7422                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7423                     p += 2;
7424                 else if ((PL_bufend - p) >= 4 &&
7425                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7426                     p += 3;
7427                 p = PEEKSPACE(p);
7428                 if (isIDFIRST_lazy_if(p,UTF)) {
7429                     p = scan_ident(p, PL_bufend,
7430                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7431                     p = PEEKSPACE(p);
7432                 }
7433                 if (*p != '$')
7434                     Perl_croak(aTHX_ "Missing $ on loop variable");
7435 #ifdef PERL_MAD
7436                 s = SvPVX(PL_linestr) + soff;
7437 #endif
7438             }
7439             OPERATOR(FOR);
7440
7441         case KEY_formline:
7442             LOP(OP_FORMLINE,XTERM);
7443
7444         case KEY_fork:
7445             FUN0(OP_FORK);
7446
7447         case KEY_fc:
7448             UNI(OP_FC);
7449
7450         case KEY_fcntl:
7451             LOP(OP_FCNTL,XTERM);
7452
7453         case KEY_fileno:
7454             UNI(OP_FILENO);
7455
7456         case KEY_flock:
7457             LOP(OP_FLOCK,XTERM);
7458
7459         case KEY_gt:
7460             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7461                 return REPORT(0);
7462             Rop(OP_SGT);
7463
7464         case KEY_ge:
7465             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7466                 return REPORT(0);
7467             Rop(OP_SGE);
7468
7469         case KEY_grep:
7470             LOP(OP_GREPSTART, XREF);
7471
7472         case KEY_goto:
7473             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7474             LOOPX(OP_GOTO);
7475
7476         case KEY_gmtime:
7477             UNI(OP_GMTIME);
7478
7479         case KEY_getc:
7480             UNIDOR(OP_GETC);
7481
7482         case KEY_getppid:
7483             FUN0(OP_GETPPID);
7484
7485         case KEY_getpgrp:
7486             UNI(OP_GETPGRP);
7487
7488         case KEY_getpriority:
7489             LOP(OP_GETPRIORITY,XTERM);
7490
7491         case KEY_getprotobyname:
7492             UNI(OP_GPBYNAME);
7493
7494         case KEY_getprotobynumber:
7495             LOP(OP_GPBYNUMBER,XTERM);
7496
7497         case KEY_getprotoent:
7498             FUN0(OP_GPROTOENT);
7499
7500         case KEY_getpwent:
7501             FUN0(OP_GPWENT);
7502
7503         case KEY_getpwnam:
7504             UNI(OP_GPWNAM);
7505
7506         case KEY_getpwuid:
7507             UNI(OP_GPWUID);
7508
7509         case KEY_getpeername:
7510             UNI(OP_GETPEERNAME);
7511
7512         case KEY_gethostbyname:
7513             UNI(OP_GHBYNAME);
7514
7515         case KEY_gethostbyaddr:
7516             LOP(OP_GHBYADDR,XTERM);
7517
7518         case KEY_gethostent:
7519             FUN0(OP_GHOSTENT);
7520
7521         case KEY_getnetbyname:
7522             UNI(OP_GNBYNAME);
7523
7524         case KEY_getnetbyaddr:
7525             LOP(OP_GNBYADDR,XTERM);
7526
7527         case KEY_getnetent:
7528             FUN0(OP_GNETENT);
7529
7530         case KEY_getservbyname:
7531             LOP(OP_GSBYNAME,XTERM);
7532
7533         case KEY_getservbyport:
7534             LOP(OP_GSBYPORT,XTERM);
7535
7536         case KEY_getservent:
7537             FUN0(OP_GSERVENT);
7538
7539         case KEY_getsockname:
7540             UNI(OP_GETSOCKNAME);
7541
7542         case KEY_getsockopt:
7543             LOP(OP_GSOCKOPT,XTERM);
7544
7545         case KEY_getgrent:
7546             FUN0(OP_GGRENT);
7547
7548         case KEY_getgrnam:
7549             UNI(OP_GGRNAM);
7550
7551         case KEY_getgrgid:
7552             UNI(OP_GGRGID);
7553
7554         case KEY_getlogin:
7555             FUN0(OP_GETLOGIN);
7556
7557         case KEY_given:
7558             pl_yylval.ival = CopLINE(PL_curcop);
7559             OPERATOR(GIVEN);
7560
7561         case KEY_glob:
7562             LOP(
7563              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7564              XTERM
7565             );
7566
7567         case KEY_hex:
7568             UNI(OP_HEX);
7569
7570         case KEY_if:
7571             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7572                 return REPORT(0);
7573             pl_yylval.ival = CopLINE(PL_curcop);
7574             OPERATOR(IF);
7575
7576         case KEY_index:
7577             LOP(OP_INDEX,XTERM);
7578
7579         case KEY_int:
7580             UNI(OP_INT);
7581
7582         case KEY_ioctl:
7583             LOP(OP_IOCTL,XTERM);
7584
7585         case KEY_join:
7586             LOP(OP_JOIN,XTERM);
7587
7588         case KEY_keys:
7589             UNI(OP_KEYS);
7590
7591         case KEY_kill:
7592             LOP(OP_KILL,XTERM);
7593
7594         case KEY_last:
7595             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7596             LOOPX(OP_LAST);
7597
7598         case KEY_lc:
7599             UNI(OP_LC);
7600
7601         case KEY_lcfirst:
7602             UNI(OP_LCFIRST);
7603
7604         case KEY_local:
7605             pl_yylval.ival = 0;
7606             OPERATOR(LOCAL);
7607
7608         case KEY_length:
7609             UNI(OP_LENGTH);
7610
7611         case KEY_lt:
7612             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7613                 return REPORT(0);
7614             Rop(OP_SLT);
7615
7616         case KEY_le:
7617             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7618                 return REPORT(0);
7619             Rop(OP_SLE);
7620
7621         case KEY_localtime:
7622             UNI(OP_LOCALTIME);
7623
7624         case KEY_log:
7625             UNI(OP_LOG);
7626
7627         case KEY_link:
7628             LOP(OP_LINK,XTERM);
7629
7630         case KEY_listen:
7631             LOP(OP_LISTEN,XTERM);
7632
7633         case KEY_lock:
7634             UNI(OP_LOCK);
7635
7636         case KEY_lstat:
7637             UNI(OP_LSTAT);
7638
7639         case KEY_m:
7640             s = scan_pat(s,OP_MATCH);
7641             TERM(sublex_start());
7642
7643         case KEY_map:
7644             LOP(OP_MAPSTART, XREF);
7645
7646         case KEY_mkdir:
7647             LOP(OP_MKDIR,XTERM);
7648
7649         case KEY_msgctl:
7650             LOP(OP_MSGCTL,XTERM);
7651
7652         case KEY_msgget:
7653             LOP(OP_MSGGET,XTERM);
7654
7655         case KEY_msgrcv:
7656             LOP(OP_MSGRCV,XTERM);
7657
7658         case KEY_msgsnd:
7659             LOP(OP_MSGSND,XTERM);
7660
7661         case KEY_our:
7662         case KEY_my:
7663         case KEY_state:
7664             PL_in_my = (U16)tmp;
7665             s = SKIPSPACE1(s);
7666             if (isIDFIRST_lazy_if(s,UTF)) {
7667 #ifdef PERL_MAD
7668                 char* start = s;
7669 #endif
7670                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7671                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7672                     goto really_sub;
7673                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7674                 if (!PL_in_my_stash) {
7675                     char tmpbuf[1024];
7676                     PL_bufptr = s;
7677                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7678                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7679                 }
7680 #ifdef PERL_MAD
7681                 if (PL_madskills) {     /* just add type to declarator token */
7682                     sv_catsv(PL_thistoken, PL_nextwhite);
7683                     PL_nextwhite = 0;
7684                     sv_catpvn(PL_thistoken, start, s - start);
7685                 }
7686 #endif
7687             }
7688             pl_yylval.ival = 1;
7689             OPERATOR(MY);
7690
7691         case KEY_next:
7692             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7693             LOOPX(OP_NEXT);
7694
7695         case KEY_ne:
7696             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7697                 return REPORT(0);
7698             Eop(OP_SNE);
7699
7700         case KEY_no:
7701             s = tokenize_use(0, s);
7702             OPERATOR(USE);
7703
7704         case KEY_not:
7705             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7706                 FUN1(OP_NOT);
7707             else {
7708                 if (!PL_lex_allbrackets &&
7709                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7710                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7711                 OPERATOR(NOTOP);
7712             }
7713
7714         case KEY_open:
7715             s = SKIPSPACE1(s);
7716             if (isIDFIRST_lazy_if(s,UTF)) {
7717                 const char *t;
7718                 for (d = s; isALNUM_lazy_if(d,UTF);)
7719                     d++;
7720                 for (t=d; isSPACE(*t);)
7721                     t++;
7722                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7723                     /* [perl #16184] */
7724                     && !(t[0] == '=' && t[1] == '>')
7725                     && !(t[0] == ':' && t[1] == ':')
7726                     && !keyword(s, d-s, 0)
7727                 ) {
7728                     int parms_len = (int)(d-s);
7729                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7730                            "Precedence problem: open %.*s should be open(%.*s)",
7731                             parms_len, s, parms_len, s);
7732                 }
7733             }
7734             LOP(OP_OPEN,XTERM);
7735
7736         case KEY_or:
7737             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7738                 return REPORT(0);
7739             pl_yylval.ival = OP_OR;
7740             OPERATOR(OROP);
7741
7742         case KEY_ord:
7743             UNI(OP_ORD);
7744
7745         case KEY_oct:
7746             UNI(OP_OCT);
7747
7748         case KEY_opendir:
7749             LOP(OP_OPEN_DIR,XTERM);
7750
7751         case KEY_print:
7752             checkcomma(s,PL_tokenbuf,"filehandle");
7753             LOP(OP_PRINT,XREF);
7754
7755         case KEY_printf:
7756             checkcomma(s,PL_tokenbuf,"filehandle");
7757             LOP(OP_PRTF,XREF);
7758
7759         case KEY_prototype:
7760             UNI(OP_PROTOTYPE);
7761
7762         case KEY_push:
7763             LOP(OP_PUSH,XTERM);
7764
7765         case KEY_pop:
7766             UNIDOR(OP_POP);
7767
7768         case KEY_pos:
7769             UNIDOR(OP_POS);
7770
7771         case KEY_pack:
7772             LOP(OP_PACK,XTERM);
7773
7774         case KEY_package:
7775             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7776             s = SKIPSPACE1(s);
7777             s = force_strict_version(s);
7778             PL_lex_expect = XBLOCK;
7779             OPERATOR(PACKAGE);
7780
7781         case KEY_pipe:
7782             LOP(OP_PIPE_OP,XTERM);
7783
7784         case KEY_q:
7785             s = scan_str(s,!!PL_madskills,FALSE);
7786             if (!s)
7787                 missingterm(NULL);
7788             pl_yylval.ival = OP_CONST;
7789             TERM(sublex_start());
7790
7791         case KEY_quotemeta:
7792             UNI(OP_QUOTEMETA);
7793
7794         case KEY_qw: {
7795             OP *words = NULL;
7796             s = scan_str(s,!!PL_madskills,FALSE);
7797             if (!s)
7798                 missingterm(NULL);
7799             PL_expect = XOPERATOR;
7800             if (SvCUR(PL_lex_stuff)) {
7801                 int warned_comma = !ckWARN(WARN_QW);
7802                 int warned_comment = warned_comma;
7803                 d = SvPV_force(PL_lex_stuff, len);
7804                 while (len) {
7805                     for (; isSPACE(*d) && len; --len, ++d)
7806                         /**/;
7807                     if (len) {
7808                         SV *sv;
7809                         const char *b = d;
7810                         if (!warned_comma || !warned_comment) {
7811                             for (; !isSPACE(*d) && len; --len, ++d) {
7812                                 if (!warned_comma && *d == ',') {
7813                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7814                                         "Possible attempt to separate words with commas");
7815                                     ++warned_comma;
7816                                 }
7817                                 else if (!warned_comment && *d == '#') {
7818                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7819                                         "Possible attempt to put comments in qw() list");
7820                                     ++warned_comment;
7821                                 }
7822                             }
7823                         }
7824                         else {
7825                             for (; !isSPACE(*d) && len; --len, ++d)
7826                                 /**/;
7827                         }
7828                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7829                         words = op_append_elem(OP_LIST, words,
7830                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7831                     }
7832                 }
7833             }
7834             if (!words)
7835                 words = newNULLLIST();
7836             if (PL_lex_stuff) {
7837                 SvREFCNT_dec(PL_lex_stuff);
7838                 PL_lex_stuff = NULL;
7839             }
7840             PL_expect = XOPERATOR;
7841             pl_yylval.opval = sawparens(words);
7842             TOKEN(QWLIST);
7843         }
7844
7845         case KEY_qq:
7846             s = scan_str(s,!!PL_madskills,FALSE);
7847             if (!s)
7848                 missingterm(NULL);
7849             pl_yylval.ival = OP_STRINGIFY;
7850             if (SvIVX(PL_lex_stuff) == '\'')
7851                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7852             TERM(sublex_start());
7853
7854         case KEY_qr:
7855             s = scan_pat(s,OP_QR);
7856             TERM(sublex_start());
7857
7858         case KEY_qx:
7859             s = scan_str(s,!!PL_madskills,FALSE);
7860             if (!s)
7861                 missingterm(NULL);
7862             readpipe_override();
7863             TERM(sublex_start());
7864
7865         case KEY_return:
7866             OLDLOP(OP_RETURN);
7867
7868         case KEY_require:
7869             s = SKIPSPACE1(s);
7870             if (isDIGIT(*s)) {
7871                 s = force_version(s, FALSE);
7872             }
7873             else if (*s != 'v' || !isDIGIT(s[1])
7874                     || (s = force_version(s, TRUE), *s == 'v'))
7875             {
7876                 *PL_tokenbuf = '\0';
7877                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7878                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7879                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7880                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
7881                 else if (*s == '<')
7882                     yyerror("<> should be quotes");
7883             }
7884             if (orig_keyword == KEY_require) {
7885                 orig_keyword = 0;
7886                 pl_yylval.ival = 1;
7887             }
7888             else
7889                 pl_yylval.ival = 0;
7890             PL_expect = XTERM;
7891             PL_bufptr = s;
7892             PL_last_uni = PL_oldbufptr;
7893             PL_last_lop_op = OP_REQUIRE;
7894             s = skipspace(s);
7895             return REPORT( (int)REQUIRE );
7896
7897         case KEY_reset:
7898             UNI(OP_RESET);
7899
7900         case KEY_redo:
7901             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7902             LOOPX(OP_REDO);
7903
7904         case KEY_rename:
7905             LOP(OP_RENAME,XTERM);
7906
7907         case KEY_rand:
7908             UNI(OP_RAND);
7909
7910         case KEY_rmdir:
7911             UNI(OP_RMDIR);
7912
7913         case KEY_rindex:
7914             LOP(OP_RINDEX,XTERM);
7915
7916         case KEY_read:
7917             LOP(OP_READ,XTERM);
7918
7919         case KEY_readdir:
7920             UNI(OP_READDIR);
7921
7922         case KEY_readline:
7923             UNIDOR(OP_READLINE);
7924
7925         case KEY_readpipe:
7926             UNIDOR(OP_BACKTICK);
7927
7928         case KEY_rewinddir:
7929             UNI(OP_REWINDDIR);
7930
7931         case KEY_recv:
7932             LOP(OP_RECV,XTERM);
7933
7934         case KEY_reverse:
7935             LOP(OP_REVERSE,XTERM);
7936
7937         case KEY_readlink:
7938             UNIDOR(OP_READLINK);
7939
7940         case KEY_ref:
7941             UNI(OP_REF);
7942
7943         case KEY_s:
7944             s = scan_subst(s);
7945             if (pl_yylval.opval)
7946                 TERM(sublex_start());
7947             else
7948                 TOKEN(1);       /* force error */
7949
7950         case KEY_say:
7951             checkcomma(s,PL_tokenbuf,"filehandle");
7952             LOP(OP_SAY,XREF);
7953
7954         case KEY_chomp:
7955             UNI(OP_CHOMP);
7956
7957         case KEY_scalar:
7958             UNI(OP_SCALAR);
7959
7960         case KEY_select:
7961             LOP(OP_SELECT,XTERM);
7962
7963         case KEY_seek:
7964             LOP(OP_SEEK,XTERM);
7965
7966         case KEY_semctl:
7967             LOP(OP_SEMCTL,XTERM);
7968
7969         case KEY_semget:
7970             LOP(OP_SEMGET,XTERM);
7971
7972         case KEY_semop:
7973             LOP(OP_SEMOP,XTERM);
7974
7975         case KEY_send:
7976             LOP(OP_SEND,XTERM);
7977
7978         case KEY_setpgrp:
7979             LOP(OP_SETPGRP,XTERM);
7980
7981         case KEY_setpriority:
7982             LOP(OP_SETPRIORITY,XTERM);
7983
7984         case KEY_sethostent:
7985             UNI(OP_SHOSTENT);
7986
7987         case KEY_setnetent:
7988             UNI(OP_SNETENT);
7989
7990         case KEY_setservent:
7991             UNI(OP_SSERVENT);
7992
7993         case KEY_setprotoent:
7994             UNI(OP_SPROTOENT);
7995
7996         case KEY_setpwent:
7997             FUN0(OP_SPWENT);
7998
7999         case KEY_setgrent:
8000             FUN0(OP_SGRENT);
8001
8002         case KEY_seekdir:
8003             LOP(OP_SEEKDIR,XTERM);
8004
8005         case KEY_setsockopt:
8006             LOP(OP_SSOCKOPT,XTERM);
8007
8008         case KEY_shift:
8009             UNIDOR(OP_SHIFT);
8010
8011         case KEY_shmctl:
8012             LOP(OP_SHMCTL,XTERM);
8013
8014         case KEY_shmget:
8015             LOP(OP_SHMGET,XTERM);
8016
8017         case KEY_shmread:
8018             LOP(OP_SHMREAD,XTERM);
8019
8020         case KEY_shmwrite:
8021             LOP(OP_SHMWRITE,XTERM);
8022
8023         case KEY_shutdown:
8024             LOP(OP_SHUTDOWN,XTERM);
8025
8026         case KEY_sin:
8027             UNI(OP_SIN);
8028
8029         case KEY_sleep:
8030             UNI(OP_SLEEP);
8031
8032         case KEY_socket:
8033             LOP(OP_SOCKET,XTERM);
8034
8035         case KEY_socketpair:
8036             LOP(OP_SOCKPAIR,XTERM);
8037
8038         case KEY_sort:
8039             checkcomma(s,PL_tokenbuf,"subroutine name");
8040             s = SKIPSPACE1(s);
8041             PL_expect = XTERM;
8042             s = force_word(s,WORD,TRUE,TRUE,FALSE);
8043             LOP(OP_SORT,XREF);
8044
8045         case KEY_split:
8046             LOP(OP_SPLIT,XTERM);
8047
8048         case KEY_sprintf:
8049             LOP(OP_SPRINTF,XTERM);
8050
8051         case KEY_splice:
8052             LOP(OP_SPLICE,XTERM);
8053
8054         case KEY_sqrt:
8055             UNI(OP_SQRT);
8056
8057         case KEY_srand:
8058             UNI(OP_SRAND);
8059
8060         case KEY_stat:
8061             UNI(OP_STAT);
8062
8063         case KEY_study:
8064             UNI(OP_STUDY);
8065
8066         case KEY_substr:
8067             LOP(OP_SUBSTR,XTERM);
8068
8069         case KEY_format:
8070         case KEY_sub:
8071           really_sub:
8072             {
8073                 char tmpbuf[sizeof PL_tokenbuf];
8074                 SSize_t tboffset = 0;
8075                 expectation attrful;
8076                 bool have_name, have_proto;
8077                 const int key = tmp;
8078
8079 #ifdef PERL_MAD
8080                 SV *tmpwhite = 0;
8081
8082                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8083                 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8084                 PL_thistoken = 0;
8085
8086                 d = s;
8087                 s = SKIPSPACE2(s,tmpwhite);
8088 #else
8089                 s = skipspace(s);
8090 #endif
8091
8092                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8093                     (*s == ':' && s[1] == ':'))
8094                 {
8095 #ifdef PERL_MAD
8096                     SV *nametoke = NULL;
8097 #endif
8098
8099                     PL_expect = XBLOCK;
8100                     attrful = XATTRBLOCK;
8101                     /* remember buffer pos'n for later force_word */
8102                     tboffset = s - PL_oldbufptr;
8103                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
8104 #ifdef PERL_MAD
8105                     if (PL_madskills)
8106                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8107 #endif
8108                     if (memchr(tmpbuf, ':', len))
8109                         sv_setpvn(PL_subname, tmpbuf, len);
8110                     else {
8111                         sv_setsv(PL_subname,PL_curstname);
8112                         sv_catpvs(PL_subname,"::");
8113                         sv_catpvn(PL_subname,tmpbuf,len);
8114                     }
8115                     if (SvUTF8(PL_linestr))
8116                         SvUTF8_on(PL_subname);
8117                     have_name = TRUE;
8118
8119 #ifdef PERL_MAD
8120
8121                     start_force(0);
8122                     CURMAD('X', nametoke);
8123                     CURMAD('_', tmpwhite);
8124                     (void) force_word(PL_oldbufptr + tboffset, WORD,
8125                                       FALSE, TRUE, TRUE);
8126
8127                     s = SKIPSPACE2(d,tmpwhite);
8128 #else
8129                     s = skipspace(d);
8130 #endif
8131                 }
8132                 else {
8133                     if (key == KEY_my)
8134                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
8135                     PL_expect = XTERMBLOCK;
8136                     attrful = XATTRTERM;
8137                     sv_setpvs(PL_subname,"?");
8138                     have_name = FALSE;
8139                 }
8140
8141                 if (key == KEY_format) {
8142                     if (*s == '=')
8143                         PL_lex_formbrack = PL_lex_brackets + 1;
8144 #ifdef PERL_MAD
8145                     PL_thistoken = subtoken;
8146                     s = d;
8147 #else
8148                     if (have_name)
8149                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8150                                           FALSE, TRUE, TRUE);
8151 #endif
8152                     OPERATOR(FORMAT);
8153                 }
8154
8155                 /* Look for a prototype */
8156                 if (*s == '(') {
8157                     char *p;
8158                     bool bad_proto = FALSE;
8159                     bool in_brackets = FALSE;
8160                     char greedy_proto = ' ';
8161                     bool proto_after_greedy_proto = FALSE;
8162                     bool must_be_last = FALSE;
8163                     bool underscore = FALSE;
8164                     bool seen_underscore = FALSE;
8165                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8166                     STRLEN tmplen;
8167
8168                     s = scan_str(s,!!PL_madskills,FALSE);
8169                     if (!s)
8170                         Perl_croak(aTHX_ "Prototype not terminated");
8171                     /* strip spaces and check for bad characters */
8172                     d = SvPV(PL_lex_stuff, tmplen);
8173                     tmp = 0;
8174                     for (p = d; tmplen; tmplen--, ++p) {
8175                         if (!isSPACE(*p)) {
8176                             d[tmp++] = *p;
8177
8178                             if (warnillegalproto) {
8179                                 if (must_be_last)
8180                                     proto_after_greedy_proto = TRUE;
8181                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8182                                     bad_proto = TRUE;
8183                                 }
8184                                 else {
8185                                     if ( underscore ) {
8186                                         if ( !strchr(";@%", *p) )
8187                                             bad_proto = TRUE;
8188                                         underscore = FALSE;
8189                                     }
8190                                     if ( *p == '[' ) {
8191                                         in_brackets = TRUE;
8192                                     }
8193                                     else if ( *p == ']' ) {
8194                                         in_brackets = FALSE;
8195                                     }
8196                                     else if ( (*p == '@' || *p == '%') &&
8197                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8198                                          !in_brackets ) {
8199                                         must_be_last = TRUE;
8200                                         greedy_proto = *p;
8201                                     }
8202                                     else if ( *p == '_' ) {
8203                                         underscore = seen_underscore = TRUE;
8204                                     }
8205                                 }
8206                             }
8207                         }
8208                     }
8209                     d[tmp] = '\0';
8210                     if (proto_after_greedy_proto)
8211                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8212                                     "Prototype after '%c' for %"SVf" : %s",
8213                                     greedy_proto, SVfARG(PL_subname), d);
8214                     if (bad_proto) {
8215                         SV *dsv = newSVpvs_flags("", SVs_TEMP);
8216                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8217                                     "Illegal character %sin prototype for %"SVf" : %s",
8218                                     seen_underscore ? "after '_' " : "",
8219                                     SVfARG(PL_subname),
8220                                     sv_uni_display(dsv,
8221                                          newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
8222                                          tmp, UNI_DISPLAY_ISPRINT));
8223                     }
8224                     SvCUR_set(PL_lex_stuff, tmp);
8225                     have_proto = TRUE;
8226
8227 #ifdef PERL_MAD
8228                     start_force(0);
8229                     CURMAD('q', PL_thisopen);
8230                     CURMAD('_', tmpwhite);
8231                     CURMAD('=', PL_thisstuff);
8232                     CURMAD('Q', PL_thisclose);
8233                     NEXTVAL_NEXTTOKE.opval =
8234                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8235                     PL_lex_stuff = NULL;
8236                     force_next(THING);
8237
8238                     s = SKIPSPACE2(s,tmpwhite);
8239 #else
8240                     s = skipspace(s);
8241 #endif
8242                 }
8243                 else
8244                     have_proto = FALSE;
8245
8246                 if (*s == ':' && s[1] != ':')
8247                     PL_expect = attrful;
8248                 else if (*s != '{' && key == KEY_sub) {
8249                     if (!have_name)
8250                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8251                     else if (*s != ';' && *s != '}')
8252                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8253                 }
8254
8255 #ifdef PERL_MAD
8256                 start_force(0);
8257                 if (tmpwhite) {
8258                     if (PL_madskills)
8259                         curmad('^', newSVpvs(""));
8260                     CURMAD('_', tmpwhite);
8261                 }
8262                 force_next(0);
8263
8264                 PL_thistoken = subtoken;
8265 #else
8266                 if (have_proto) {
8267                     NEXTVAL_NEXTTOKE.opval =
8268                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8269                     PL_lex_stuff = NULL;
8270                     force_next(THING);
8271                 }
8272 #endif
8273                 if (!have_name) {
8274                     if (PL_curstash)
8275                         sv_setpvs(PL_subname, "__ANON__");
8276                     else
8277                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8278                     TOKEN(ANONSUB);
8279                 }
8280 #ifndef PERL_MAD
8281                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8282                                   FALSE, TRUE, TRUE);
8283 #endif
8284                 if (key == KEY_my)
8285                     TOKEN(MYSUB);
8286                 TOKEN(SUB);
8287             }
8288
8289         case KEY_system:
8290             LOP(OP_SYSTEM,XREF);
8291
8292         case KEY_symlink:
8293             LOP(OP_SYMLINK,XTERM);
8294
8295         case KEY_syscall:
8296             LOP(OP_SYSCALL,XTERM);
8297
8298         case KEY_sysopen:
8299             LOP(OP_SYSOPEN,XTERM);
8300
8301         case KEY_sysseek:
8302             LOP(OP_SYSSEEK,XTERM);
8303
8304         case KEY_sysread:
8305             LOP(OP_SYSREAD,XTERM);
8306
8307         case KEY_syswrite:
8308             LOP(OP_SYSWRITE,XTERM);
8309
8310         case KEY_tr:
8311             s = scan_trans(s);
8312             TERM(sublex_start());
8313
8314         case KEY_tell:
8315             UNI(OP_TELL);
8316
8317         case KEY_telldir:
8318             UNI(OP_TELLDIR);
8319
8320         case KEY_tie:
8321             LOP(OP_TIE,XTERM);
8322
8323         case KEY_tied:
8324             UNI(OP_TIED);
8325
8326         case KEY_time:
8327             FUN0(OP_TIME);
8328
8329         case KEY_times:
8330             FUN0(OP_TMS);
8331
8332         case KEY_truncate:
8333             LOP(OP_TRUNCATE,XTERM);
8334
8335         case KEY_uc:
8336             UNI(OP_UC);
8337
8338         case KEY_ucfirst:
8339             UNI(OP_UCFIRST);
8340
8341         case KEY_untie:
8342             UNI(OP_UNTIE);
8343
8344         case KEY_until:
8345             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8346                 return REPORT(0);
8347             pl_yylval.ival = CopLINE(PL_curcop);
8348             OPERATOR(UNTIL);
8349
8350         case KEY_unless:
8351             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8352                 return REPORT(0);
8353             pl_yylval.ival = CopLINE(PL_curcop);
8354             OPERATOR(UNLESS);
8355
8356         case KEY_unlink:
8357             LOP(OP_UNLINK,XTERM);
8358
8359         case KEY_undef:
8360             UNIDOR(OP_UNDEF);
8361
8362         case KEY_unpack:
8363             LOP(OP_UNPACK,XTERM);
8364
8365         case KEY_utime:
8366             LOP(OP_UTIME,XTERM);
8367
8368         case KEY_umask:
8369             UNIDOR(OP_UMASK);
8370
8371         case KEY_unshift:
8372             LOP(OP_UNSHIFT,XTERM);
8373
8374         case KEY_use:
8375             s = tokenize_use(1, s);
8376             OPERATOR(USE);
8377
8378         case KEY_values:
8379             UNI(OP_VALUES);
8380
8381         case KEY_vec:
8382             LOP(OP_VEC,XTERM);
8383
8384         case KEY_when:
8385             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8386                 return REPORT(0);
8387             pl_yylval.ival = CopLINE(PL_curcop);
8388             OPERATOR(WHEN);
8389
8390         case KEY_while:
8391             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8392                 return REPORT(0);
8393             pl_yylval.ival = CopLINE(PL_curcop);
8394             OPERATOR(WHILE);
8395
8396         case KEY_warn:
8397             PL_hints |= HINT_BLOCK_SCOPE;
8398             LOP(OP_WARN,XTERM);
8399
8400         case KEY_wait:
8401             FUN0(OP_WAIT);
8402
8403         case KEY_waitpid:
8404             LOP(OP_WAITPID,XTERM);
8405
8406         case KEY_wantarray:
8407             FUN0(OP_WANTARRAY);
8408
8409         case KEY_write:
8410 #ifdef EBCDIC
8411         {
8412             char ctl_l[2];
8413             ctl_l[0] = toCTRL('L');
8414             ctl_l[1] = '\0';
8415             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8416         }
8417 #else
8418             /* Make sure $^L is defined */
8419             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8420 #endif
8421             UNI(OP_ENTERWRITE);
8422
8423         case KEY_x:
8424             if (PL_expect == XOPERATOR) {
8425                 if (*s == '=' && !PL_lex_allbrackets &&
8426                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8427                     return REPORT(0);
8428                 Mop(OP_REPEAT);
8429             }
8430             check_uni();
8431             goto just_a_word;
8432
8433         case KEY_xor:
8434             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8435                 return REPORT(0);
8436             pl_yylval.ival = OP_XOR;
8437             OPERATOR(OROP);
8438
8439         case KEY_y:
8440             s = scan_trans(s);
8441             TERM(sublex_start());
8442         }
8443     }}
8444 }
8445 #ifdef __SC__
8446 #pragma segment Main
8447 #endif
8448
8449 static int
8450 S_pending_ident(pTHX)
8451 {
8452     dVAR;
8453     register char *d;
8454     PADOFFSET tmp = 0;
8455     /* pit holds the identifier we read and pending_ident is reset */
8456     char pit = PL_pending_ident;
8457     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8458     /* All routes through this function want to know if there is a colon.  */
8459     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8460     PL_pending_ident = 0;
8461
8462     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8463     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8464           "### Pending identifier '%s'\n", PL_tokenbuf); });
8465
8466     /* if we're in a my(), we can't allow dynamics here.
8467        $foo'bar has already been turned into $foo::bar, so
8468        just check for colons.
8469
8470        if it's a legal name, the OP is a PADANY.
8471     */
8472     if (PL_in_my) {
8473         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8474             if (has_colon)
8475                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8476                                   "variable %s in \"our\"",
8477                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8478             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8479         }
8480         else {
8481             if (has_colon)
8482                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8483                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8484                             UTF ? SVf_UTF8 : 0);
8485
8486             pl_yylval.opval = newOP(OP_PADANY, 0);
8487             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8488                                                         UTF ? SVf_UTF8 : 0);
8489             return PRIVATEREF;
8490         }
8491     }
8492
8493     /*
8494        build the ops for accesses to a my() variable.
8495
8496        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8497        then used in a comparison.  This catches most, but not
8498        all cases.  For instance, it catches
8499            sort { my($a); $a <=> $b }
8500        but not
8501            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8502        (although why you'd do that is anyone's guess).
8503     */
8504
8505     if (!has_colon) {
8506         if (!PL_in_my)
8507             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8508                                     UTF ? SVf_UTF8 : 0);
8509         if (tmp != NOT_IN_PAD) {
8510             /* might be an "our" variable" */
8511             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8512                 /* build ops for a bareword */
8513                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8514                 HEK * const stashname = HvNAME_HEK(stash);
8515                 SV *  const sym = newSVhek(stashname);
8516                 sv_catpvs(sym, "::");
8517                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8518                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8519                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8520                 gv_fetchsv(sym,
8521                     (PL_in_eval
8522                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8523                         : GV_ADDMULTI
8524                     ),
8525                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8526                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8527                      : SVt_PVHV));
8528                 return WORD;
8529             }
8530
8531             /* if it's a sort block and they're naming $a or $b */
8532             if (PL_last_lop_op == OP_SORT &&
8533                 PL_tokenbuf[0] == '$' &&
8534                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8535                 && !PL_tokenbuf[2])
8536             {
8537                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8538                      d < PL_bufend && *d != '\n';
8539                      d++)
8540                 {
8541                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8542                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8543                               PL_tokenbuf);
8544                     }
8545                 }
8546             }
8547
8548             pl_yylval.opval = newOP(OP_PADANY, 0);
8549             pl_yylval.opval->op_targ = tmp;
8550             return PRIVATEREF;
8551         }
8552     }
8553
8554     /*
8555        Whine if they've said @foo in a doublequoted string,
8556        and @foo isn't a variable we can find in the symbol
8557        table.
8558     */
8559     if (ckWARN(WARN_AMBIGUOUS) &&
8560         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8561         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8562                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8563         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8564                 /* DO NOT warn for @- and @+ */
8565                 && !( PL_tokenbuf[2] == '\0' &&
8566                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8567            )
8568         {
8569             /* Downgraded from fatal to warning 20000522 mjd */
8570             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8571                         "Possible unintended interpolation of %"SVf" in string",
8572                         SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8573                                         SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8574         }
8575     }
8576
8577     /* build ops for a bareword */
8578     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8579                                                       tokenbuf_len - 1,
8580                                                       UTF ? SVf_UTF8 : 0 ));
8581     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8582     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8583                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8584                      | ( UTF ? SVf_UTF8 : 0 ),
8585                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8586                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8587                       : SVt_PVHV));
8588     return WORD;
8589 }
8590
8591 STATIC void
8592 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8593 {
8594     dVAR;
8595
8596     PERL_ARGS_ASSERT_CHECKCOMMA;
8597
8598     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8599         if (ckWARN(WARN_SYNTAX)) {
8600             int level = 1;
8601             const char *w;
8602             for (w = s+2; *w && level; w++) {
8603                 if (*w == '(')
8604                     ++level;
8605                 else if (*w == ')')
8606                     --level;
8607             }
8608             while (isSPACE(*w))
8609                 ++w;
8610             /* the list of chars below is for end of statements or
8611              * block / parens, boolean operators (&&, ||, //) and branch
8612              * constructs (or, and, if, until, unless, while, err, for).
8613              * Not a very solid hack... */
8614             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8615                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8616                             "%s (...) interpreted as function",name);
8617         }
8618     }
8619     while (s < PL_bufend && isSPACE(*s))
8620         s++;
8621     if (*s == '(')
8622         s++;
8623     while (s < PL_bufend && isSPACE(*s))
8624         s++;
8625     if (isIDFIRST_lazy_if(s,UTF)) {
8626         const char * const w = s;
8627         s += UTF ? UTF8SKIP(s) : 1;
8628         while (isALNUM_lazy_if(s,UTF))
8629             s += UTF ? UTF8SKIP(s) : 1;
8630         while (s < PL_bufend && isSPACE(*s))
8631             s++;
8632         if (*s == ',') {
8633             GV* gv;
8634             if (keyword(w, s - w, 0))
8635                 return;
8636
8637             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8638             if (gv && GvCVu(gv))
8639                 return;
8640             Perl_croak(aTHX_ "No comma allowed after %s", what);
8641         }
8642     }
8643 }
8644
8645 /* Either returns sv, or mortalizes sv and returns a new SV*.
8646    Best used as sv=new_constant(..., sv, ...).
8647    If s, pv are NULL, calls subroutine with one argument,
8648    and type is used with error messages only. */
8649
8650 STATIC SV *
8651 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8652                SV *sv, SV *pv, const char *type, STRLEN typelen)
8653 {
8654     dVAR; dSP;
8655     HV * table = GvHV(PL_hintgv);                /* ^H */
8656     SV *res;
8657     SV **cvp;
8658     SV *cv, *typesv;
8659     const char *why1 = "", *why2 = "", *why3 = "";
8660
8661     PERL_ARGS_ASSERT_NEW_CONSTANT;
8662
8663     /* charnames doesn't work well if there have been errors found */
8664     if (PL_error_count > 0 && strEQ(key,"charnames"))
8665         return &PL_sv_undef;
8666
8667     if (!table
8668         || ! (PL_hints & HINT_LOCALIZE_HH)
8669         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8670         || ! SvOK(*cvp))
8671     {
8672         SV *msg;
8673
8674         /* Here haven't found what we're looking for.  If it is charnames,
8675          * perhaps it needs to be loaded.  Try doing that before giving up */
8676         if (strEQ(key,"charnames")) {
8677             Perl_load_module(aTHX_
8678                             0,
8679                             newSVpvs("_charnames"),
8680                              /* version parameter; no need to specify it, as if
8681                               * we get too early a version, will fail anyway,
8682                               * not being able to find '_charnames' */
8683                             NULL,
8684                             newSVpvs(":full"),
8685                             newSVpvs(":short"),
8686                             NULL);
8687             SPAGAIN;
8688             table = GvHV(PL_hintgv);
8689             if (table
8690                 && (PL_hints & HINT_LOCALIZE_HH)
8691                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8692                 && SvOK(*cvp))
8693             {
8694                 goto now_ok;
8695             }
8696         }
8697         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8698             msg = Perl_newSVpvf(aTHX_
8699                             "Constant(%s) unknown", (type ? type: "undef"));
8700         }
8701         else {
8702         why1 = "$^H{";
8703         why2 = key;
8704         why3 = "} is not defined";
8705     report:
8706         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8707                             (type ? type: "undef"), why1, why2, why3);
8708         }
8709         yyerror(SvPVX_const(msg));
8710         SvREFCNT_dec(msg);
8711         return sv;
8712     }
8713 now_ok:
8714     sv_2mortal(sv);                     /* Parent created it permanently */
8715     cv = *cvp;
8716     if (!pv && s)
8717         pv = newSVpvn_flags(s, len, SVs_TEMP);
8718     if (type && pv)
8719         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8720     else
8721         typesv = &PL_sv_undef;
8722
8723     PUSHSTACKi(PERLSI_OVERLOAD);
8724     ENTER ;
8725     SAVETMPS;
8726
8727     PUSHMARK(SP) ;
8728     EXTEND(sp, 3);
8729     if (pv)
8730         PUSHs(pv);
8731     PUSHs(sv);
8732     if (pv)
8733         PUSHs(typesv);
8734     PUTBACK;
8735     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8736
8737     SPAGAIN ;
8738
8739     /* Check the eval first */
8740     if (!PL_in_eval && SvTRUE(ERRSV)) {
8741         sv_catpvs(ERRSV, "Propagated");
8742         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8743         (void)POPs;
8744         res = SvREFCNT_inc_simple(sv);
8745     }
8746     else {
8747         res = POPs;
8748         SvREFCNT_inc_simple_void(res);
8749     }
8750
8751     PUTBACK ;
8752     FREETMPS ;
8753     LEAVE ;
8754     POPSTACK;
8755
8756     if (!SvOK(res)) {
8757         why1 = "Call to &{$^H{";
8758         why2 = key;
8759         why3 = "}} did not return a defined value";
8760         sv = res;
8761         goto report;
8762     }
8763
8764     return res;
8765 }
8766
8767 /* Returns a NUL terminated string, with the length of the string written to
8768    *slp
8769    */
8770 STATIC char *
8771 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8772 {
8773     dVAR;
8774     register char *d = dest;
8775     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8776
8777     PERL_ARGS_ASSERT_SCAN_WORD;
8778
8779     for (;;) {
8780         if (d >= e)
8781             Perl_croak(aTHX_ ident_too_long);
8782         if (isALNUM(*s))        /* UTF handled below */
8783             *d++ = *s++;
8784         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8785             *d++ = ':';
8786             *d++ = ':';
8787             s++;
8788         }
8789         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8790             *d++ = *s++;
8791             *d++ = *s++;
8792         }
8793         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8794             char *t = s + UTF8SKIP(s);
8795             size_t len;
8796             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8797                 t += UTF8SKIP(t);
8798             len = t - s;
8799             if (d + len > e)
8800                 Perl_croak(aTHX_ ident_too_long);
8801             Copy(s, d, len, char);
8802             d += len;
8803             s = t;
8804         }
8805         else {
8806             *d = '\0';
8807             *slp = d - dest;
8808             return s;
8809         }
8810     }
8811 }
8812
8813 STATIC char *
8814 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8815 {
8816     dVAR;
8817     char *bracket = NULL;
8818     char funny = *s++;
8819     register char *d = dest;
8820     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8821
8822     PERL_ARGS_ASSERT_SCAN_IDENT;
8823
8824     if (isSPACE(*s))
8825         s = PEEKSPACE(s);
8826     if (isDIGIT(*s)) {
8827         while (isDIGIT(*s)) {
8828             if (d >= e)
8829                 Perl_croak(aTHX_ ident_too_long);
8830             *d++ = *s++;
8831         }
8832     }
8833     else {
8834         for (;;) {
8835             if (d >= e)
8836                 Perl_croak(aTHX_ ident_too_long);
8837             if (isALNUM(*s))    /* UTF handled below */
8838                 *d++ = *s++;
8839             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8840                 *d++ = ':';
8841                 *d++ = ':';
8842                 s++;
8843             }
8844             else if (*s == ':' && s[1] == ':') {
8845                 *d++ = *s++;
8846                 *d++ = *s++;
8847             }
8848             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8849                 char *t = s + UTF8SKIP(s);
8850                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8851                     t += UTF8SKIP(t);
8852                 if (d + (t - s) > e)
8853                     Perl_croak(aTHX_ ident_too_long);
8854                 Copy(s, d, t - s, char);
8855                 d += t - s;
8856                 s = t;
8857             }
8858             else
8859                 break;
8860         }
8861     }
8862     *d = '\0';
8863     d = dest;
8864     if (*d) {
8865         if (PL_lex_state != LEX_NORMAL)
8866             PL_lex_state = LEX_INTERPENDMAYBE;
8867         return s;
8868     }
8869     if (*s == '$' && s[1] &&
8870         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8871     {
8872         return s;
8873     }
8874     if (*s == '{') {
8875         bracket = s;
8876         s++;
8877     }
8878     else if (ck_uni)
8879         check_uni();
8880     if (s < send) {
8881         if (UTF) {
8882             const STRLEN skip = UTF8SKIP(s);
8883             STRLEN i;
8884             d[skip] = '\0';
8885             for ( i = 0; i < skip; i++ )
8886                 d[i] = *s++;
8887         }
8888         else {
8889             *d = *s++;
8890             d[1] = '\0';
8891         }
8892     }
8893     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8894         *d = toCTRL(*s);
8895         s++;
8896     }
8897     if (bracket) {
8898         if (isSPACE(s[-1])) {
8899             while (s < send) {
8900                 const char ch = *s++;
8901                 if (!SPACE_OR_TAB(ch)) {
8902                     *d = ch;
8903                     break;
8904                 }
8905             }
8906         }
8907         if (isIDFIRST_lazy_if(d,UTF)) {
8908             d += UTF8SKIP(d);
8909             if (UTF) {
8910                 char *end = s;
8911                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8912                     end += UTF8SKIP(end);
8913                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8914                         end += UTF8SKIP(end);
8915                 }
8916                 Copy(s, d, end - s, char);
8917                 d += end - s;
8918                 s = end;
8919             }
8920             else {
8921                 while ((isALNUM(*s) || *s == ':') && d < e)
8922                     *d++ = *s++;
8923                 if (d >= e)
8924                     Perl_croak(aTHX_ ident_too_long);
8925             }
8926             *d = '\0';
8927             while (s < send && SPACE_OR_TAB(*s))
8928                 s++;
8929             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8930                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8931                     const char * const brack =
8932                         (const char *)
8933                         ((*s == '[') ? "[...]" : "{...}");
8934    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8935                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8936                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8937                         funny, dest, brack, funny, dest, brack);
8938                 }
8939                 bracket++;
8940                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8941                 PL_lex_allbrackets++;
8942                 return s;
8943             }
8944         }
8945         /* Handle extended ${^Foo} variables
8946          * 1999-02-27 mjd-perl-patch@plover.com */
8947         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8948                  && isALNUM(*s))
8949         {
8950             d++;
8951             while (isALNUM(*s) && d < e) {
8952                 *d++ = *s++;
8953             }
8954             if (d >= e)
8955                 Perl_croak(aTHX_ ident_too_long);
8956             *d = '\0';
8957         }
8958         if (*s == '}') {
8959             s++;
8960             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8961                 PL_lex_state = LEX_INTERPEND;
8962                 PL_expect = XREF;
8963             }
8964             if (PL_lex_state == LEX_NORMAL) {
8965                 if (ckWARN(WARN_AMBIGUOUS) &&
8966                     (keyword(dest, d - dest, 0)
8967                      || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
8968                 {
8969                     SV *tmp = newSVpvn_flags( dest, d - dest,
8970                                             SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
8971                     if (funny == '#')
8972                         funny = '@';
8973                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8974                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
8975                         funny, tmp, funny, tmp);
8976                 }
8977             }
8978         }
8979         else {
8980             s = bracket;                /* let the parser handle it */
8981             *dest = '\0';
8982         }
8983     }
8984     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8985         PL_lex_state = LEX_INTERPEND;
8986     return s;
8987 }
8988
8989 static bool
8990 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8991
8992     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8993      * the parse starting at 's', based on the subset that are valid in this
8994      * context input to this routine in 'valid_flags'. Advances s.  Returns
8995      * TRUE if the input was a valid flag, so the next char may be as well;
8996      * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8997      * current regex.  This routine will set it to any charset modifier found.
8998      * The caller shouldn't change it.  This way, another charset modifier
8999      * encountered in the parse can be detected as an error, as we have decided
9000      * allow only one */
9001
9002     const char c = **s;
9003
9004     if (! strchr(valid_flags, c)) {
9005         if (isALNUM(c)) {
9006             goto deprecate;
9007         }
9008         return FALSE;
9009     }
9010
9011     switch (c) {
9012
9013         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9014         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9015         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9016         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9017         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9018         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9019         case LOCALE_PAT_MOD:
9020
9021             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
9022              * can't be regex modifiers.
9023              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
9024              * much as easily done.  s///lei, for example, has to mean regex
9025              * modifiers if it's not an error (as does any word character
9026              * following the 'e').  Otherwise, we resolve to the backwards-
9027              * compatible, but less likely 's/// le ...', i.e. as meaning
9028              * less-than-or-equal.  The reason it's not likely is that s//
9029              * returns a number for code in the field (/r returns a string, but
9030              * that wasn't added until the 5.13 series), and so '<=' should be
9031              * used for comparing, not 'le'. */
9032             if (*((*s) + 1) == 't') {
9033                 goto deprecate;
9034             }
9035             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
9036
9037                 /* 'e' is valid only for substitutes, s///e.  If it is not
9038                  * valid in the current context, then 'm//le' must mean the
9039                  * comparison operator, so use the regular deprecation message.
9040                  */
9041                 if (! strchr(valid_flags, 'e')) {
9042                     goto deprecate;
9043                 }
9044                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
9045                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.18, it will be resolved the other way");
9046                 return FALSE;
9047             }
9048             if (*charset) {
9049                 goto multiple_charsets;
9050             }
9051             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9052             *charset = c;
9053             break;
9054         case UNICODE_PAT_MOD:
9055             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
9056              * 'n' means they can't be regex modifiers */
9057             if (*((*s) + 1) == 'n') {
9058                 goto deprecate;
9059             }
9060             if (*charset) {
9061                 goto multiple_charsets;
9062             }
9063             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9064             *charset = c;
9065             break;
9066         case ASCII_RESTRICT_PAT_MOD:
9067             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
9068              * can't be regex modifiers */
9069             if (*((*s) + 1) == 'n') {
9070                 goto deprecate;
9071             }
9072
9073             if (! *charset) {
9074                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9075             }
9076             else {
9077
9078                 /* Error if previous modifier wasn't an 'a', but if it was, see
9079                  * if, and accept, a second occurrence (only) */
9080                 if (*charset != 'a'
9081                     || get_regex_charset(*pmfl)
9082                         != REGEX_ASCII_RESTRICTED_CHARSET)
9083                 {
9084                         goto multiple_charsets;
9085                 }
9086                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9087             }
9088             *charset = c;
9089             break;
9090         case DEPENDS_PAT_MOD:
9091             if (*charset) {
9092                 goto multiple_charsets;
9093             }
9094             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9095             *charset = c;
9096             break;
9097     }
9098
9099     (*s)++;
9100     return TRUE;
9101
9102     deprecate:
9103         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
9104             "Having no space between pattern and following word is deprecated");
9105         return FALSE;
9106
9107     multiple_charsets:
9108         if (*charset != c) {
9109             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9110         }
9111         else if (c == 'a') {
9112             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9113         }
9114         else {
9115             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9116         }
9117
9118         /* Pretend that it worked, so will continue processing before dieing */
9119         (*s)++;
9120         return TRUE;
9121 }
9122
9123 STATIC char *
9124 S_scan_pat(pTHX_ char *start, I32 type)
9125 {
9126     dVAR;
9127     PMOP *pm;
9128     char *s = scan_str(start,!!PL_madskills,FALSE);
9129     const char * const valid_flags =
9130         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9131     char charset = '\0';    /* character set modifier */
9132 #ifdef PERL_MAD
9133     char *modstart;
9134 #endif
9135
9136     PERL_ARGS_ASSERT_SCAN_PAT;
9137
9138     if (!s) {
9139         const char * const delimiter = skipspace(start);
9140         Perl_croak(aTHX_
9141                    (const char *)
9142                    (*delimiter == '?'
9143                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9144                     : "Search pattern not terminated" ));
9145     }
9146
9147     pm = (PMOP*)newPMOP(type, 0);
9148     if (PL_multi_open == '?') {
9149         /* This is the only point in the code that sets PMf_ONCE:  */
9150         pm->op_pmflags |= PMf_ONCE;
9151
9152         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9153            allows us to restrict the list needed by reset to just the ??
9154            matches.  */
9155         assert(type != OP_TRANS);
9156         if (PL_curstash) {
9157             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9158             U32 elements;
9159             if (!mg) {
9160                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9161                                  0);
9162             }
9163             elements = mg->mg_len / sizeof(PMOP**);
9164             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9165             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9166             mg->mg_len = elements * sizeof(PMOP**);
9167             PmopSTASH_set(pm,PL_curstash);
9168         }
9169     }
9170 #ifdef PERL_MAD
9171     modstart = s;
9172 #endif
9173     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9174 #ifdef PERL_MAD
9175     if (PL_madskills && modstart != s) {
9176         SV* tmptoken = newSVpvn(modstart, s - modstart);
9177         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9178     }
9179 #endif
9180     /* issue a warning if /c is specified,but /g is not */
9181     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9182     {
9183         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9184                        "Use of /c modifier is meaningless without /g" );
9185     }
9186
9187     PL_lex_op = (OP*)pm;
9188     pl_yylval.ival = OP_MATCH;
9189     return s;
9190 }
9191
9192 STATIC char *
9193 S_scan_subst(pTHX_ char *start)
9194 {
9195     dVAR;
9196     char *s;
9197     register PMOP *pm;
9198     I32 first_start;
9199     I32 es = 0;
9200     char charset = '\0';    /* character set modifier */
9201 #ifdef PERL_MAD
9202     char *modstart;
9203 #endif
9204
9205     PERL_ARGS_ASSERT_SCAN_SUBST;
9206
9207     pl_yylval.ival = OP_NULL;
9208
9209     s = scan_str(start,!!PL_madskills,FALSE);
9210
9211     if (!s)
9212         Perl_croak(aTHX_ "Substitution pattern not terminated");
9213
9214     if (s[-1] == PL_multi_open)
9215         s--;
9216 #ifdef PERL_MAD
9217     if (PL_madskills) {
9218         CURMAD('q', PL_thisopen);
9219         CURMAD('_', PL_thiswhite);
9220         CURMAD('E', PL_thisstuff);
9221         CURMAD('Q', PL_thisclose);
9222         PL_realtokenstart = s - SvPVX(PL_linestr);
9223     }
9224 #endif
9225
9226     first_start = PL_multi_start;
9227     s = scan_str(s,!!PL_madskills,FALSE);
9228     if (!s) {
9229         if (PL_lex_stuff) {
9230             SvREFCNT_dec(PL_lex_stuff);
9231             PL_lex_stuff = NULL;
9232         }
9233         Perl_croak(aTHX_ "Substitution replacement not terminated");
9234     }
9235     PL_multi_start = first_start;       /* so whole substitution is taken together */
9236
9237     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9238
9239 #ifdef PERL_MAD
9240     if (PL_madskills) {
9241         CURMAD('z', PL_thisopen);
9242         CURMAD('R', PL_thisstuff);
9243         CURMAD('Z', PL_thisclose);
9244     }
9245     modstart = s;
9246 #endif
9247
9248     while (*s) {
9249         if (*s == EXEC_PAT_MOD) {
9250             s++;
9251             es++;
9252         }
9253         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9254         {
9255             break;
9256         }
9257     }
9258
9259 #ifdef PERL_MAD
9260     if (PL_madskills) {
9261         if (modstart != s)
9262             curmad('m', newSVpvn(modstart, s - modstart));
9263         append_madprops(PL_thismad, (OP*)pm, 0);
9264         PL_thismad = 0;
9265     }
9266 #endif
9267     if ((pm->op_pmflags & PMf_CONTINUE)) {
9268         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9269     }
9270
9271     if (es) {
9272         SV * const repl = newSVpvs("");
9273
9274         PL_sublex_info.super_bufptr = s;
9275         PL_sublex_info.super_bufend = PL_bufend;
9276         PL_multi_end = 0;
9277         pm->op_pmflags |= PMf_EVAL;
9278         while (es-- > 0) {
9279             if (es)
9280                 sv_catpvs(repl, "eval ");
9281             else
9282                 sv_catpvs(repl, "do ");
9283         }
9284         sv_catpvs(repl, "{");
9285         sv_catsv(repl, PL_lex_repl);
9286         if (strchr(SvPVX(PL_lex_repl), '#'))
9287             sv_catpvs(repl, "\n");
9288         sv_catpvs(repl, "}");
9289         SvEVALED_on(repl);
9290         SvREFCNT_dec(PL_lex_repl);
9291         PL_lex_repl = repl;
9292     }
9293
9294     PL_lex_op = (OP*)pm;
9295     pl_yylval.ival = OP_SUBST;
9296     return s;
9297 }
9298
9299 STATIC char *
9300 S_scan_trans(pTHX_ char *start)
9301 {
9302     dVAR;
9303     register char* s;
9304     OP *o;
9305     U8 squash;
9306     U8 del;
9307     U8 complement;
9308     bool nondestruct = 0;
9309 #ifdef PERL_MAD
9310     char *modstart;
9311 #endif
9312
9313     PERL_ARGS_ASSERT_SCAN_TRANS;
9314
9315     pl_yylval.ival = OP_NULL;
9316
9317     s = scan_str(start,!!PL_madskills,FALSE);
9318     if (!s)
9319         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9320
9321     if (s[-1] == PL_multi_open)
9322         s--;
9323 #ifdef PERL_MAD
9324     if (PL_madskills) {
9325         CURMAD('q', PL_thisopen);
9326         CURMAD('_', PL_thiswhite);
9327         CURMAD('E', PL_thisstuff);
9328         CURMAD('Q', PL_thisclose);
9329         PL_realtokenstart = s - SvPVX(PL_linestr);
9330     }
9331 #endif
9332
9333     s = scan_str(s,!!PL_madskills,FALSE);
9334     if (!s) {
9335         if (PL_lex_stuff) {
9336             SvREFCNT_dec(PL_lex_stuff);
9337             PL_lex_stuff = NULL;
9338         }
9339         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9340     }
9341     if (PL_madskills) {
9342         CURMAD('z', PL_thisopen);
9343         CURMAD('R', PL_thisstuff);
9344         CURMAD('Z', PL_thisclose);
9345     }
9346
9347     complement = del = squash = 0;
9348 #ifdef PERL_MAD
9349     modstart = s;
9350 #endif
9351     while (1) {
9352         switch (*s) {
9353         case 'c':
9354             complement = OPpTRANS_COMPLEMENT;
9355             break;
9356         case 'd':
9357             del = OPpTRANS_DELETE;
9358             break;
9359         case 's':
9360             squash = OPpTRANS_SQUASH;
9361             break;
9362         case 'r':
9363             nondestruct = 1;
9364             break;
9365         default:
9366             goto no_more;
9367         }
9368         s++;
9369     }
9370   no_more:
9371
9372     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9373     o->op_private &= ~OPpTRANS_ALL;
9374     o->op_private |= del|squash|complement|
9375       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9376       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9377
9378     PL_lex_op = o;
9379     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9380
9381 #ifdef PERL_MAD
9382     if (PL_madskills) {
9383         if (modstart != s)
9384             curmad('m', newSVpvn(modstart, s - modstart));
9385         append_madprops(PL_thismad, o, 0);
9386         PL_thismad = 0;
9387     }
9388 #endif
9389
9390     return s;
9391 }
9392
9393 STATIC char *
9394 S_scan_heredoc(pTHX_ register char *s)
9395 {
9396     dVAR;
9397     SV *herewas;
9398     I32 op_type = OP_SCALAR;
9399     I32 len;
9400     SV *tmpstr;
9401     char term;
9402     const char *found_newline;
9403     register char *d;
9404     register char *e;
9405     char *peek;
9406     const int outer = (PL_rsfp || PL_parser->filtered)
9407                    && !(PL_lex_inwhat == OP_SCALAR);
9408 #ifdef PERL_MAD
9409     I32 stuffstart = s - SvPVX(PL_linestr);
9410     char *tstart;
9411
9412     PL_realtokenstart = -1;
9413 #endif
9414
9415     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9416
9417     s += 2;
9418     d = PL_tokenbuf;
9419     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9420     if (!outer)
9421         *d++ = '\n';
9422     peek = s;
9423     while (SPACE_OR_TAB(*peek))
9424         peek++;
9425     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9426         s = peek;
9427         term = *s++;
9428         s = delimcpy(d, e, s, PL_bufend, term, &len);
9429         d += len;
9430         if (s < PL_bufend)
9431             s++;
9432     }
9433     else {
9434         if (*s == '\\')
9435             s++, term = '\'';
9436         else
9437             term = '"';
9438         if (!isALNUM_lazy_if(s,UTF))
9439             deprecate("bare << to mean <<\"\"");
9440         for (; isALNUM_lazy_if(s,UTF); s++) {
9441             if (d < e)
9442                 *d++ = *s;
9443         }
9444     }
9445     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9446         Perl_croak(aTHX_ "Delimiter for here document is too long");
9447     *d++ = '\n';
9448     *d = '\0';
9449     len = d - PL_tokenbuf;
9450
9451 #ifdef PERL_MAD
9452     if (PL_madskills) {
9453         tstart = PL_tokenbuf + !outer;
9454         PL_thisclose = newSVpvn(tstart, len - !outer);
9455         tstart = SvPVX(PL_linestr) + stuffstart;
9456         PL_thisopen = newSVpvn(tstart, s - tstart);
9457         stuffstart = s - SvPVX(PL_linestr);
9458     }
9459 #endif
9460 #ifndef PERL_STRICT_CR
9461     d = strchr(s, '\r');
9462     if (d) {
9463         char * const olds = s;
9464         s = d;
9465         while (s < PL_bufend) {
9466             if (*s == '\r') {
9467                 *d++ = '\n';
9468                 if (*++s == '\n')
9469                     s++;
9470             }
9471             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9472                 *d++ = *s++;
9473                 s++;
9474             }
9475             else
9476                 *d++ = *s++;
9477         }
9478         *d = '\0';
9479         PL_bufend = d;
9480         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9481         s = olds;
9482     }
9483 #endif
9484 #ifdef PERL_MAD
9485     found_newline = 0;
9486 #endif
9487     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9488         herewas = newSVpvn(s,PL_bufend-s);
9489     }
9490     else {
9491 #ifdef PERL_MAD
9492         herewas = newSVpvn(s-1,found_newline-s+1);
9493 #else
9494         s--;
9495         herewas = newSVpvn(s,found_newline-s);
9496 #endif
9497     }
9498 #ifdef PERL_MAD
9499     if (PL_madskills) {
9500         tstart = SvPVX(PL_linestr) + stuffstart;
9501         if (PL_thisstuff)
9502             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9503         else
9504             PL_thisstuff = newSVpvn(tstart, s - tstart);
9505     }
9506 #endif
9507     s += SvCUR(herewas);
9508
9509 #ifdef PERL_MAD
9510     stuffstart = s - SvPVX(PL_linestr);
9511
9512     if (found_newline)
9513         s--;
9514 #endif
9515
9516     tmpstr = newSV_type(SVt_PVIV);
9517     SvGROW(tmpstr, 80);
9518     if (term == '\'') {
9519         op_type = OP_CONST;
9520         SvIV_set(tmpstr, -1);
9521     }
9522     else if (term == '`') {
9523         op_type = OP_BACKTICK;
9524         SvIV_set(tmpstr, '\\');
9525     }
9526
9527     CLINE;
9528     PL_multi_start = CopLINE(PL_curcop);
9529     PL_multi_open = PL_multi_close = '<';
9530     term = *PL_tokenbuf;
9531     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
9532      && !PL_parser->filtered) {
9533         char * const bufptr = PL_sublex_info.super_bufptr;
9534         char * const bufend = PL_sublex_info.super_bufend;
9535         char * const olds = s - SvCUR(herewas);
9536         s = strchr(bufptr, '\n');
9537         if (!s)
9538             s = bufend;
9539         d = s;
9540         while (s < bufend &&
9541           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9542             if (*s++ == '\n')
9543                 CopLINE_inc(PL_curcop);
9544         }
9545         if (s >= bufend) {
9546             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9547             missingterm(PL_tokenbuf);
9548         }
9549         sv_setpvn(herewas,bufptr,d-bufptr+1);
9550         sv_setpvn(tmpstr,d+1,s-d);
9551         s += len - 1;
9552         sv_catpvn(herewas,s,bufend-s);
9553         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9554
9555         s = olds;
9556         goto retval;
9557     }
9558     else if (!outer) {
9559         d = s;
9560         while (s < PL_bufend &&
9561           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9562             if (*s++ == '\n')
9563                 CopLINE_inc(PL_curcop);
9564         }
9565         if (s >= PL_bufend) {
9566             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9567             missingterm(PL_tokenbuf);
9568         }
9569         sv_setpvn(tmpstr,d+1,s-d);
9570 #ifdef PERL_MAD
9571         if (PL_madskills) {
9572             if (PL_thisstuff)
9573                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9574             else
9575                 PL_thisstuff = newSVpvn(d + 1, s - d);
9576             stuffstart = s - SvPVX(PL_linestr);
9577         }
9578 #endif
9579         s += len - 1;
9580         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9581
9582         sv_catpvn(herewas,s,PL_bufend-s);
9583         sv_setsv(PL_linestr,herewas);
9584         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9585         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9586         PL_last_lop = PL_last_uni = NULL;
9587     }
9588     else
9589         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9590     while (s >= PL_bufend) {    /* multiple line string? */
9591 #ifdef PERL_MAD
9592         if (PL_madskills) {
9593             tstart = SvPVX(PL_linestr) + stuffstart;
9594             if (PL_thisstuff)
9595                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9596             else
9597                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9598         }
9599 #endif
9600         PL_bufptr = s;
9601         CopLINE_inc(PL_curcop);
9602         if (!outer || !lex_next_chunk(0)) {
9603             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9604             missingterm(PL_tokenbuf);
9605         }
9606         CopLINE_dec(PL_curcop);
9607         s = PL_bufptr;
9608 #ifdef PERL_MAD
9609         stuffstart = s - SvPVX(PL_linestr);
9610 #endif
9611         CopLINE_inc(PL_curcop);
9612         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9613         PL_last_lop = PL_last_uni = NULL;
9614 #ifndef PERL_STRICT_CR
9615         if (PL_bufend - PL_linestart >= 2) {
9616             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9617                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9618             {
9619                 PL_bufend[-2] = '\n';
9620                 PL_bufend--;
9621                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9622             }
9623             else if (PL_bufend[-1] == '\r')
9624                 PL_bufend[-1] = '\n';
9625         }
9626         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9627             PL_bufend[-1] = '\n';
9628 #endif
9629         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9630             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9631             *(SvPVX(PL_linestr) + off ) = ' ';
9632             lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9633             sv_catsv(PL_linestr,herewas);
9634             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9635             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9636         }
9637         else {
9638             s = PL_bufend;
9639             sv_catsv(tmpstr,PL_linestr);
9640         }
9641     }
9642     s++;
9643 retval:
9644     PL_multi_end = CopLINE(PL_curcop);
9645     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9646         SvPV_shrink_to_cur(tmpstr);
9647     }
9648     SvREFCNT_dec(herewas);
9649     if (!IN_BYTES) {
9650         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9651             SvUTF8_on(tmpstr);
9652         else if (PL_encoding)
9653             sv_recode_to_utf8(tmpstr, PL_encoding);
9654     }
9655     PL_lex_stuff = tmpstr;
9656     pl_yylval.ival = op_type;
9657     return s;
9658 }
9659
9660 /* scan_inputsymbol
9661    takes: current position in input buffer
9662    returns: new position in input buffer
9663    side-effects: pl_yylval and lex_op are set.
9664
9665    This code handles:
9666
9667    <>           read from ARGV
9668    <FH>         read from filehandle
9669    <pkg::FH>    read from package qualified filehandle
9670    <pkg'FH>     read from package qualified filehandle
9671    <$fh>        read from filehandle in $fh
9672    <*.h>        filename glob
9673
9674 */
9675
9676 STATIC char *
9677 S_scan_inputsymbol(pTHX_ char *start)
9678 {
9679     dVAR;
9680     register char *s = start;           /* current position in buffer */
9681     char *end;
9682     I32 len;
9683     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9684     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9685
9686     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9687
9688     end = strchr(s, '\n');
9689     if (!end)
9690         end = PL_bufend;
9691     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9692
9693     /* die if we didn't have space for the contents of the <>,
9694        or if it didn't end, or if we see a newline
9695     */
9696
9697     if (len >= (I32)sizeof PL_tokenbuf)
9698         Perl_croak(aTHX_ "Excessively long <> operator");
9699     if (s >= end)
9700         Perl_croak(aTHX_ "Unterminated <> operator");
9701
9702     s++;
9703
9704     /* check for <$fh>
9705        Remember, only scalar variables are interpreted as filehandles by
9706        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9707        treated as a glob() call.
9708        This code makes use of the fact that except for the $ at the front,
9709        a scalar variable and a filehandle look the same.
9710     */
9711     if (*d == '$' && d[1]) d++;
9712
9713     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9714     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9715         d += UTF ? UTF8SKIP(d) : 1;
9716
9717     /* If we've tried to read what we allow filehandles to look like, and
9718        there's still text left, then it must be a glob() and not a getline.
9719        Use scan_str to pull out the stuff between the <> and treat it
9720        as nothing more than a string.
9721     */
9722
9723     if (d - PL_tokenbuf != len) {
9724         pl_yylval.ival = OP_GLOB;
9725         s = scan_str(start,!!PL_madskills,FALSE);
9726         if (!s)
9727            Perl_croak(aTHX_ "Glob not terminated");
9728         return s;
9729     }
9730     else {
9731         bool readline_overriden = FALSE;
9732         GV *gv_readline;
9733         GV **gvp;
9734         /* we're in a filehandle read situation */
9735         d = PL_tokenbuf;
9736
9737         /* turn <> into <ARGV> */
9738         if (!len)
9739             Copy("ARGV",d,5,char);
9740
9741         /* Check whether readline() is overriden */
9742         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9743         if ((gv_readline
9744                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9745                 ||
9746                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9747                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9748                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9749             readline_overriden = TRUE;
9750
9751         /* if <$fh>, create the ops to turn the variable into a
9752            filehandle
9753         */
9754         if (*d == '$') {
9755             /* try to find it in the pad for this block, otherwise find
9756                add symbol table ops
9757             */
9758             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9759             if (tmp != NOT_IN_PAD) {
9760                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9761                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9762                     HEK * const stashname = HvNAME_HEK(stash);
9763                     SV * const sym = sv_2mortal(newSVhek(stashname));
9764                     sv_catpvs(sym, "::");
9765                     sv_catpv(sym, d+1);
9766                     d = SvPVX(sym);
9767                     goto intro_sym;
9768                 }
9769                 else {
9770                     OP * const o = newOP(OP_PADSV, 0);
9771                     o->op_targ = tmp;
9772                     PL_lex_op = readline_overriden
9773                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9774                                 op_append_elem(OP_LIST, o,
9775                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9776                         : (OP*)newUNOP(OP_READLINE, 0, o);
9777                 }
9778             }
9779             else {
9780                 GV *gv;
9781                 ++d;
9782 intro_sym:
9783                 gv = gv_fetchpv(d,
9784                                 (PL_in_eval
9785                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9786                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9787                                 SVt_PV);
9788                 PL_lex_op = readline_overriden
9789                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9790                             op_append_elem(OP_LIST,
9791                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9792                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9793                     : (OP*)newUNOP(OP_READLINE, 0,
9794                             newUNOP(OP_RV2SV, 0,
9795                                 newGVOP(OP_GV, 0, gv)));
9796             }
9797             if (!readline_overriden)
9798                 PL_lex_op->op_flags |= OPf_SPECIAL;
9799             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9800             pl_yylval.ival = OP_NULL;
9801         }
9802
9803         /* If it's none of the above, it must be a literal filehandle
9804            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9805         else {
9806             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9807             PL_lex_op = readline_overriden
9808                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9809                         op_append_elem(OP_LIST,
9810                             newGVOP(OP_GV, 0, gv),
9811                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9812                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9813             pl_yylval.ival = OP_NULL;
9814         }
9815     }
9816
9817     return s;
9818 }
9819
9820
9821 /* scan_str
9822    takes: start position in buffer
9823           keep_quoted preserve \ on the embedded delimiter(s)
9824           keep_delims preserve the delimiters around the string
9825    returns: position to continue reading from buffer
9826    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9827         updates the read buffer.
9828
9829    This subroutine pulls a string out of the input.  It is called for:
9830         q               single quotes           q(literal text)
9831         '               single quotes           'literal text'
9832         qq              double quotes           qq(interpolate $here please)
9833         "               double quotes           "interpolate $here please"
9834         qx              backticks               qx(/bin/ls -l)
9835         `               backticks               `/bin/ls -l`
9836         qw              quote words             @EXPORT_OK = qw( func() $spam )
9837         m//             regexp match            m/this/
9838         s///            regexp substitute       s/this/that/
9839         tr///           string transliterate    tr/this/that/
9840         y///            string transliterate    y/this/that/
9841         ($*@)           sub prototypes          sub foo ($)
9842         (stuff)         sub attr parameters     sub foo : attr(stuff)
9843         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9844
9845    In most of these cases (all but <>, patterns and transliterate)
9846    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9847    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9848    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9849    calls scan_str().
9850
9851    It skips whitespace before the string starts, and treats the first
9852    character as the delimiter.  If the delimiter is one of ([{< then
9853    the corresponding "close" character )]}> is used as the closing
9854    delimiter.  It allows quoting of delimiters, and if the string has
9855    balanced delimiters ([{<>}]) it allows nesting.
9856
9857    On success, the SV with the resulting string is put into lex_stuff or,
9858    if that is already non-NULL, into lex_repl. The second case occurs only
9859    when parsing the RHS of the special constructs s/// and tr/// (y///).
9860    For convenience, the terminating delimiter character is stuffed into
9861    SvIVX of the SV.
9862 */
9863
9864 STATIC char *
9865 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9866 {
9867     dVAR;
9868     SV *sv;                             /* scalar value: string */
9869     const char *tmps;                   /* temp string, used for delimiter matching */
9870     register char *s = start;           /* current position in the buffer */
9871     register char term;                 /* terminating character */
9872     register char *to;                  /* current position in the sv's data */
9873     I32 brackets = 1;                   /* bracket nesting level */
9874     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9875     I32 termcode;                       /* terminating char. code */
9876     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9877     STRLEN termlen;                     /* length of terminating string */
9878     int last_off = 0;                   /* last position for nesting bracket */
9879 #ifdef PERL_MAD
9880     int stuffstart;
9881     char *tstart;
9882 #endif
9883
9884     PERL_ARGS_ASSERT_SCAN_STR;
9885
9886     /* skip space before the delimiter */
9887     if (isSPACE(*s)) {
9888         s = PEEKSPACE(s);
9889     }
9890
9891 #ifdef PERL_MAD
9892     if (PL_realtokenstart >= 0) {
9893         stuffstart = PL_realtokenstart;
9894         PL_realtokenstart = -1;
9895     }
9896     else
9897         stuffstart = start - SvPVX(PL_linestr);
9898 #endif
9899     /* mark where we are, in case we need to report errors */
9900     CLINE;
9901
9902     /* after skipping whitespace, the next character is the terminator */
9903     term = *s;
9904     if (!UTF) {
9905         termcode = termstr[0] = term;
9906         termlen = 1;
9907     }
9908     else {
9909         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9910         Copy(s, termstr, termlen, U8);
9911         if (!UTF8_IS_INVARIANT(term))
9912             has_utf8 = TRUE;
9913     }
9914
9915     /* mark where we are */
9916     PL_multi_start = CopLINE(PL_curcop);
9917     PL_multi_open = term;
9918
9919     /* find corresponding closing delimiter */
9920     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9921         termcode = termstr[0] = term = tmps[5];
9922
9923     PL_multi_close = term;
9924
9925     /* create a new SV to hold the contents.  79 is the SV's initial length.
9926        What a random number. */
9927     sv = newSV_type(SVt_PVIV);
9928     SvGROW(sv, 80);
9929     SvIV_set(sv, termcode);
9930     (void)SvPOK_only(sv);               /* validate pointer */
9931
9932     /* move past delimiter and try to read a complete string */
9933     if (keep_delims)
9934         sv_catpvn(sv, s, termlen);
9935     s += termlen;
9936 #ifdef PERL_MAD
9937     tstart = SvPVX(PL_linestr) + stuffstart;
9938     if (!PL_thisopen && !keep_delims) {
9939         PL_thisopen = newSVpvn(tstart, s - tstart);
9940         stuffstart = s - SvPVX(PL_linestr);
9941     }
9942 #endif
9943     for (;;) {
9944         if (PL_encoding && !UTF) {
9945             bool cont = TRUE;
9946
9947             while (cont) {
9948                 int offset = s - SvPVX_const(PL_linestr);
9949                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9950                                            &offset, (char*)termstr, termlen);
9951                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9952                 char * const svlast = SvEND(sv) - 1;
9953
9954                 for (; s < ns; s++) {
9955                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9956                         CopLINE_inc(PL_curcop);
9957                 }
9958                 if (!found)
9959                     goto read_more_line;
9960                 else {
9961                     /* handle quoted delimiters */
9962                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9963                         const char *t;
9964                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9965                             t--;
9966                         if ((svlast-1 - t) % 2) {
9967                             if (!keep_quoted) {
9968                                 *(svlast-1) = term;
9969                                 *svlast = '\0';
9970                                 SvCUR_set(sv, SvCUR(sv) - 1);
9971                             }
9972                             continue;
9973                         }
9974                     }
9975                     if (PL_multi_open == PL_multi_close) {
9976                         cont = FALSE;
9977                     }
9978                     else {
9979                         const char *t;
9980                         char *w;
9981                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9982                             /* At here, all closes are "was quoted" one,
9983                                so we don't check PL_multi_close. */
9984                             if (*t == '\\') {
9985                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9986                                     t++;
9987                                 else
9988                                     *w++ = *t++;
9989                             }
9990                             else if (*t == PL_multi_open)
9991                                 brackets++;
9992
9993                             *w = *t;
9994                         }
9995                         if (w < t) {
9996                             *w++ = term;
9997                             *w = '\0';
9998                             SvCUR_set(sv, w - SvPVX_const(sv));
9999                         }
10000                         last_off = w - SvPVX(sv);
10001                         if (--brackets <= 0)
10002                             cont = FALSE;
10003                     }
10004                 }
10005             }
10006             if (!keep_delims) {
10007                 SvCUR_set(sv, SvCUR(sv) - 1);
10008                 *SvEND(sv) = '\0';
10009             }
10010             break;
10011         }
10012
10013         /* extend sv if need be */
10014         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10015         /* set 'to' to the next character in the sv's string */
10016         to = SvPVX(sv)+SvCUR(sv);
10017
10018         /* if open delimiter is the close delimiter read unbridle */
10019         if (PL_multi_open == PL_multi_close) {
10020             for (; s < PL_bufend; s++,to++) {
10021                 /* embedded newlines increment the current line number */
10022                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10023                     CopLINE_inc(PL_curcop);
10024                 /* handle quoted delimiters */
10025                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10026                     if (!keep_quoted && s[1] == term)
10027                         s++;
10028                 /* any other quotes are simply copied straight through */
10029                     else
10030                         *to++ = *s++;
10031                 }
10032                 /* terminate when run out of buffer (the for() condition), or
10033                    have found the terminator */
10034                 else if (*s == term) {
10035                     if (termlen == 1)
10036                         break;
10037                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10038                         break;
10039                 }
10040                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10041                     has_utf8 = TRUE;
10042                 *to = *s;
10043             }
10044         }
10045
10046         /* if the terminator isn't the same as the start character (e.g.,
10047            matched brackets), we have to allow more in the quoting, and
10048            be prepared for nested brackets.
10049         */
10050         else {
10051             /* read until we run out of string, or we find the terminator */
10052             for (; s < PL_bufend; s++,to++) {
10053                 /* embedded newlines increment the line count */
10054                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10055                     CopLINE_inc(PL_curcop);
10056                 /* backslashes can escape the open or closing characters */
10057                 if (*s == '\\' && s+1 < PL_bufend) {
10058                     if (!keep_quoted &&
10059                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10060                         s++;
10061                     else
10062                         *to++ = *s++;
10063                 }
10064                 /* allow nested opens and closes */
10065                 else if (*s == PL_multi_close && --brackets <= 0)
10066                     break;
10067                 else if (*s == PL_multi_open)
10068                     brackets++;
10069                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10070                     has_utf8 = TRUE;
10071                 *to = *s;
10072             }
10073         }
10074         /* terminate the copied string and update the sv's end-of-string */
10075         *to = '\0';
10076         SvCUR_set(sv, to - SvPVX_const(sv));
10077
10078         /*
10079          * this next chunk reads more into the buffer if we're not done yet
10080          */
10081
10082         if (s < PL_bufend)
10083             break;              /* handle case where we are done yet :-) */
10084
10085 #ifndef PERL_STRICT_CR
10086         if (to - SvPVX_const(sv) >= 2) {
10087             if ((to[-2] == '\r' && to[-1] == '\n') ||
10088                 (to[-2] == '\n' && to[-1] == '\r'))
10089             {
10090                 to[-2] = '\n';
10091                 to--;
10092                 SvCUR_set(sv, to - SvPVX_const(sv));
10093             }
10094             else if (to[-1] == '\r')
10095                 to[-1] = '\n';
10096         }
10097         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10098             to[-1] = '\n';
10099 #endif
10100
10101      read_more_line:
10102         /* if we're out of file, or a read fails, bail and reset the current
10103            line marker so we can report where the unterminated string began
10104         */
10105 #ifdef PERL_MAD
10106         if (PL_madskills) {
10107             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10108             if (PL_thisstuff)
10109                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10110             else
10111                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10112         }
10113 #endif
10114         CopLINE_inc(PL_curcop);
10115         PL_bufptr = PL_bufend;
10116         if (!lex_next_chunk(0)) {
10117             sv_free(sv);
10118             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10119             return NULL;
10120         }
10121         s = PL_bufptr;
10122 #ifdef PERL_MAD
10123         stuffstart = 0;
10124 #endif
10125     }
10126
10127     /* at this point, we have successfully read the delimited string */
10128
10129     if (!PL_encoding || UTF) {
10130 #ifdef PERL_MAD
10131         if (PL_madskills) {
10132             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10133             const int len = s - tstart;
10134             if (PL_thisstuff)
10135                 sv_catpvn(PL_thisstuff, tstart, len);
10136             else
10137                 PL_thisstuff = newSVpvn(tstart, len);
10138             if (!PL_thisclose && !keep_delims)
10139                 PL_thisclose = newSVpvn(s,termlen);
10140         }
10141 #endif
10142
10143         if (keep_delims)
10144             sv_catpvn(sv, s, termlen);
10145         s += termlen;
10146     }
10147 #ifdef PERL_MAD
10148     else {
10149         if (PL_madskills) {
10150             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10151             const int len = s - tstart - termlen;
10152             if (PL_thisstuff)
10153                 sv_catpvn(PL_thisstuff, tstart, len);
10154             else
10155                 PL_thisstuff = newSVpvn(tstart, len);
10156             if (!PL_thisclose && !keep_delims)
10157                 PL_thisclose = newSVpvn(s - termlen,termlen);
10158         }
10159     }
10160 #endif
10161     if (has_utf8 || PL_encoding)
10162         SvUTF8_on(sv);
10163
10164     PL_multi_end = CopLINE(PL_curcop);
10165
10166     /* if we allocated too much space, give some back */
10167     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10168         SvLEN_set(sv, SvCUR(sv) + 1);
10169         SvPV_renew(sv, SvLEN(sv));
10170     }
10171
10172     /* decide whether this is the first or second quoted string we've read
10173        for this op
10174     */
10175
10176     if (PL_lex_stuff)
10177         PL_lex_repl = sv;
10178     else
10179         PL_lex_stuff = sv;
10180     return s;
10181 }
10182
10183 /*
10184   scan_num
10185   takes: pointer to position in buffer
10186   returns: pointer to new position in buffer
10187   side-effects: builds ops for the constant in pl_yylval.op
10188
10189   Read a number in any of the formats that Perl accepts:
10190
10191   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10192   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10193   0b[01](_?[01])*
10194   0[0-7](_?[0-7])*
10195   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10196
10197   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10198   thing it reads.
10199
10200   If it reads a number without a decimal point or an exponent, it will
10201   try converting the number to an integer and see if it can do so
10202   without loss of precision.
10203 */
10204
10205 char *
10206 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10207 {
10208     dVAR;
10209     register const char *s = start;     /* current position in buffer */
10210     register char *d;                   /* destination in temp buffer */
10211     register char *e;                   /* end of temp buffer */
10212     NV nv;                              /* number read, as a double */
10213     SV *sv = NULL;                      /* place to put the converted number */
10214     bool floatit;                       /* boolean: int or float? */
10215     const char *lastub = NULL;          /* position of last underbar */
10216     static char const number_too_long[] = "Number too long";
10217
10218     PERL_ARGS_ASSERT_SCAN_NUM;
10219
10220     /* We use the first character to decide what type of number this is */
10221
10222     switch (*s) {
10223     default:
10224         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10225
10226     /* if it starts with a 0, it could be an octal number, a decimal in
10227        0.13 disguise, or a hexadecimal number, or a binary number. */
10228     case '0':
10229         {
10230           /* variables:
10231              u          holds the "number so far"
10232              shift      the power of 2 of the base
10233                         (hex == 4, octal == 3, binary == 1)
10234              overflowed was the number more than we can hold?
10235
10236              Shift is used when we add a digit.  It also serves as an "are
10237              we in octal/hex/binary?" indicator to disallow hex characters
10238              when in octal mode.
10239            */
10240             NV n = 0.0;
10241             UV u = 0;
10242             I32 shift;
10243             bool overflowed = FALSE;
10244             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10245             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10246             static const char* const bases[5] =
10247               { "", "binary", "", "octal", "hexadecimal" };
10248             static const char* const Bases[5] =
10249               { "", "Binary", "", "Octal", "Hexadecimal" };
10250             static const char* const maxima[5] =
10251               { "",
10252                 "0b11111111111111111111111111111111",
10253                 "",
10254                 "037777777777",
10255                 "0xffffffff" };
10256             const char *base, *Base, *max;
10257
10258             /* check for hex */
10259             if (s[1] == 'x' || s[1] == 'X') {
10260                 shift = 4;
10261                 s += 2;
10262                 just_zero = FALSE;
10263             } else if (s[1] == 'b' || s[1] == 'B') {
10264                 shift = 1;
10265                 s += 2;
10266                 just_zero = FALSE;
10267             }
10268             /* check for a decimal in disguise */
10269             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10270                 goto decimal;
10271             /* so it must be octal */
10272             else {
10273                 shift = 3;
10274                 s++;
10275             }
10276
10277             if (*s == '_') {
10278                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10279                                "Misplaced _ in number");
10280                lastub = s++;
10281             }
10282
10283             base = bases[shift];
10284             Base = Bases[shift];
10285             max  = maxima[shift];
10286
10287             /* read the rest of the number */
10288             for (;;) {
10289                 /* x is used in the overflow test,
10290                    b is the digit we're adding on. */
10291                 UV x, b;
10292
10293                 switch (*s) {
10294
10295                 /* if we don't mention it, we're done */
10296                 default:
10297                     goto out;
10298
10299                 /* _ are ignored -- but warned about if consecutive */
10300                 case '_':
10301                     if (lastub && s == lastub + 1)
10302                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10303                                        "Misplaced _ in number");
10304                     lastub = s++;
10305                     break;
10306
10307                 /* 8 and 9 are not octal */
10308                 case '8': case '9':
10309                     if (shift == 3)
10310                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10311                     /* FALL THROUGH */
10312
10313                 /* octal digits */
10314                 case '2': case '3': case '4':
10315                 case '5': case '6': case '7':
10316                     if (shift == 1)
10317                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10318                     /* FALL THROUGH */
10319
10320                 case '0': case '1':
10321                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10322                     goto digit;
10323
10324                 /* hex digits */
10325                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10326                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10327                     /* make sure they said 0x */
10328                     if (shift != 4)
10329                         goto out;
10330                     b = (*s++ & 7) + 9;
10331
10332                     /* Prepare to put the digit we have onto the end
10333                        of the number so far.  We check for overflows.
10334                     */
10335
10336                   digit:
10337                     just_zero = FALSE;
10338                     if (!overflowed) {
10339                         x = u << shift; /* make room for the digit */
10340
10341                         if ((x >> shift) != u
10342                             && !(PL_hints & HINT_NEW_BINARY)) {
10343                             overflowed = TRUE;
10344                             n = (NV) u;
10345                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10346                                              "Integer overflow in %s number",
10347                                              base);
10348                         } else
10349                             u = x | b;          /* add the digit to the end */
10350                     }
10351                     if (overflowed) {
10352                         n *= nvshift[shift];
10353                         /* If an NV has not enough bits in its
10354                          * mantissa to represent an UV this summing of
10355                          * small low-order numbers is a waste of time
10356                          * (because the NV cannot preserve the
10357                          * low-order bits anyway): we could just
10358                          * remember when did we overflow and in the
10359                          * end just multiply n by the right
10360                          * amount. */
10361                         n += (NV) b;
10362                     }
10363                     break;
10364                 }
10365             }
10366
10367           /* if we get here, we had success: make a scalar value from
10368              the number.
10369           */
10370           out:
10371
10372             /* final misplaced underbar check */
10373             if (s[-1] == '_') {
10374                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10375             }
10376
10377             if (overflowed) {
10378                 if (n > 4294967295.0)
10379                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10380                                    "%s number > %s non-portable",
10381                                    Base, max);
10382                 sv = newSVnv(n);
10383             }
10384             else {
10385 #if UVSIZE > 4
10386                 if (u > 0xffffffff)
10387                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10388                                    "%s number > %s non-portable",
10389                                    Base, max);
10390 #endif
10391                 sv = newSVuv(u);
10392             }
10393             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10394                 sv = new_constant(start, s - start, "integer",
10395                                   sv, NULL, NULL, 0);
10396             else if (PL_hints & HINT_NEW_BINARY)
10397                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10398         }
10399         break;
10400
10401     /*
10402       handle decimal numbers.
10403       we're also sent here when we read a 0 as the first digit
10404     */
10405     case '1': case '2': case '3': case '4': case '5':
10406     case '6': case '7': case '8': case '9': case '.':
10407       decimal:
10408         d = PL_tokenbuf;
10409         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10410         floatit = FALSE;
10411
10412         /* read next group of digits and _ and copy into d */
10413         while (isDIGIT(*s) || *s == '_') {
10414             /* skip underscores, checking for misplaced ones
10415                if -w is on
10416             */
10417             if (*s == '_') {
10418                 if (lastub && s == lastub + 1)
10419                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10420                                    "Misplaced _ in number");
10421                 lastub = s++;
10422             }
10423             else {
10424                 /* check for end of fixed-length buffer */
10425                 if (d >= e)
10426                     Perl_croak(aTHX_ number_too_long);
10427                 /* if we're ok, copy the character */
10428                 *d++ = *s++;
10429             }
10430         }
10431
10432         /* final misplaced underbar check */
10433         if (lastub && s == lastub + 1) {
10434             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10435         }
10436
10437         /* read a decimal portion if there is one.  avoid
10438            3..5 being interpreted as the number 3. followed
10439            by .5
10440         */
10441         if (*s == '.' && s[1] != '.') {
10442             floatit = TRUE;
10443             *d++ = *s++;
10444
10445             if (*s == '_') {
10446                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10447                                "Misplaced _ in number");
10448                 lastub = s;
10449             }
10450
10451             /* copy, ignoring underbars, until we run out of digits.
10452             */
10453             for (; isDIGIT(*s) || *s == '_'; s++) {
10454                 /* fixed length buffer check */
10455                 if (d >= e)
10456                     Perl_croak(aTHX_ number_too_long);
10457                 if (*s == '_') {
10458                    if (lastub && s == lastub + 1)
10459                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10460                                       "Misplaced _ in number");
10461                    lastub = s;
10462                 }
10463                 else
10464                     *d++ = *s;
10465             }
10466             /* fractional part ending in underbar? */
10467             if (s[-1] == '_') {
10468                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10469                                "Misplaced _ in number");
10470             }
10471             if (*s == '.' && isDIGIT(s[1])) {
10472                 /* oops, it's really a v-string, but without the "v" */
10473                 s = start;
10474                 goto vstring;
10475             }
10476         }
10477
10478         /* read exponent part, if present */
10479         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10480             floatit = TRUE;
10481             s++;
10482
10483             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10484             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10485
10486             /* stray preinitial _ */
10487             if (*s == '_') {
10488                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10489                                "Misplaced _ in number");
10490                 lastub = s++;
10491             }
10492
10493             /* allow positive or negative exponent */
10494             if (*s == '+' || *s == '-')
10495                 *d++ = *s++;
10496
10497             /* stray initial _ */
10498             if (*s == '_') {
10499                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10500                                "Misplaced _ in number");
10501                 lastub = s++;
10502             }
10503
10504             /* read digits of exponent */
10505             while (isDIGIT(*s) || *s == '_') {
10506                 if (isDIGIT(*s)) {
10507                     if (d >= e)
10508                         Perl_croak(aTHX_ number_too_long);
10509                     *d++ = *s++;
10510                 }
10511                 else {
10512                    if (((lastub && s == lastub + 1) ||
10513                         (!isDIGIT(s[1]) && s[1] != '_')))
10514                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10515                                       "Misplaced _ in number");
10516                    lastub = s++;
10517                 }
10518             }
10519         }
10520
10521
10522         /*
10523            We try to do an integer conversion first if no characters
10524            indicating "float" have been found.
10525          */
10526
10527         if (!floatit) {
10528             UV uv;
10529             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10530
10531             if (flags == IS_NUMBER_IN_UV) {
10532               if (uv <= IV_MAX)
10533                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10534               else
10535                 sv = newSVuv(uv);
10536             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10537               if (uv <= (UV) IV_MIN)
10538                 sv = newSViv(-(IV)uv);
10539               else
10540                 floatit = TRUE;
10541             } else
10542               floatit = TRUE;
10543         }
10544         if (floatit) {
10545             /* terminate the string */
10546             *d = '\0';
10547             nv = Atof(PL_tokenbuf);
10548             sv = newSVnv(nv);
10549         }
10550
10551         if ( floatit
10552              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10553             const char *const key = floatit ? "float" : "integer";
10554             const STRLEN keylen = floatit ? 5 : 7;
10555             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10556                                 key, keylen, sv, NULL, NULL, 0);
10557         }
10558         break;
10559
10560     /* if it starts with a v, it could be a v-string */
10561     case 'v':
10562 vstring:
10563                 sv = newSV(5); /* preallocate storage space */
10564                 s = scan_vstring(s, PL_bufend, sv);
10565         break;
10566     }
10567
10568     /* make the op for the constant and return */
10569
10570     if (sv)
10571         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10572     else
10573         lvalp->opval = NULL;
10574
10575     return (char *)s;
10576 }
10577
10578 STATIC char *
10579 S_scan_formline(pTHX_ register char *s)
10580 {
10581     dVAR;
10582     register char *eol;
10583     register char *t;
10584     SV * const stuff = newSVpvs("");
10585     bool needargs = FALSE;
10586     bool eofmt = FALSE;
10587 #ifdef PERL_MAD
10588     char *tokenstart = s;
10589     SV* savewhite = NULL;
10590
10591     if (PL_madskills) {
10592         savewhite = PL_thiswhite;
10593         PL_thiswhite = 0;
10594     }
10595 #endif
10596
10597     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10598
10599     while (!needargs) {
10600         if (*s == '.') {
10601             t = s+1;
10602 #ifdef PERL_STRICT_CR
10603             while (SPACE_OR_TAB(*t))
10604                 t++;
10605 #else
10606             while (SPACE_OR_TAB(*t) || *t == '\r')
10607                 t++;
10608 #endif
10609             if (*t == '\n' || t == PL_bufend) {
10610                 eofmt = TRUE;
10611                 break;
10612             }
10613         }
10614         if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
10615             eol = (char *) memchr(s,'\n',PL_bufend-s);
10616             if (!eol++)
10617                 eol = PL_bufend;
10618         }
10619         else
10620             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10621         if (*s != '#') {
10622             for (t = s; t < eol; t++) {
10623                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10624                     needargs = FALSE;
10625                     goto enough;        /* ~~ must be first line in formline */
10626                 }
10627                 if (*t == '@' || *t == '^')
10628                     needargs = TRUE;
10629             }
10630             if (eol > s) {
10631                 sv_catpvn(stuff, s, eol-s);
10632 #ifndef PERL_STRICT_CR
10633                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10634                     char *end = SvPVX(stuff) + SvCUR(stuff);
10635                     end[-2] = '\n';
10636                     end[-1] = '\0';
10637                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10638                 }
10639 #endif
10640             }
10641             else
10642               break;
10643         }
10644         s = (char*)eol;
10645         if (PL_rsfp || PL_parser->filtered) {
10646             bool got_some;
10647 #ifdef PERL_MAD
10648             if (PL_madskills) {
10649                 if (PL_thistoken)
10650                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10651                 else
10652                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10653             }
10654 #endif
10655             PL_bufptr = PL_bufend;
10656             CopLINE_inc(PL_curcop);
10657             got_some = lex_next_chunk(0);
10658             CopLINE_dec(PL_curcop);
10659             s = PL_bufptr;
10660 #ifdef PERL_MAD
10661             tokenstart = PL_bufptr;
10662 #endif
10663             if (!got_some)
10664                 break;
10665         }
10666         incline(s);
10667     }
10668   enough:
10669     if (SvCUR(stuff)) {
10670         PL_expect = XTERM;
10671         if (needargs) {
10672             PL_lex_state = LEX_NORMAL;
10673             start_force(PL_curforce);
10674             NEXTVAL_NEXTTOKE.ival = 0;
10675             force_next(',');
10676         }
10677         else
10678             PL_lex_state = LEX_FORMLINE;
10679         if (!IN_BYTES) {
10680             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10681                 SvUTF8_on(stuff);
10682             else if (PL_encoding)
10683                 sv_recode_to_utf8(stuff, PL_encoding);
10684         }
10685         start_force(PL_curforce);
10686         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10687         force_next(THING);
10688         start_force(PL_curforce);
10689         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10690         force_next(LSTOP);
10691     }
10692     else {
10693         SvREFCNT_dec(stuff);
10694         if (eofmt)
10695             PL_lex_formbrack = 0;
10696         PL_bufptr = s;
10697     }
10698 #ifdef PERL_MAD
10699     if (PL_madskills) {
10700         if (PL_thistoken)
10701             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10702         else
10703             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10704         PL_thiswhite = savewhite;
10705     }
10706 #endif
10707     return s;
10708 }
10709
10710 I32
10711 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10712 {
10713     dVAR;
10714     const I32 oldsavestack_ix = PL_savestack_ix;
10715     CV* const outsidecv = PL_compcv;
10716
10717     if (PL_compcv) {
10718         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10719     }
10720     SAVEI32(PL_subline);
10721     save_item(PL_subname);
10722     SAVESPTR(PL_compcv);
10723
10724     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10725     CvFLAGS(PL_compcv) |= flags;
10726
10727     PL_subline = CopLINE(PL_curcop);
10728     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10729     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10730     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10731
10732     return oldsavestack_ix;
10733 }
10734
10735 #ifdef __SC__
10736 #pragma segment Perl_yylex
10737 #endif
10738 static int
10739 S_yywarn(pTHX_ const char *const s, U32 flags)
10740 {
10741     dVAR;
10742
10743     PERL_ARGS_ASSERT_YYWARN;
10744
10745     PL_in_eval |= EVAL_WARNONLY;
10746     yyerror_pv(s, flags);
10747     PL_in_eval &= ~EVAL_WARNONLY;
10748     return 0;
10749 }
10750
10751 int
10752 Perl_yyerror(pTHX_ const char *const s)
10753 {
10754     PERL_ARGS_ASSERT_YYERROR;
10755     return yyerror_pvn(s, strlen(s), 0);
10756 }
10757
10758 int
10759 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10760 {
10761     PERL_ARGS_ASSERT_YYERROR_PV;
10762     return yyerror_pvn(s, strlen(s), flags);
10763 }
10764
10765 int
10766 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10767 {
10768     dVAR;
10769     const char *context = NULL;
10770     int contlen = -1;
10771     SV *msg;
10772     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10773     int yychar  = PL_parser->yychar;
10774     U32 is_utf8 = flags & SVf_UTF8;
10775
10776     PERL_ARGS_ASSERT_YYERROR_PVN;
10777
10778     if (!yychar || (yychar == ';' && !PL_rsfp))
10779         sv_catpvs(where_sv, "at EOF");
10780     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10781       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10782       PL_oldbufptr != PL_bufptr) {
10783         /*
10784                 Only for NetWare:
10785                 The code below is removed for NetWare because it abends/crashes on NetWare
10786                 when the script has error such as not having the closing quotes like:
10787                     if ($var eq "value)
10788                 Checking of white spaces is anyway done in NetWare code.
10789         */
10790 #ifndef NETWARE
10791         while (isSPACE(*PL_oldoldbufptr))
10792             PL_oldoldbufptr++;
10793 #endif
10794         context = PL_oldoldbufptr;
10795         contlen = PL_bufptr - PL_oldoldbufptr;
10796     }
10797     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10798       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10799         /*
10800                 Only for NetWare:
10801                 The code below is removed for NetWare because it abends/crashes on NetWare
10802                 when the script has error such as not having the closing quotes like:
10803                     if ($var eq "value)
10804                 Checking of white spaces is anyway done in NetWare code.
10805         */
10806 #ifndef NETWARE
10807         while (isSPACE(*PL_oldbufptr))
10808             PL_oldbufptr++;
10809 #endif
10810         context = PL_oldbufptr;
10811         contlen = PL_bufptr - PL_oldbufptr;
10812     }
10813     else if (yychar > 255)
10814         sv_catpvs(where_sv, "next token ???");
10815     else if (yychar == -2) { /* YYEMPTY */
10816         if (PL_lex_state == LEX_NORMAL ||
10817            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10818             sv_catpvs(where_sv, "at end of line");
10819         else if (PL_lex_inpat)
10820             sv_catpvs(where_sv, "within pattern");
10821         else
10822             sv_catpvs(where_sv, "within string");
10823     }
10824     else {
10825         sv_catpvs(where_sv, "next char ");
10826         if (yychar < 32)
10827             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10828         else if (isPRINT_LC(yychar)) {
10829             const char string = yychar;
10830             sv_catpvn(where_sv, &string, 1);
10831         }
10832         else
10833             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10834     }
10835     msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
10836     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10837         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10838     if (context)
10839         Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
10840                             SVfARG(newSVpvn_flags(context, contlen,
10841                                         SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
10842     else
10843         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10844     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10845         Perl_sv_catpvf(aTHX_ msg,
10846         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10847                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10848         PL_multi_end = 0;
10849     }
10850     if (PL_in_eval & EVAL_WARNONLY) {
10851         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10852     }
10853     else
10854         qerror(msg);
10855     if (PL_error_count >= 10) {
10856         if (PL_in_eval && SvCUR(ERRSV))
10857             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10858                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10859         else
10860             Perl_croak(aTHX_ "%s has too many errors.\n",
10861             OutCopFILE(PL_curcop));
10862     }
10863     PL_in_my = 0;
10864     PL_in_my_stash = NULL;
10865     return 0;
10866 }
10867 #ifdef __SC__
10868 #pragma segment Main
10869 #endif
10870
10871 STATIC char*
10872 S_swallow_bom(pTHX_ U8 *s)
10873 {
10874     dVAR;
10875     const STRLEN slen = SvCUR(PL_linestr);
10876
10877     PERL_ARGS_ASSERT_SWALLOW_BOM;
10878
10879     switch (s[0]) {
10880     case 0xFF:
10881         if (s[1] == 0xFE) {
10882             /* UTF-16 little-endian? (or UTF-32LE?) */
10883             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10884                 /* diag_listed_as: Unsupported script encoding %s */
10885                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10886 #ifndef PERL_NO_UTF16_FILTER
10887             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10888             s += 2;
10889             if (PL_bufend > (char*)s) {
10890                 s = add_utf16_textfilter(s, TRUE);
10891             }
10892 #else
10893             /* diag_listed_as: Unsupported script encoding %s */
10894             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10895 #endif
10896         }
10897         break;
10898     case 0xFE:
10899         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10900 #ifndef PERL_NO_UTF16_FILTER
10901             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10902             s += 2;
10903             if (PL_bufend > (char *)s) {
10904                 s = add_utf16_textfilter(s, FALSE);
10905             }
10906 #else
10907             /* diag_listed_as: Unsupported script encoding %s */
10908             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10909 #endif
10910         }
10911         break;
10912     case 0xEF:
10913         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10914             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10915             s += 3;                      /* UTF-8 */
10916         }
10917         break;
10918     case 0:
10919         if (slen > 3) {
10920              if (s[1] == 0) {
10921                   if (s[2] == 0xFE && s[3] == 0xFF) {
10922                        /* UTF-32 big-endian */
10923                        /* diag_listed_as: Unsupported script encoding %s */
10924                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10925                   }
10926              }
10927              else if (s[2] == 0 && s[3] != 0) {
10928                   /* Leading bytes
10929                    * 00 xx 00 xx
10930                    * are a good indicator of UTF-16BE. */
10931 #ifndef PERL_NO_UTF16_FILTER
10932                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10933                   s = add_utf16_textfilter(s, FALSE);
10934 #else
10935                   /* diag_listed_as: Unsupported script encoding %s */
10936                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10937 #endif
10938              }
10939         }
10940 #ifdef EBCDIC
10941     case 0xDD:
10942         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10943             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10944             s += 4;                      /* UTF-8 */
10945         }
10946         break;
10947 #endif
10948
10949     default:
10950          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10951                   /* Leading bytes
10952                    * xx 00 xx 00
10953                    * are a good indicator of UTF-16LE. */
10954 #ifndef PERL_NO_UTF16_FILTER
10955               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10956               s = add_utf16_textfilter(s, TRUE);
10957 #else
10958               /* diag_listed_as: Unsupported script encoding %s */
10959               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10960 #endif
10961          }
10962     }
10963     return (char*)s;
10964 }
10965
10966
10967 #ifndef PERL_NO_UTF16_FILTER
10968 static I32
10969 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10970 {
10971     dVAR;
10972     SV *const filter = FILTER_DATA(idx);
10973     /* We re-use this each time round, throwing the contents away before we
10974        return.  */
10975     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10976     SV *const utf8_buffer = filter;
10977     IV status = IoPAGE(filter);
10978     const bool reverse = cBOOL(IoLINES(filter));
10979     I32 retval;
10980
10981     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10982
10983     /* As we're automatically added, at the lowest level, and hence only called
10984        from this file, we can be sure that we're not called in block mode. Hence
10985        don't bother writing code to deal with block mode.  */
10986     if (maxlen) {
10987         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10988     }
10989     if (status < 0) {
10990         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10991     }
10992     DEBUG_P(PerlIO_printf(Perl_debug_log,
10993                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10994                           FPTR2DPTR(void *, S_utf16_textfilter),
10995                           reverse ? 'l' : 'b', idx, maxlen, status,
10996                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10997
10998     while (1) {
10999         STRLEN chars;
11000         STRLEN have;
11001         I32 newlen;
11002         U8 *end;
11003         /* First, look in our buffer of existing UTF-8 data:  */
11004         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11005
11006         if (nl) {
11007             ++nl;
11008         } else if (status == 0) {
11009             /* EOF */
11010             IoPAGE(filter) = 0;
11011             nl = SvEND(utf8_buffer);
11012         }
11013         if (nl) {
11014             STRLEN got = nl - SvPVX(utf8_buffer);
11015             /* Did we have anything to append?  */
11016             retval = got != 0;
11017             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11018             /* Everything else in this code works just fine if SVp_POK isn't
11019                set.  This, however, needs it, and we need it to work, else
11020                we loop infinitely because the buffer is never consumed.  */
11021             sv_chop(utf8_buffer, nl);
11022             break;
11023         }
11024
11025         /* OK, not a complete line there, so need to read some more UTF-16.
11026            Read an extra octect if the buffer currently has an odd number. */
11027         while (1) {
11028             if (status <= 0)
11029                 break;
11030             if (SvCUR(utf16_buffer) >= 2) {
11031                 /* Location of the high octet of the last complete code point.
11032                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11033                    *coupled* with all the benefits of partial reads and
11034                    endianness.  */
11035                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11036                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11037
11038                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11039                     break;
11040                 }
11041
11042                 /* We have the first half of a surrogate. Read more.  */
11043                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11044             }
11045
11046             status = FILTER_READ(idx + 1, utf16_buffer,
11047                                  160 + (SvCUR(utf16_buffer) & 1));
11048             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11049             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11050             if (status < 0) {
11051                 /* Error */
11052                 IoPAGE(filter) = status;
11053                 return status;
11054             }
11055         }
11056
11057         chars = SvCUR(utf16_buffer) >> 1;
11058         have = SvCUR(utf8_buffer);
11059         SvGROW(utf8_buffer, have + chars * 3 + 1);
11060
11061         if (reverse) {
11062             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11063                                          (U8*)SvPVX_const(utf8_buffer) + have,
11064                                          chars * 2, &newlen);
11065         } else {
11066             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11067                                 (U8*)SvPVX_const(utf8_buffer) + have,
11068                                 chars * 2, &newlen);
11069         }
11070         SvCUR_set(utf8_buffer, have + newlen);
11071         *end = '\0';
11072
11073         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11074            it's private to us, and utf16_to_utf8{,reversed} take a
11075            (pointer,length) pair, rather than a NUL-terminated string.  */
11076         if(SvCUR(utf16_buffer) & 1) {
11077             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11078             SvCUR_set(utf16_buffer, 1);
11079         } else {
11080             SvCUR_set(utf16_buffer, 0);
11081         }
11082     }
11083     DEBUG_P(PerlIO_printf(Perl_debug_log,
11084                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11085                           status,
11086                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11087     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11088     return retval;
11089 }
11090
11091 static U8 *
11092 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11093 {
11094     SV *filter = filter_add(S_utf16_textfilter, NULL);
11095
11096     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11097
11098     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11099     sv_setpvs(filter, "");
11100     IoLINES(filter) = reversed;
11101     IoPAGE(filter) = 1; /* Not EOF */
11102
11103     /* Sadly, we have to return a valid pointer, come what may, so we have to
11104        ignore any error return from this.  */
11105     SvCUR_set(PL_linestr, 0);
11106     if (FILTER_READ(0, PL_linestr, 0)) {
11107         SvUTF8_on(PL_linestr);
11108     } else {
11109         SvUTF8_on(PL_linestr);
11110     }
11111     PL_bufend = SvEND(PL_linestr);
11112     return (U8*)SvPVX(PL_linestr);
11113 }
11114 #endif
11115
11116 /*
11117 Returns a pointer to the next character after the parsed
11118 vstring, as well as updating the passed in sv.
11119
11120 Function must be called like
11121
11122         sv = newSV(5);
11123         s = scan_vstring(s,e,sv);
11124
11125 where s and e are the start and end of the string.
11126 The sv should already be large enough to store the vstring
11127 passed in, for performance reasons.
11128
11129 */
11130
11131 char *
11132 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11133 {
11134     dVAR;
11135     const char *pos = s;
11136     const char *start = s;
11137
11138     PERL_ARGS_ASSERT_SCAN_VSTRING;
11139
11140     if (*pos == 'v') pos++;  /* get past 'v' */
11141     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11142         pos++;
11143     if ( *pos != '.') {
11144         /* this may not be a v-string if followed by => */
11145         const char *next = pos;
11146         while (next < e && isSPACE(*next))
11147             ++next;
11148         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11149             /* return string not v-string */
11150             sv_setpvn(sv,(char *)s,pos-s);
11151             return (char *)pos;
11152         }
11153     }
11154
11155     if (!isALPHA(*pos)) {
11156         U8 tmpbuf[UTF8_MAXBYTES+1];
11157
11158         if (*s == 'v')
11159             s++;  /* get past 'v' */
11160
11161         sv_setpvs(sv, "");
11162
11163         for (;;) {
11164             /* this is atoi() that tolerates underscores */
11165             U8 *tmpend;
11166             UV rev = 0;
11167             const char *end = pos;
11168             UV mult = 1;
11169             while (--end >= s) {
11170                 if (*end != '_') {
11171                     const UV orev = rev;
11172                     rev += (*end - '0') * mult;
11173                     mult *= 10;
11174                     if (orev > rev)
11175                         /* diag_listed_as: Integer overflow in %s number */
11176                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11177                                          "Integer overflow in decimal number");
11178                 }
11179             }
11180 #ifdef EBCDIC
11181             if (rev > 0x7FFFFFFF)
11182                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11183 #endif
11184             /* Append native character for the rev point */
11185             tmpend = uvchr_to_utf8(tmpbuf, rev);
11186             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11187             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11188                  SvUTF8_on(sv);
11189             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11190                  s = ++pos;
11191             else {
11192                  s = pos;
11193                  break;
11194             }
11195             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11196                  pos++;
11197         }
11198         SvPOK_on(sv);
11199         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11200         SvRMAGICAL_on(sv);
11201     }
11202     return (char *)s;
11203 }
11204
11205 int
11206 Perl_keyword_plugin_standard(pTHX_
11207         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11208 {
11209     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11210     PERL_UNUSED_CONTEXT;
11211     PERL_UNUSED_ARG(keyword_ptr);
11212     PERL_UNUSED_ARG(keyword_len);
11213     PERL_UNUSED_ARG(op_ptr);
11214     return KEYWORD_PLUGIN_DECLINE;
11215 }
11216
11217 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11218 static void
11219 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11220 {
11221     SAVEI32(PL_lex_brackets);
11222     if (PL_lex_brackets > 100)
11223         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11224     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11225     SAVEI32(PL_lex_allbrackets);
11226     PL_lex_allbrackets = 0;
11227     SAVEI8(PL_lex_fakeeof);
11228     PL_lex_fakeeof = (U8)fakeeof;
11229     if(yyparse(gramtype) && !PL_parser->error_count)
11230         qerror(Perl_mess(aTHX_ "Parse error"));
11231 }
11232
11233 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11234 static OP *
11235 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11236 {
11237     OP *o;
11238     ENTER;
11239     SAVEVPTR(PL_eval_root);
11240     PL_eval_root = NULL;
11241     parse_recdescent(gramtype, fakeeof);
11242     o = PL_eval_root;
11243     LEAVE;
11244     return o;
11245 }
11246
11247 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11248 static OP *
11249 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11250 {
11251     OP *exprop;
11252     if (flags & ~PARSE_OPTIONAL)
11253         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11254     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11255     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11256         if (!PL_parser->error_count)
11257             qerror(Perl_mess(aTHX_ "Parse error"));
11258         exprop = newOP(OP_NULL, 0);
11259     }
11260     return exprop;
11261 }
11262
11263 /*
11264 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11265
11266 Parse a Perl arithmetic expression.  This may contain operators of precedence
11267 down to the bit shift operators.  The expression must be followed (and thus
11268 terminated) either by a comparison or lower-precedence operator or by
11269 something that would normally terminate an expression such as semicolon.
11270 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11271 otherwise it is mandatory.  It is up to the caller to ensure that the
11272 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11273 the source of the code to be parsed and the lexical context for the
11274 expression.
11275
11276 The op tree representing the expression is returned.  If an optional
11277 expression is absent, a null pointer is returned, otherwise the pointer
11278 will be non-null.
11279
11280 If an error occurs in parsing or compilation, in most cases a valid op
11281 tree is returned anyway.  The error is reflected in the parser state,
11282 normally resulting in a single exception at the top level of parsing
11283 which covers all the compilation errors that occurred.  Some compilation
11284 errors, however, will throw an exception immediately.
11285
11286 =cut
11287 */
11288
11289 OP *
11290 Perl_parse_arithexpr(pTHX_ U32 flags)
11291 {
11292     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11293 }
11294
11295 /*
11296 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11297
11298 Parse a Perl term expression.  This may contain operators of precedence
11299 down to the assignment operators.  The expression must be followed (and thus
11300 terminated) either by a comma or lower-precedence operator or by
11301 something that would normally terminate an expression such as semicolon.
11302 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11303 otherwise it is mandatory.  It is up to the caller to ensure that the
11304 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11305 the source of the code to be parsed and the lexical context for the
11306 expression.
11307
11308 The op tree representing the expression is returned.  If an optional
11309 expression is absent, a null pointer is returned, otherwise the pointer
11310 will be non-null.
11311
11312 If an error occurs in parsing or compilation, in most cases a valid op
11313 tree is returned anyway.  The error is reflected in the parser state,
11314 normally resulting in a single exception at the top level of parsing
11315 which covers all the compilation errors that occurred.  Some compilation
11316 errors, however, will throw an exception immediately.
11317
11318 =cut
11319 */
11320
11321 OP *
11322 Perl_parse_termexpr(pTHX_ U32 flags)
11323 {
11324     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11325 }
11326
11327 /*
11328 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11329
11330 Parse a Perl list expression.  This may contain operators of precedence
11331 down to the comma operator.  The expression must be followed (and thus
11332 terminated) either by a low-precedence logic operator such as C<or> or by
11333 something that would normally terminate an expression such as semicolon.
11334 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11335 otherwise it is mandatory.  It is up to the caller to ensure that the
11336 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11337 the source of the code to be parsed and the lexical context for the
11338 expression.
11339
11340 The op tree representing the expression is returned.  If an optional
11341 expression is absent, a null pointer is returned, otherwise the pointer
11342 will be non-null.
11343
11344 If an error occurs in parsing or compilation, in most cases a valid op
11345 tree is returned anyway.  The error is reflected in the parser state,
11346 normally resulting in a single exception at the top level of parsing
11347 which covers all the compilation errors that occurred.  Some compilation
11348 errors, however, will throw an exception immediately.
11349
11350 =cut
11351 */
11352
11353 OP *
11354 Perl_parse_listexpr(pTHX_ U32 flags)
11355 {
11356     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11357 }
11358
11359 /*
11360 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11361
11362 Parse a single complete Perl expression.  This allows the full
11363 expression grammar, including the lowest-precedence operators such
11364 as C<or>.  The expression must be followed (and thus terminated) by a
11365 token that an expression would normally be terminated by: end-of-file,
11366 closing bracketing punctuation, semicolon, or one of the keywords that
11367 signals a postfix expression-statement modifier.  If I<flags> includes
11368 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11369 mandatory.  It is up to the caller to ensure that the dynamic parser
11370 state (L</PL_parser> et al) is correctly set to reflect the source of
11371 the code to be parsed and the lexical context for the expression.
11372
11373 The op tree representing the expression is returned.  If an optional
11374 expression is absent, a null pointer is returned, otherwise the pointer
11375 will be non-null.
11376
11377 If an error occurs in parsing or compilation, in most cases a valid op
11378 tree is returned anyway.  The error is reflected in the parser state,
11379 normally resulting in a single exception at the top level of parsing
11380 which covers all the compilation errors that occurred.  Some compilation
11381 errors, however, will throw an exception immediately.
11382
11383 =cut
11384 */
11385
11386 OP *
11387 Perl_parse_fullexpr(pTHX_ U32 flags)
11388 {
11389     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11390 }
11391
11392 /*
11393 =for apidoc Amx|OP *|parse_block|U32 flags
11394
11395 Parse a single complete Perl code block.  This consists of an opening
11396 brace, a sequence of statements, and a closing brace.  The block
11397 constitutes a lexical scope, so C<my> variables and various compile-time
11398 effects can be contained within it.  It is up to the caller to ensure
11399 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11400 reflect the source of the code to be parsed and the lexical context for
11401 the statement.
11402
11403 The op tree representing the code block is returned.  This is always a
11404 real op, never a null pointer.  It will normally be a C<lineseq> list,
11405 including C<nextstate> or equivalent ops.  No ops to construct any kind
11406 of runtime scope are included by virtue of it being a block.
11407
11408 If an error occurs in parsing or compilation, in most cases a valid op
11409 tree (most likely null) is returned anyway.  The error is reflected in
11410 the parser state, normally resulting in a single exception at the top
11411 level of parsing which covers all the compilation errors that occurred.
11412 Some compilation errors, however, will throw an exception immediately.
11413
11414 The I<flags> parameter is reserved for future use, and must always
11415 be zero.
11416
11417 =cut
11418 */
11419
11420 OP *
11421 Perl_parse_block(pTHX_ U32 flags)
11422 {
11423     if (flags)
11424         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11425     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11426 }
11427
11428 /*
11429 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11430
11431 Parse a single unadorned Perl statement.  This may be a normal imperative
11432 statement or a declaration that has compile-time effect.  It does not
11433 include any label or other affixture.  It is up to the caller to ensure
11434 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11435 reflect the source of the code to be parsed and the lexical context for
11436 the statement.
11437
11438 The op tree representing the statement is returned.  This may be a
11439 null pointer if the statement is null, for example if it was actually
11440 a subroutine definition (which has compile-time side effects).  If not
11441 null, it will be ops directly implementing the statement, suitable to
11442 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11443 equivalent op (except for those embedded in a scope contained entirely
11444 within the statement).
11445
11446 If an error occurs in parsing or compilation, in most cases a valid op
11447 tree (most likely null) is returned anyway.  The error is reflected in
11448 the parser state, normally resulting in a single exception at the top
11449 level of parsing which covers all the compilation errors that occurred.
11450 Some compilation errors, however, will throw an exception immediately.
11451
11452 The I<flags> parameter is reserved for future use, and must always
11453 be zero.
11454
11455 =cut
11456 */
11457
11458 OP *
11459 Perl_parse_barestmt(pTHX_ U32 flags)
11460 {
11461     if (flags)
11462         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11463     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11464 }
11465
11466 /*
11467 =for apidoc Amx|SV *|parse_label|U32 flags
11468
11469 Parse a single label, possibly optional, of the type that may prefix a
11470 Perl statement.  It is up to the caller to ensure that the dynamic parser
11471 state (L</PL_parser> et al) is correctly set to reflect the source of
11472 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11473 label is optional, otherwise it is mandatory.
11474
11475 The name of the label is returned in the form of a fresh scalar.  If an
11476 optional label is absent, a null pointer is returned.
11477
11478 If an error occurs in parsing, which can only occur if the label is
11479 mandatory, a valid label is returned anyway.  The error is reflected in
11480 the parser state, normally resulting in a single exception at the top
11481 level of parsing which covers all the compilation errors that occurred.
11482
11483 =cut
11484 */
11485
11486 SV *
11487 Perl_parse_label(pTHX_ U32 flags)
11488 {
11489     if (flags & ~PARSE_OPTIONAL)
11490         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11491     if (PL_lex_state == LEX_KNOWNEXT) {
11492         PL_parser->yychar = yylex();
11493         if (PL_parser->yychar == LABEL) {
11494             char *lpv = pl_yylval.pval;
11495             STRLEN llen = strlen(lpv);
11496             SV *lsv;
11497             PL_parser->yychar = YYEMPTY;
11498             lsv = newSV_type(SVt_PV);
11499             SvPV_set(lsv, lpv);
11500             SvCUR_set(lsv, llen);
11501             SvLEN_set(lsv, llen+1);
11502             SvPOK_on(lsv);
11503             return lsv;
11504         } else {
11505             yyunlex();
11506             goto no_label;
11507         }
11508     } else {
11509         char *s, *t;
11510         U8 c;
11511         STRLEN wlen, bufptr_pos;
11512         lex_read_space(0);
11513         t = s = PL_bufptr;
11514         c = (U8)*s;
11515         if (!isIDFIRST_A(c))
11516             goto no_label;
11517         do {
11518             c = (U8)*++t;
11519         } while(isWORDCHAR_A(c));
11520         wlen = t - s;
11521         if (word_takes_any_delimeter(s, wlen))
11522             goto no_label;
11523         bufptr_pos = s - SvPVX(PL_linestr);
11524         PL_bufptr = t;
11525         lex_read_space(LEX_KEEP_PREVIOUS);
11526         t = PL_bufptr;
11527         s = SvPVX(PL_linestr) + bufptr_pos;
11528         if (t[0] == ':' && t[1] != ':') {
11529             PL_oldoldbufptr = PL_oldbufptr;
11530             PL_oldbufptr = s;
11531             PL_bufptr = t+1;
11532             return newSVpvn(s, wlen);
11533         } else {
11534             PL_bufptr = s;
11535             no_label:
11536             if (flags & PARSE_OPTIONAL) {
11537                 return NULL;
11538             } else {
11539                 qerror(Perl_mess(aTHX_ "Parse error"));
11540                 return newSVpvs("x");
11541             }
11542         }
11543     }
11544 }
11545
11546 /*
11547 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11548
11549 Parse a single complete Perl statement.  This may be a normal imperative
11550 statement or a declaration that has compile-time effect, and may include
11551 optional labels.  It is up to the caller to ensure that the dynamic
11552 parser state (L</PL_parser> et al) is correctly set to reflect the source
11553 of the code to be parsed and the lexical context for the statement.
11554
11555 The op tree representing the statement is returned.  This may be a
11556 null pointer if the statement is null, for example if it was actually
11557 a subroutine definition (which has compile-time side effects).  If not
11558 null, it will be the result of a L</newSTATEOP> call, normally including
11559 a C<nextstate> or equivalent op.
11560
11561 If an error occurs in parsing or compilation, in most cases a valid op
11562 tree (most likely null) is returned anyway.  The error is reflected in
11563 the parser state, normally resulting in a single exception at the top
11564 level of parsing which covers all the compilation errors that occurred.
11565 Some compilation errors, however, will throw an exception immediately.
11566
11567 The I<flags> parameter is reserved for future use, and must always
11568 be zero.
11569
11570 =cut
11571 */
11572
11573 OP *
11574 Perl_parse_fullstmt(pTHX_ U32 flags)
11575 {
11576     if (flags)
11577         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11578     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11579 }
11580
11581 /*
11582 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11583
11584 Parse a sequence of zero or more Perl statements.  These may be normal
11585 imperative statements, including optional labels, or declarations
11586 that have compile-time effect, or any mixture thereof.  The statement
11587 sequence ends when a closing brace or end-of-file is encountered in a
11588 place where a new statement could have validly started.  It is up to
11589 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11590 is correctly set to reflect the source of the code to be parsed and the
11591 lexical context for the statements.
11592
11593 The op tree representing the statement sequence is returned.  This may
11594 be a null pointer if the statements were all null, for example if there
11595 were no statements or if there were only subroutine definitions (which
11596 have compile-time side effects).  If not null, it will be a C<lineseq>
11597 list, normally including C<nextstate> or equivalent ops.
11598
11599 If an error occurs in parsing or compilation, in most cases a valid op
11600 tree is returned anyway.  The error is reflected in the parser state,
11601 normally resulting in a single exception at the top level of parsing
11602 which covers all the compilation errors that occurred.  Some compilation
11603 errors, however, will throw an exception immediately.
11604
11605 The I<flags> parameter is reserved for future use, and must always
11606 be zero.
11607
11608 =cut
11609 */
11610
11611 OP *
11612 Perl_parse_stmtseq(pTHX_ U32 flags)
11613 {
11614     OP *stmtseqop;
11615     I32 c;
11616     if (flags)
11617         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11618     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11619     c = lex_peek_unichar(0);
11620     if (c != -1 && c != /*{*/'}')
11621         qerror(Perl_mess(aTHX_ "Parse error"));
11622     return stmtseqop;
11623 }
11624
11625 void
11626 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11627 {
11628     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11629     deprecate("qw(...) as parentheses");
11630     force_next((4<<24)|')');
11631     if (qwlist->op_type == OP_STUB) {
11632         op_free(qwlist);
11633     }
11634     else {
11635         start_force(PL_curforce);
11636         NEXTVAL_NEXTTOKE.opval = qwlist;
11637         force_next(THING);
11638     }
11639     force_next((2<<24)|'(');
11640 }
11641
11642 /*
11643  * Local variables:
11644  * c-indentation-style: bsd
11645  * c-basic-offset: 4
11646  * indent-tabs-mode: t
11647  * End:
11648  *
11649  * ex: set ts=8 sts=4 sw=4 noet:
11650  */