toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
     /* Convenience wrapper around S_new_constant(): supplies the thread
      * context (aTHX_) and routes the third argument through STR_WITH_LEN()
      * before forwarding everything else unchanged.
      * NOTE(review): STR_WITH_LEN() presumably needs a string literal --
      * confirm before passing anything else as c. */
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
     /* Shorthand for the yylval member of the current parser, PL_parser. */
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define
        (this file itself maps PL_pending_ident onto that member), hence the
        hand-written forward declaration below.  */
 116 static int
 117 S_pending_ident(pTHX);
 118
     /* Text of the "Identifier too long" diagnostic, kept in one place.
      * NOTE(review): presumably shared by the identifier-scanning code later
      * in the file -- its users are outside this section. */
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
     /* CURMAD(slot,sv): when PL_madskills is set, hand sv to curmad() for the
      * given slot and then zero the caller's variable (sv = 0).  It expands to
      * a bare if-statement rather than do { } while (0), so take care when
      * using it as the body of an unbraced if/else.  Without PERL_MAD it
      * expands to nothing.
      * NEXTVAL_NEXTTOKE: the value slot of the pending-token entry selected
      * by the current index -- PL_nexttoke[PL_curforce].next_val under
      * PERL_MAD, PL_nextval[PL_nexttoke] otherwise. */
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
     /* A six-bit mask (0x3f) plus two single-bit flags (0x40, 0x80).
      * NOTE(review): presumably these pack an expectation value together with
      * "fake EOF" / "fake bracket" markers in one byte -- the use sites are
      * outside this section, confirm there. */
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
     /* UTF: should the text being lexed be treated as UTF-8?
      * With USE_UTF8_SCRIPTS this is simply "not IN_BYTES".  Otherwise it is
      * true when PL_linestr is set and DO_UTF8() holds for it, or when the
      * parser's lex_flags do not contain LEX_IGNORE_UTF8_HINTS and PL_hints
      * has HINT_UTF8 set.  The macro re-reads that state on every use. */
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com
      *
      * Accepts any character for which isUPPER() holds, plus one of
      * [ \ ] ^ _ ?   The argument is evaluated twice.
      * NOTE(review): strchr() also matches the string's terminating NUL, so
      * isCONTROLVAR('\0') yields a non-null (true) result -- confirm that
      * callers never pass a NUL byte. */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
     /* True when c is a space or a horizontal tab; c is evaluated twice. */
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  *
 152  * These values refer to the various states within a sublex parse,
 153  * i.e. within a double quotish string
      *
      * The values run from 0 (LEX_KNOWNEXT) to 10 (LEX_NORMAL) without gaps,
      * and the lex_state_names[] table (DEBUGGING builds) lists one name per
      * value in the same ascending order, so the two must be changed
      * together.
 154  */
 155
 156 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 157
 158 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 159 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 160 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 161 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 162 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 163
 164                                    /* at end of code, eg "$x" followed by:  */
 165 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 166 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 167
 168 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 169                                         string or after \E, $foo, etc       */
 170 #define LEX_INTERPCONST          2 /* NOT USED */
 171 #define LEX_FORMLINE             1 /* expecting a format line               */
 172 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 173
 174
 175 #ifdef DEBUGGING
     /* Printable names for the LEX_* states, without the LEX_ prefix.  Entry
      * i names the state whose value is i: "KNOWNEXT" is 0 ... "NORMAL" is 10.
      * There is no entry for LEX_NOTPARSING (11, defined in perl.h), so that
      * value must not be used as an index.  Only compiled for DEBUGGING. */
 176 static const char* const lex_state_names[] = {
 177     "KNOWNEXT",
 178     "FORMLINE",
 179     "INTERPCONST",
 180     "INTERPCONCAT",
 181     "INTERPENDMAYBE",
 182     "INTERPEND",
 183     "INTERPSTART",
 184     "INTERPPUSH",
 185     "INTERPCASEMOD",
 186     "INTERPNORMAL",
 187     "NORMAL"
 188 };
 189 #endif
 190
 191 #ifdef ff_next
     /* Discard any ff_next macro that is already in effect before
      * keywords.h is included.
      * NOTE(review): presumably some system header defines it and it would
      * clash with an identifier used later -- confirm. */
 192 #undef ff_next
 193 #endif
 194
 195 #include "keywords.h"
 196
 197 /* CLINE is a macro that ensures PL_copline has a sane value:
      * it lowers PL_copline to CopLINE(PL_curcop) when the current cop's line
      * number is the smaller of the two and leaves it alone otherwise.  The
      * expression yields the resulting PL_copline.  Any definition of CLINE
      * that is already in effect is discarded first. */
 198
 199 #ifdef CLINE
 200 #undef CLINE
 201 #endif
 202 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 203
 204 #ifdef PERL_MAD
     /* Whitespace-skipping front ends.  Under PERL_MAD each one maps onto its
      * own helper: skipspace0(), skipspace1(), skipspace2() with the address
      * of tsv, and skipspace2() with a null second argument for PEEKSPACE.
      * Without PERL_MAD all four collapse to plain skipspace(s) and the tsv
      * argument of SKIPSPACE2 is not evaluated. */
 205 #  define SKIPSPACE0(s) skipspace0(s)
 206 #  define SKIPSPACE1(s) skipspace1(s)
 207 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 208 #  define PEEKSPACE(s) skipspace2(s,0)
 209 #else
 210 #  define SKIPSPACE0(s) skipspace(s)
 211 #  define SKIPSPACE1(s) skipspace(s)
 212 #  define SKIPSPACE2(s,tsv) skipspace(s)
 213 #  define PEEKSPACE(s) skipspace(s)
 214 #endif
 215
 216 /*
 217  * Convenience functions to return different tokens and prime the
 218  * lexer for the next token.  They all take an argument.
 219  *
 220  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 221  * OPERATOR     : generic operator
 222  * AOPERATOR    : assignment operator
 223  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 224  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 225  * PREREF       : *EXPR where EXPR is not a simple identifier
 226  * TERM         : expression term
 227  * LOOPX        : loop exiting command (goto, last, dump, etc)
 228  * FTST         : file test operator
 229  * FUN0         : zero-argument function
 230  * FUN0OP       : zero-argument function, with its op created in this file
 231  * FUN1         : not used, except for not, which isn't a UNIOP
 232  * BOop         : bitwise or or xor
 233  * BAop         : bitwise and
 234  * SHop         : shift operator
 235  * PWop         : power operator
 236  * PMop         : pattern-matching operator
 237  * Aop          : addition-level operator
 238  * Mop          : multiplication-level operator
 239  * Eop          : equality-testing operator
 240  * Rop          : relational operator <= != gt
 241  *
 242  * Also see LOP and lop() below.
 243  */
 244
 245 #ifdef DEBUGGING /* Serve -DT. */
     /* REPORT(retval): in DEBUGGING builds the token is passed, together with
      * the address of pl_yylval, through tokereport(); otherwise the macro is
      * just its argument. */
 246 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 247 #else
 248 #   define REPORT(retval) (retval)
 249 #endif
 250
     /* Every macro below expands to a complete return statement and refers to
      * a variable named s, which it stores into PL_bufptr; all except TOKEN
      * also assign PL_expect.  The ones taking f store it in pl_yylval.ival
      * (FUN0OP stores it in pl_yylval.opval) and return a fixed token type.
      * The variants wrapped in ao() pass the token through ao() before
      * returning it.  TERM and FUN0OP additionally evaluate CLINE. */
 251 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 252 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 253 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 254 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 255 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 256 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 257 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 258 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 259 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 260 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 261 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 262 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 263 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 264 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 265 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 266 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 267 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 268 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 269 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 270 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 271 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 272
 273 /* This bit of chicanery makes a unary function followed by
 274  * a parenthesis into a function with one argument, highest precedence.
 275  * The UNIDOR macro is for unary functions that can be followed by the //
 276  * operator (such as C<shift // 0>).
      *
      * UNI3(f,x,have_x) stores f in pl_yylval.ival and PL_last_lop_op, sets
      * PL_expect to x only when have_x is true, saves s in PL_bufptr and
      * PL_oldbufptr in PL_last_uni, and then returns FUNC1 if the next
      * character is '(' -- either immediately or after PEEKSPACE() -- and
      * UNIOP otherwise.  UNI and UNIDOR differ only in the expectation they
      * set (XTERM vs XTERMORDORDOR); UNIBRACK leaves PL_expect untouched.
      * UNIPROTO(f,optional) records PL_last_uni only when optional is true
      * and then returns through OPERATOR(f).
      * UNI3 and UNIPROTO expand to brace blocks containing return statements
      * and rely on a variable named s in the calling scope.
 277  */
 278 #define UNI3(f,x,have_x) { \
 279         pl_yylval.ival = f; \
 280         if (have_x) PL_expect = x; \
 281         PL_bufptr = s; \
 282         PL_last_uni = PL_oldbufptr; \
 283         PL_last_lop_op = f; \
 284         if (*s == '(') \
 285             return REPORT( (int)FUNC1 ); \
 286         s = PEEKSPACE(s); \
 287         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 288         }
 289 #define UNI(f)    UNI3(f,XTERM,1)
 290 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 291 #define UNIPROTO(f,optional) { \
 292         if (optional) PL_last_uni = PL_oldbufptr; \
 293         OPERATOR(f); \
 294         }
 295
 296 #define UNIBRACK(f) UNI3(f,0,0)
 297
 298 /* grandfather return to old style
      *
      * OLDLOP(f): when no bracket is currently open (PL_lex_allbrackets is
      * zero) and PL_lex_fakeeof is above LEX_FAKEEOF_LOWLOGIC, lower it to
      * that level; then store f in pl_yylval.ival, expect a term, save s in
      * PL_bufptr and return LSTOP.  Unlike the other token macros the return
      * value does not go through REPORT().  Relies on a variable named s in
      * the calling scope. */
 299 #define OLDLOP(f) \
 300         do { \
 301             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 302                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 303             pl_yylval.ival = (f); \
 304             PL_expect = XTERM; \
 305             PL_bufptr = s; \
 306             return (int)LSTOP; \
 307         } while(0)
 308
 309 #ifdef DEBUGGING
 310
 311 /* how to interpret the pl_yylval associated with the token
      * (DEBUGGING builds only; consumed by S_tokereport() when it formats the
      * value that accompanies a token) */
 312 enum token_type {
 313     TOKENTYPE_NONE,  /* no value is printed */
 314     TOKENTYPE_IVAL,  /* pl_yylval.ival printed as an integer */
 315     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 316     TOKENTYPE_PVAL,  /* pl_yylval.pval printed as a string */
 317     TOKENTYPE_OPVAL, /* pl_yylval.opval is an op (or null) */
 318     TOKENTYPE_GVVAL  /* handled like TOKENTYPE_NONE by S_tokereport() */
 319 };
 320
 321 static struct debug_tokens {
     /* One row per named token: the token number, how the accompanying
      * pl_yylval is to be interpreted, and the name to print.  The table is
      * kept in alphabetical order and ends with a row whose token is 0 and
      * whose name is NULL; S_tokereport() scans it linearly and stops at that
      * sentinel, so new rows must go above it. */
 322     const int token;
 323     enum token_type type;
 324     const char *name;
 325 } const debug_tokens[] =
 326 {
 327     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 328     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 329     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 330     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 331     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 332     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 333     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 334     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 335     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 336     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 337     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 338     { DO,               TOKENTYPE_NONE,         "DO" },
 339     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 340     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 341     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 342     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 343     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 344     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 345     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 346     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 347     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 348     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 349     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 350     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 351     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 352     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 353     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 354     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 355     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 356     { IF,               TOKENTYPE_IVAL,         "IF" },
 357     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 358     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 359     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 360     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 361     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 362     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 363     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 364     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 365     { MY,               TOKENTYPE_IVAL,         "MY" },
 366     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 367     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 368     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 369     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 370     { OROR,             TOKENTYPE_NONE,         "OROR" },
 371     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 372     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 373     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 374     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 375     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 376     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 377     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 378     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 379     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 380     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 381     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 382     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 383     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 384     { SUB,              TOKENTYPE_NONE,         "SUB" },
 385     { THING,            TOKENTYPE_OPVAL,        "THING" },
 386     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 387     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 388     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 389     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 390     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 391     { USE,              TOKENTYPE_IVAL,         "USE" },
 392     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 393     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 394     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 395     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 396     { 0,                TOKENTYPE_NONE,         NULL }
 397 };
 398
 399 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 400
 401 STATIC int
 402 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 403 {
 404     dVAR;
 405
 406     PERL_ARGS_ASSERT_TOKEREPORT;
 407
 408     if (DEBUG_T_TEST) {
 409         const char *name = NULL;
 410         enum token_type type = TOKENTYPE_NONE;
 411         const struct debug_tokens *p;
 412         SV* const report = newSVpvs("<== ");
 413
 414         for (p = debug_tokens; p->token; p++) {
 415             if (p->token == (int)rv) {
 416                 name = p->name;
 417                 type = p->type;
 418                 break;
 419             }
 420         }
 421         if (name)
 422             Perl_sv_catpv(aTHX_ report, name);
 423         else if ((char)rv > ' ' && (char)rv < '~')
 424             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 425         else if (!rv)
 426             sv_catpvs(report, "EOF");
 427         else
 428             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 429         switch (type) {
 430         case TOKENTYPE_NONE:
 431         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 432             break;
 433         case TOKENTYPE_IVAL:
 434             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 435             break;
 436         case TOKENTYPE_OPNUM:
 437             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 438                                     PL_op_name[lvalp->ival]);
 439             break;
 440         case TOKENTYPE_PVAL:
 441             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 442             break;
 443         case TOKENTYPE_OPVAL:
 444             if (lvalp->opval) {
 445                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 446                                     PL_op_name[lvalp->opval->op_type]);
 447                 if (lvalp->opval->op_type == OP_CONST) {
 448                     Perl_sv_catpvf(aTHX_ report, " %s",
 449                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 450                 }
 451
 452             }
 453             else
 454                 sv_catpvs(report, "(opval=null)");
 455             break;
 456         }
 457         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 458     };
 459     return (int)rv;
 460 }
 461
 462
 463 /* print the buffer with suitable escapes */
 464
 465 STATIC void
 466 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 467 {
 468     SV* const tmp = newSVpvs("");
 469
 470     PERL_ARGS_ASSERT_PRINTBUF;
 471
 472     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 473     SvREFCNT_dec(tmp);
 474 }
 475
 476 #endif
 477
 478 static int
 479 S_deprecate_commaless_var_list(pTHX) {
 480     PL_expect = XTERM;
 481     deprecate("comma-less variable list");
 482     return REPORT(','); /* grandfather non-comma-format format */
 483 }
 484
 485 /*
 486  * S_ao
 487  *
 488  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 489  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 490  */
 491
 492 STATIC int
 493 S_ao(pTHX_ int toketype)
 494 {
 495     dVAR;
 496     if (*PL_bufptr == '=') {
 497         PL_bufptr++;
 498         if (toketype == ANDAND)
 499             pl_yylval.ival = OP_ANDASSIGN;
 500         else if (toketype == OROR)
 501             pl_yylval.ival = OP_ORASSIGN;
 502         else if (toketype == DORDOR)
 503             pl_yylval.ival = OP_DORASSIGN;
 504         toketype = ASSIGNOP;
 505     }
 506     return toketype;
 507 }
 508
 509 /*
 510  * S_no_op
 511  * When Perl expects an operator and finds something else, no_op
 512  * prints the warning.  It always prints "<something> found where
 513  * operator expected.  It prints "Missing semicolon on previous line?"
 514  * if the surprise occurs at the start of the line.  "do you need to
 515  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 516  * where the compiler doesn't know if foo is a method call or a function.
 517  * It prints "Missing operator before end of line" if there's nothing
 518  * after the missing operator, or "... before <...>" if there is something
 519  * after the missing operator.
 520  */
 521
 522 STATIC void
 523 S_no_op(pTHX_ const char *const what, char *s)
 524 {
 525     dVAR;
 526     char * const oldbp = PL_bufptr;
 527     const bool is_first = (PL_oldbufptr == PL_linestart);
 528
 529     PERL_ARGS_ASSERT_NO_OP;
 530
 531     if (!s)
 532         s = oldbp;
 533     else
 534         PL_bufptr = s;
 535     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 536     if (ckWARN_d(WARN_SYNTAX)) {
 537         if (is_first)
 538             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 539                     "\t(Missing semicolon on previous line?)\n");
 540         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 541             const char *t;
 542             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
 543                                                             t += UTF ? UTF8SKIP(t) : 1)
 544                 NOOP;
 545             if (t < PL_bufptr && isSPACE(*t))
 546                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 547                         "\t(Do you need to predeclare %"SVf"?)\n",
 548                     SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
 549                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 550         }
 551         else {
 552             assert(s >= oldbp);
 553             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 554                     "\t(Missing operator before %"SVf"?)\n",
 555                     SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
 556                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 557         }
 558     }
 559     PL_bufptr = oldbp;
 560 }
 561
 562 /*
 563  * S_missingterm
 564  * Complain about missing quote/regexp/heredoc terminator.
 565  * If it's called with NULL then it cauterizes the line buffer.
 566  * If we're in a delimited string and the delimiter is a control
 567  * character, it's reformatted into a two-char sequence like ^C.
 568  * This is fatal.
 569  */
 570
 571 STATIC void
 572 S_missingterm(pTHX_ char *s)
 573 {
 574     dVAR;
 575     char tmpbuf[3];
 576     char q;
 577     if (s) {
 578         char * const nl = strrchr(s,'\n');
 579         if (nl)
 580             *nl = '\0';
 581     }
 582     else if (isCNTRL(PL_multi_close)) {
 583         *tmpbuf = '^';
 584         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 585         tmpbuf[2] = '\0';
 586         s = tmpbuf;
 587     }
 588     else {
 589         *tmpbuf = (char)PL_multi_close;
 590         tmpbuf[1] = '\0';
 591         s = tmpbuf;
 592     }
 593     q = strchr(s,'"') ? '\'' : '"';
 594     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 595 }
 596
 597 #include "feature.h"
 598
 599 /*
 600  * Check whether the named feature is enabled.
 601  */
 602 bool
 603 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 604 {
 605     dVAR;
 606     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 607
 608     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 609
 610     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 611
 612     if (namelen > MAX_FEATURE_LEN)
 613         return FALSE;
 614     memcpy(&he_name[8], name, namelen);
 615
 616     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 617                                      REFCOUNTED_HE_EXISTS));
 618 }
 619
 620 /*
 621  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 622  * utf16-to-utf8-reversed.
 623  */
 624
 625 #ifdef PERL_CR_FILTER
 626 static void
 627 strip_return(SV *sv)
 628 {
 629     register const char *s = SvPVX_const(sv);
 630     register const char * const e = s + SvCUR(sv);
 631
 632     PERL_ARGS_ASSERT_STRIP_RETURN;
 633
 634     /* outer loop optimized to do nothing if there are no CR-LFs */
 635     while (s < e) {
 636         if (*s++ == '\r' && *s == '\n') {
 637             /* hit a CR-LF, need to copy the rest */
 638             register char *d = s - 1;
 639             *d++ = *s++;
 640             while (s < e) {
 641                 if (*s == '\r' && s[1] == '\n')
 642                     s++;
 643                 *d++ = *s++;
 644             }
 645             SvCUR(sv) -= s - d;
 646             return;
 647         }
 648     }
 649 }
 650
 651 STATIC I32
 652 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 653 {
 654     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 655     if (count > 0 && !maxlen)
 656         strip_return(sv);
 657     return count;
 658 }
 659 #endif
 660
 661 /*
 662 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 663
 664 Creates and initialises a new lexer/parser state object, supplying
 665 a context in which to lex and parse from a new source of Perl code.
 666 A pointer to the new state object is placed in L</PL_parser>.  An entry
 667 is made on the save stack so that upon unwinding the new state object
 668 will be destroyed and the former value of L</PL_parser> will be restored.
 669 Nothing else need be done to clean up the parsing context.
 670
 671 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 672 non-null, provides a string (in SV form) containing code to be parsed.
 673 A copy of the string is made, so subsequent modification of I<line>
 674 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 675 from which code will be read to be parsed.  If both are non-null, the
 676 code in I<line> comes first and must consist of complete lines of input,
 677 and I<rsfp> supplies the remainder of the source.
 678
 679 The I<flags> parameter is reserved for future use.  Currently it is only
 680 used by perl internally, so extensions should always pass zero.
 681
 682 =cut
 683 */
 684
 685 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 686    can share filters with the current parser.
 687    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 688    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 689    destruction. This is used to handle the case of defaulting to reading the
 690    script from the standard input because no filename was given on the command
 691    line (without getting confused by situation where STDIN has been closed, so
 692    the script handle is opened on fd 0)  */
 693
     /*
      * Perl_lex_start
      * Allocates a parser state object, chains it to the current one, makes
      * it PL_parser and registers it with SAVEPARSER() for clean-up.
      *
      *   line   optional SV with code to lex first
      *   rsfp   optional stream, stored as the parser's rsfp
      *   flags  LEX_START_* / LEX_* bits; any bit outside LEX_START_FLAGS
      *          makes the function croak
      *
      * The statement order matters: the new parser is installed as
      * PL_parser and registered on the save stack before the remaining
      * lexer state is initialised.
      */
 694 void
 695 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 696 {
 697     dVAR;
 698     const char *s = NULL;
 699     yy_parser *parser, *oparser;
 700     if (flags && flags & ~LEX_START_FLAGS)
 701         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 702
 703     /* create and initialise a parser */
 704
 705     Newxz(parser, 1, yy_parser);
         /* remember the outer parser (may be NULL) and become the current one */
 706     parser->old_parser = oparser = PL_parser;
 707     PL_parser = parser;
 708
 709     parser->stack = NULL;
 710     parser->ps = NULL;
 711     parser->stack_size = 0;
 712
 713     /* on scope exit, free this parser and restore any outer one */
 714     SAVEPARSER(parser);
 715     parser->saved_curcop = PL_curcop;
 716
 717     /* initialise lexer state */
 718
 719 #ifdef PERL_MAD
 720     parser->curforce = -1;
 721 #else
 722     parser->nexttoke = 0;
 723 #endif
         /* a nested parse starts with the outer parser's error count */
 724     parser->error_count = oparser ? oparser->error_count : 0;
 725     parser->copline = NOLINE;
 726     parser->lex_state = LEX_NORMAL;
 727     parser->expect = XSTATE;
 728     parser->rsfp = rsfp;
         /* Filters: none, unless LEX_START_SAME_FILTER is given and there is
          * an outer parser; then the outer parser's filter array is shared
          * (created on the outer parser first if it has none) and its
          * reference count is raised. */
 729     parser->rsfp_filters =
 730       !(flags & LEX_START_SAME_FILTER) || !oparser
 731         ? NULL
 732         : MUTABLE_AV(SvREFCNT_inc(
 733             oparser->rsfp_filters
 734              ? oparser->rsfp_filters
 735              : (oparser->rsfp_filters = newAV())
 736           ));
 737
 738     Newx(parser->lex_brackstack, 120, char);
 739     Newx(parser->lex_casestack, 12, char);
 740     *parser->lex_casestack = '\0';
 741
         /* Line buffer.  With LEX_START_COPIED the caller's SV itself is
          * adopted (reference count raised) instead of being copied, so the
          * "\n;" appended below lands in that SV; otherwise a new SV with the
          * same bytes and UTF-8 flag is made.  Text that is empty or does not
          * end in ';' gets "\n;" appended; with no line at all the buffer is
          * just "\n;". */
 742     if (line) {
 743         STRLEN len;
 744         s = SvPV_const(line, len);
 745         parser->linestr = flags & LEX_START_COPIED
 746                             ? SvREFCNT_inc_simple_NN(line)
 747                             : newSVpvn_flags(s, len, SvUTF8(line));
 748         if (!len || s[len-1] != ';')
 749             sv_catpvs(parser->linestr, "\n;");
 750     } else {
 751         parser->linestr = newSVpvs("\n;");
 752     }
         /* every position pointer starts at the beginning of the buffer and
          * bufend marks its current end */
 753     parser->oldoldbufptr =
 754         parser->oldbufptr =
 755         parser->bufptr =
 756         parser->linestart = SvPVX(parser->linestr);
 757     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 758     parser->last_lop = parser->last_uni = NULL;
         /* only these three flag bits are retained in the parser */
 759     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 760                                  |LEX_DONT_CLOSE_RSFP);
 761
 762     parser->in_pod = parser->filtered = 0;
 763 }
 764
 765
 766 /* delete a parser object */
 767
 768 void
 769 Perl_parser_free(pTHX_  const yy_parser *parser)
 770 {
 771     PERL_ARGS_ASSERT_PARSER_FREE;
 772
 773     PL_curcop = parser->saved_curcop;
 774     SvREFCNT_dec(parser->linestr);
 775
 776     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 777         PerlIO_clearerr(parser->rsfp);
 778     else if (parser->rsfp && (!parser->old_parser ||
 779                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 780         PerlIO_close(parser->rsfp);
 781     SvREFCNT_dec(parser->rsfp_filters);
 782
 783     Safefree(parser->lex_brackstack);
 784     Safefree(parser->lex_casestack);
 785     PL_parser = parser->old_parser;
 786     Safefree(parser);
 787 }
 788
 789
 790 /*
 791 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 792
 793 Buffer scalar containing the chunk currently under consideration of the
 794 text currently being lexed.  This is always a plain string scalar (for
 795 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 796 normal scalar means; instead refer to the buffer directly by the pointer
 797 variables described below.
 798
 799 The lexer maintains various C<char*> pointers to things in the
 800 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 801 reallocated, all of these pointers must be updated.  Don't attempt to
 802 do this manually, but rather use L</lex_grow_linestr> if you need to
 803 reallocate the buffer.
 804
 805 The content of the text chunk in the buffer is commonly exactly one
 806 complete line of input, up to and including a newline terminator,
 807 but there are situations where it is otherwise.  The octets of the
 808 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 809 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 810 flag on this scalar, which may disagree with it.
 811
 812 For direct examination of the buffer, the variable
 813 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 814 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 815 of these pointers is usually preferable to examination of the scalar
 816 through normal scalar means.
 817
 818 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 819
 820 Direct pointer to the end of the chunk of text currently being lexed, the
 821 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 822 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 823 always located at the end of the buffer, and does not count as part of
 824 the buffer's contents.
 825
 826 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 827
 828 Points to the current position of lexing inside the lexer buffer.
 829 Characters around this point may be freely examined, within
 830 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 831 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 832 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 833
 834 Lexing code (whether in the Perl core or not) moves this pointer past
 835 the characters that it consumes.  It is also expected to perform some
 836 bookkeeping whenever a newline character is consumed.  This movement
 837 can be more conveniently performed by the function L</lex_read_to>,
 838 which handles newlines appropriately.
 839
 840 Interpretation of the buffer's octets can be abstracted out by
 841 using the slightly higher-level functions L</lex_peek_unichar> and
 842 L</lex_read_unichar>.
 843
 844 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 845
 846 Points to the start of the current line inside the lexer buffer.
 847 This is useful for indicating at which column an error occurred, and
 848 not much else.  This must be updated by any lexing code that consumes
 849 a newline; the function L</lex_read_to> handles this detail.
 850
 851 =cut
 852 */
 853
 854 /*
 855 =for apidoc Amx|bool|lex_bufutf8
 856
 857 Indicates whether the octets in the lexer buffer
 858 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 859 of Unicode characters.  If not, they should be interpreted as Latin-1
 860 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 861
 862 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 863 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 864 encoding.
 865
 866 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 867 is significant, but not the whole story regarding the input character
 868 encoding.  Normally, when a file is being read, the scalar contains octets
 869 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 870 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 871 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 872 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 873 is in effect.  This logic may change in the future; use this function
 874 instead of implementing the logic yourself.
 875
 876 =cut
 877 */
 878
 879 bool
 880 Perl_lex_bufutf8(pTHX)
 881 {
 882     return UTF;
 883 }
 884
 885 /*
 886 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 887
 888 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 889 at least I<len> octets (including terminating NUL).  Returns a
 890 pointer to the reallocated buffer.  This is necessary before making
 891 any direct modification of the buffer that would increase its length.
 892 L</lex_stuff_pvn> provides a more convenient way to insert text into
 893 the buffer.
 894
 895 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 896 this function updates all of the lexer's variables that point directly
 897 into the buffer.
 898
 899 =cut
 900 */
 901
 902 char *
 903 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 904 {
 905     SV *linestr;
 906     char *buf;
 907     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 908     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 909     linestr = PL_parser->linestr;
 910     buf = SvPVX(linestr);
 911     if (len <= SvLEN(linestr))
 912         return buf;
 913     bufend_pos = PL_parser->bufend - buf;
 914     bufptr_pos = PL_parser->bufptr - buf;
 915     oldbufptr_pos = PL_parser->oldbufptr - buf;
 916     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 917     linestart_pos = PL_parser->linestart - buf;
 918     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 919     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 920     buf = sv_grow(linestr, len);
 921     PL_parser->bufend = buf + bufend_pos;
 922     PL_parser->bufptr = buf + bufptr_pos;
 923     PL_parser->oldbufptr = buf + oldbufptr_pos;
 924     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 925     PL_parser->linestart = buf + linestart_pos;
 926     if (PL_parser->last_uni)
 927         PL_parser->last_uni = buf + last_uni_pos;
 928     if (PL_parser->last_lop)
 929         PL_parser->last_lop = buf + last_lop_pos;
 930     return buf;
 931 }
 932
 933 /*
 934 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 935
 936 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 937 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 938 reallocating the buffer if necessary.  This means that lexing code that
 939 runs later will see the characters as if they had appeared in the input.
 940 It is not recommended to do this as part of normal parsing, and most
 941 uses of this facility run the risk of the inserted characters being
 942 interpreted in an unintended manner.
 943
 944 The string to be inserted is represented by I<len> octets starting
 945 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 946 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 947 The characters are recoded for the lexer buffer, according to how the
 948 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 949 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 950 function is more convenient.
 951
 952 =cut
 953 */
 954
 955 void
 956 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 957 {
 958     dVAR;
 959     char *bufptr;
 960     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 961     if (flags & ~(LEX_STUFF_UTF8))
 962         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 963     if (UTF) {
 964         if (flags & LEX_STUFF_UTF8) {
 965             goto plain_copy;
 966         } else {
 967             STRLEN highhalf = 0;
 968             const char *p, *e = pv+len;
 969             for (p = pv; p != e; p++)
 970                 highhalf += !!(((U8)*p) & 0x80);
 971             if (!highhalf)
 972                 goto plain_copy;
 973             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 974             bufptr = PL_parser->bufptr;
 975             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 976             SvCUR_set(PL_parser->linestr,
 977                 SvCUR(PL_parser->linestr) + len+highhalf);
 978             PL_parser->bufend += len+highhalf;
 979             for (p = pv; p != e; p++) {
 980                 U8 c = (U8)*p;
 981                 if (c & 0x80) {
 982                     *bufptr++ = (char)(0xc0 | (c >> 6));
 983                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 984                 } else {
 985                     *bufptr++ = (char)c;
 986                 }
 987             }
 988         }
 989     } else {
 990         if (flags & LEX_STUFF_UTF8) {
 991             STRLEN highhalf = 0;
 992             const char *p, *e = pv+len;
 993             for (p = pv; p != e; p++) {
 994                 U8 c = (U8)*p;
 995                 if (c >= 0xc4) {
 996                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 997                                 "non-Latin-1 character into Latin-1 input");
 998                 } else if (c >= 0xc2 && p+1 != e &&
 999                             (((U8)p[1]) & 0xc0) == 0x80) {
1000                     p++;
1001                     highhalf++;
1002                 } else if (c >= 0x80) {
1003                     /* malformed UTF-8 */
1004                     ENTER;
1005                     SAVESPTR(PL_warnhook);
1006                     PL_warnhook = PERL_WARNHOOK_FATAL;
1007                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1008                     LEAVE;
1009                 }
1010             }
1011             if (!highhalf)
1012                 goto plain_copy;
1013             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1014             bufptr = PL_parser->bufptr;
1015             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1016             SvCUR_set(PL_parser->linestr,
1017                 SvCUR(PL_parser->linestr) + len-highhalf);
1018             PL_parser->bufend += len-highhalf;
1019             for (p = pv; p != e; p++) {
1020                 U8 c = (U8)*p;
1021                 if (c & 0x80) {
1022                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1023                     p++;
1024                 } else {
1025                     *bufptr++ = (char)c;
1026                 }
1027             }
1028         } else {
1029             plain_copy:
1030             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1031             bufptr = PL_parser->bufptr;
1032             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1033             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1034             PL_parser->bufend += len;
1035             Copy(pv, bufptr, len, char);
1036         }
1037     }
1038 }
1039
1040 /*
1041 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1042
1043 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1044 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1045 reallocating the buffer if necessary.  This means that lexing code that
1046 runs later will see the characters as if they had appeared in the input.
1047 It is not recommended to do this as part of normal parsing, and most
1048 uses of this facility run the risk of the inserted characters being
1049 interpreted in an unintended manner.
1050
1051 The string to be inserted is represented by octets starting at I<pv>
1052 and continuing to the first nul.  These octets are interpreted as either
1053 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1054 in I<flags>.  The characters are recoded for the lexer buffer, according
1055 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1056 If it is not convenient to nul-terminate a string to be inserted, the
1057 L</lex_stuff_pvn> function is more appropriate.
1058
1059 =cut
1060 */
1061
1062 void
1063 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1064 {
1065     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1066     lex_stuff_pvn(pv, strlen(pv), flags);
1067 }
1068
1069 /*
1070 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1071
1072 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1073 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1074 reallocating the buffer if necessary.  This means that lexing code that
1075 runs later will see the characters as if they had appeared in the input.
1076 It is not recommended to do this as part of normal parsing, and most
1077 uses of this facility run the risk of the inserted characters being
1078 interpreted in an unintended manner.
1079
1080 The string to be inserted is the string value of I<sv>.  The characters
1081 are recoded for the lexer buffer, according to how the buffer is currently
1082 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1083 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1084 need to construct a scalar.
1085
1086 =cut
1087 */
1088
1089 void
1090 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1091 {
1092     char *pv;
1093     STRLEN len;
1094     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1095     if (flags)
1096         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1097     pv = SvPV(sv, len);
1098     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1099 }
1100
1101 /*
1102 =for apidoc Amx|void|lex_unstuff|char *ptr
1103
1104 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1105 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1106 This hides the discarded text from any lexing code that runs later,
1107 as if the text had never appeared.
1108
1109 This is not the normal way to consume lexed text.  For that, use
1110 L</lex_read_to>.
1111
1112 =cut
1113 */
1114
1115 void
1116 Perl_lex_unstuff(pTHX_ char *ptr)
1117 {
1118     char *buf, *bufend;
1119     STRLEN unstuff_len;
1120     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1121     buf = PL_parser->bufptr;
1122     if (ptr < buf)
1123         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1124     if (ptr == buf)
1125         return;
1126     bufend = PL_parser->bufend;
1127     if (ptr > bufend)
1128         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1129     unstuff_len = ptr - buf;
1130     Move(ptr, buf, bufend+1-ptr, char);
1131     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1132     PL_parser->bufend = bufend - unstuff_len;
1133 }
1134
1135 /*
1136 =for apidoc Amx|void|lex_read_to|char *ptr
1137
1138 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1139 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1140 performing the correct bookkeeping whenever a newline character is passed.
1141 This is the normal way to consume lexed text.
1142
1143 Interpretation of the buffer's octets can be abstracted out by
1144 using the slightly higher-level functions L</lex_peek_unichar> and
1145 L</lex_read_unichar>.
1146
1147 =cut
1148 */
1149
1150 void
1151 Perl_lex_read_to(pTHX_ char *ptr)
1152 {
1153     char *s;
1154     PERL_ARGS_ASSERT_LEX_READ_TO;
1155     s = PL_parser->bufptr;
1156     if (ptr < s || ptr > PL_parser->bufend)
1157         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1158     for (; s != ptr; s++)
1159         if (*s == '\n') {
1160             CopLINE_inc(PL_curcop);
1161             PL_parser->linestart = s+1;
1162         }
1163     PL_parser->bufptr = ptr;
1164 }
1165
1166 /*
1167 =for apidoc Amx|void|lex_discard_to|char *ptr
1168
1169 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1170 up to I<ptr>.  The remaining content of the buffer will be moved, and
1171 all pointers into the buffer updated appropriately.  I<ptr> must not
1172 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1173 it is not permitted to discard text that has yet to be lexed.
1174
Normally it is not necessary to do this directly, because it suffices to
1176 use the implicit discarding behaviour of L</lex_next_chunk> and things
1177 based on it.  However, if a token stretches across multiple lines,
1178 and the lexing code has kept multiple lines of text in the buffer for
1179 that purpose, then after completion of the token it would be wise to
1180 explicitly discard the now-unneeded earlier lines, to avoid future
1181 multi-line tokens growing the buffer without bound.
1182
1183 =cut
1184 */
1185
1186 void
1187 Perl_lex_discard_to(pTHX_ char *ptr)
1188 {
1189     char *buf;
1190     STRLEN discard_len;
1191     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1192     buf = SvPVX(PL_parser->linestr);
1193     if (ptr < buf)
1194         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1195     if (ptr == buf)
1196         return;
1197     if (ptr > PL_parser->bufptr)
1198         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1199     discard_len = ptr - buf;
1200     if (PL_parser->oldbufptr < ptr)
1201         PL_parser->oldbufptr = ptr;
1202     if (PL_parser->oldoldbufptr < ptr)
1203         PL_parser->oldoldbufptr = ptr;
1204     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1205         PL_parser->last_uni = NULL;
1206     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1207         PL_parser->last_lop = NULL;
1208     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1209     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1210     PL_parser->bufend -= discard_len;
1211     PL_parser->bufptr -= discard_len;
1212     PL_parser->oldbufptr -= discard_len;
1213     PL_parser->oldoldbufptr -= discard_len;
1214     if (PL_parser->last_uni)
1215         PL_parser->last_uni -= discard_len;
1216     if (PL_parser->last_lop)
1217         PL_parser->last_lop -= discard_len;
1218 }
1219
1220 /*
1221 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1222
1223 Reads in the next chunk of text to be lexed, appending it to
1224 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1225 looked to the end of the current chunk and wants to know more.  It is
1226 usual, but not necessary, for lexing to have consumed the entirety of
1227 the current chunk at this time.
1228
1229 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1230 chunk (i.e., the current chunk has been entirely consumed), normally the
1231 current chunk will be discarded at the same time that the new chunk is
1232 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1233 will not be discarded.  If the current chunk has not been entirely
1234 consumed, then it will not be discarded regardless of the flag.
1235
1236 Returns true if some new text was added to the buffer, or false if the
1237 buffer has reached the end of the input text.
1238
1239 =cut
1240 */
1241
1242 #define LEX_FAKE_EOF 0x80000000
1243
1244 bool
1245 Perl_lex_next_chunk(pTHX_ U32 flags)
1246 {
1247     SV *linestr;
1248     char *buf;
1249     STRLEN old_bufend_pos, new_bufend_pos;
1250     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1251     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1252     bool got_some_for_debugger = 0;
1253     bool got_some;
1254     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1255         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1256     linestr = PL_parser->linestr;
1257     buf = SvPVX(linestr);
1258     if (!(flags & LEX_KEEP_PREVIOUS) &&
1259             PL_parser->bufptr == PL_parser->bufend) {
1260         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1261         linestart_pos = 0;
1262         if (PL_parser->last_uni != PL_parser->bufend)
1263             PL_parser->last_uni = NULL;
1264         if (PL_parser->last_lop != PL_parser->bufend)
1265             PL_parser->last_lop = NULL;
1266         last_uni_pos = last_lop_pos = 0;
1267         *buf = 0;
1268         SvCUR(linestr) = 0;
1269     } else {
1270         old_bufend_pos = PL_parser->bufend - buf;
1271         bufptr_pos = PL_parser->bufptr - buf;
1272         oldbufptr_pos = PL_parser->oldbufptr - buf;
1273         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1274         linestart_pos = PL_parser->linestart - buf;
1275         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1276         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1277     }
1278     if (flags & LEX_FAKE_EOF) {
1279         goto eof;
1280     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1281         got_some = 0;
1282     } else if (filter_gets(linestr, old_bufend_pos)) {
1283         got_some = 1;
1284         got_some_for_debugger = 1;
1285     } else {
1286         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1287             sv_setpvs(linestr, "");
1288         eof:
1289         /* End of real input.  Close filehandle (unless it was STDIN),
1290          * then add implicit termination.
1291          */
1292         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1293             PerlIO_clearerr(PL_parser->rsfp);
1294         else if (PL_parser->rsfp)
1295             (void)PerlIO_close(PL_parser->rsfp);
1296         PL_parser->rsfp = NULL;
1297         PL_parser->in_pod = PL_parser->filtered = 0;
1298 #ifdef PERL_MAD
1299         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1300             PL_faketokens = 1;
1301 #endif
1302         if (!PL_in_eval && PL_minus_p) {
1303             sv_catpvs(linestr,
1304                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1305             PL_minus_n = PL_minus_p = 0;
1306         } else if (!PL_in_eval && PL_minus_n) {
1307             sv_catpvs(linestr, /*{*/";}");
1308             PL_minus_n = 0;
1309         } else
1310             sv_catpvs(linestr, ";");
1311         got_some = 1;
1312     }
1313     buf = SvPVX(linestr);
1314     new_bufend_pos = SvCUR(linestr);
1315     PL_parser->bufend = buf + new_bufend_pos;
1316     PL_parser->bufptr = buf + bufptr_pos;
1317     PL_parser->oldbufptr = buf + oldbufptr_pos;
1318     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1319     PL_parser->linestart = buf + linestart_pos;
1320     if (PL_parser->last_uni)
1321         PL_parser->last_uni = buf + last_uni_pos;
1322     if (PL_parser->last_lop)
1323         PL_parser->last_lop = buf + last_lop_pos;
1324     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1325             PL_curstash != PL_debstash) {
1326         /* debugger active and we're not compiling the debugger code,
1327          * so store the line into the debugger's array of lines
1328          */
1329         update_debugger_info(NULL, buf+old_bufend_pos,
1330             new_bufend_pos-old_bufend_pos);
1331     }
1332     return got_some;
1333 }
1334
1335 /*
1336 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1337
1338 Looks ahead one (Unicode) character in the text currently being lexed.
1339 Returns the codepoint (unsigned integer value) of the next character,
1340 or -1 if lexing has reached the end of the input text.  To consume the
1341 peeked character, use L</lex_read_unichar>.
1342
1343 If the next character is in (or extends into) the next chunk of input
1344 text, the next chunk will be read in.  Normally the current chunk will be
1345 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1346 then the current chunk will not be discarded.
1347
1348 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1349 is encountered, an exception is generated.
1350
1351 =cut
1352 */
1353
1354 I32
1355 Perl_lex_peek_unichar(pTHX_ U32 flags)
1356 {
1357     dVAR;
1358     char *s, *bufend;
1359     if (flags & ~(LEX_KEEP_PREVIOUS))
1360         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1361     s = PL_parser->bufptr;
1362     bufend = PL_parser->bufend;
1363     if (UTF) {
1364         U8 head;
1365         I32 unichar;
1366         STRLEN len, retlen;
1367         if (s == bufend) {
1368             if (!lex_next_chunk(flags))
1369                 return -1;
1370             s = PL_parser->bufptr;
1371             bufend = PL_parser->bufend;
1372         }
1373         head = (U8)*s;
1374         if (!(head & 0x80))
1375             return head;
1376         if (head & 0x40) {
1377             len = PL_utf8skip[head];
1378             while ((STRLEN)(bufend-s) < len) {
1379                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1380                     break;
1381                 s = PL_parser->bufptr;
1382                 bufend = PL_parser->bufend;
1383             }
1384         }
1385         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1386         if (retlen == (STRLEN)-1) {
1387             /* malformed UTF-8 */
1388             ENTER;
1389             SAVESPTR(PL_warnhook);
1390             PL_warnhook = PERL_WARNHOOK_FATAL;
1391             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1392             LEAVE;
1393         }
1394         return unichar;
1395     } else {
1396         if (s == bufend) {
1397             if (!lex_next_chunk(flags))
1398                 return -1;
1399             s = PL_parser->bufptr;
1400         }
1401         return (U8)*s;
1402     }
1403 }
1404
1405 /*
1406 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1407
1408 Reads the next (Unicode) character in the text currently being lexed.
1409 Returns the codepoint (unsigned integer value) of the character read,
1410 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1411 if lexing has reached the end of the input text.  To non-destructively
1412 examine the next character, use L</lex_peek_unichar> instead.
1413
1414 If the next character is in (or extends into) the next chunk of input
1415 text, the next chunk will be read in.  Normally the current chunk will be
1416 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1417 then the current chunk will not be discarded.
1418
1419 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1420 is encountered, an exception is generated.
1421
1422 =cut
1423 */
1424
1425 I32
1426 Perl_lex_read_unichar(pTHX_ U32 flags)
1427 {
1428     I32 c;
1429     if (flags & ~(LEX_KEEP_PREVIOUS))
1430         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1431     c = lex_peek_unichar(flags);
1432     if (c != -1) {
1433         if (c == '\n')
1434             CopLINE_inc(PL_curcop);
1435         if (UTF)
1436             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1437         else
1438             ++(PL_parser->bufptr);
1439     }
1440     return c;
1441 }
1442
1443 /*
1444 =for apidoc Amx|void|lex_read_space|U32 flags
1445
1446 Reads optional spaces, in Perl style, in the text currently being
1447 lexed.  The spaces may include ordinary whitespace characters and
1448 Perl-style comments.  C<#line> directives are processed if encountered.
1449 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1450 at a non-space character (or the end of the input text).
1451
1452 If spaces extend into the next chunk of input text, the next chunk will
1453 be read in.  Normally the current chunk will be discarded at the same
1454 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1455 chunk will not be discarded.
1456
1457 =cut
1458 */
1459
1460 #define LEX_NO_NEXT_CHUNK 0x80000000
1461
1462 void
1463 Perl_lex_read_space(pTHX_ U32 flags)
1464 {
1465     char *s, *bufend;
1466     bool need_incline = 0;
1467     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1468         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1469 #ifdef PERL_MAD
1470     if (PL_skipwhite) {
1471         sv_free(PL_skipwhite);
1472         PL_skipwhite = NULL;
1473     }
1474     if (PL_madskills)
1475         PL_skipwhite = newSVpvs("");
1476 #endif /* PERL_MAD */
1477     s = PL_parser->bufptr;
1478     bufend = PL_parser->bufend;
1479     while (1) {
1480         char c = *s;
1481         if (c == '#') {
1482             do {
1483                 c = *++s;
1484             } while (!(c == '\n' || (c == 0 && s == bufend)));
1485         } else if (c == '\n') {
1486             s++;
1487             PL_parser->linestart = s;
1488             if (s == bufend)
1489                 need_incline = 1;
1490             else
1491                 incline(s);
1492         } else if (isSPACE(c)) {
1493             s++;
1494         } else if (c == 0 && s == bufend) {
1495             bool got_more;
1496 #ifdef PERL_MAD
1497             if (PL_madskills)
1498                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1499 #endif /* PERL_MAD */
1500             if (flags & LEX_NO_NEXT_CHUNK)
1501                 break;
1502             PL_parser->bufptr = s;
1503             CopLINE_inc(PL_curcop);
1504             got_more = lex_next_chunk(flags);
1505             CopLINE_dec(PL_curcop);
1506             s = PL_parser->bufptr;
1507             bufend = PL_parser->bufend;
1508             if (!got_more)
1509                 break;
1510             if (need_incline && PL_parser->rsfp) {
1511                 incline(s);
1512                 need_incline = 0;
1513             }
1514         } else {
1515             break;
1516         }
1517     }
1518 #ifdef PERL_MAD
1519     if (PL_madskills)
1520         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1521 #endif /* PERL_MAD */
1522     PL_parser->bufptr = s;
1523 }
1524
1525 /*
1526  * S_incline
1527  * This subroutine has nothing to do with tilting, whether at windmills
1528  * or pinball tables.  Its name is short for "increment line".  It
1529  * increments the current line number in CopLINE(PL_curcop) and checks
1530  * to see whether the line starts with a comment of the form
1531  *    # line 500 "foo.pm"
1532  * If so, it sets the current line number and file to the values in the comment.
1533  */
1534
1535 STATIC void
1536 S_incline(pTHX_ const char *s)
1537 {
1538     dVAR;
1539     const char *t;
1540     const char *n;
1541     const char *e;
1542     line_t line_num;
1543
1544     PERL_ARGS_ASSERT_INCLINE;
1545
1546     CopLINE_inc(PL_curcop);
1547     if (*s++ != '#')
1548         return;
1549     while (SPACE_OR_TAB(*s))
1550         s++;
1551     if (strnEQ(s, "line", 4))
1552         s += 4;
1553     else
1554         return;
1555     if (SPACE_OR_TAB(*s))
1556         s++;
1557     else
1558         return;
1559     while (SPACE_OR_TAB(*s))
1560         s++;
1561     if (!isDIGIT(*s))
1562         return;
1563
1564     n = s;
1565     while (isDIGIT(*s))
1566         s++;
1567     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1568         return;
1569     while (SPACE_OR_TAB(*s))
1570         s++;
1571     if (*s == '"' && (t = strchr(s+1, '"'))) {
1572         s++;
1573         e = t + 1;
1574     }
1575     else {
1576         t = s;
1577         while (!isSPACE(*t))
1578             t++;
1579         e = t;
1580     }
1581     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1582         e++;
1583     if (*e != '\n' && *e != '\0')
1584         return;         /* false alarm */
1585
1586     line_num = atoi(n)-1;
1587
1588     if (t - s > 0) {
1589         const STRLEN len = t - s;
1590         SV *const temp_sv = CopFILESV(PL_curcop);
1591         const char *cf;
1592         STRLEN tmplen;
1593
1594         if (temp_sv) {
1595             cf = SvPVX(temp_sv);
1596             tmplen = SvCUR(temp_sv);
1597         } else {
1598             cf = NULL;
1599             tmplen = 0;
1600         }
1601
1602         if (!PL_rsfp && !PL_parser->filtered) {
1603             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1604              * to *{"::_<newfilename"} */
1605             /* However, the long form of evals is only turned on by the
1606                debugger - usually they're "(eval %lu)" */
1607             char smallbuf[128];
1608             char *tmpbuf;
1609             GV **gvp;
1610             STRLEN tmplen2 = len;
1611             if (tmplen + 2 <= sizeof smallbuf)
1612                 tmpbuf = smallbuf;
1613             else
1614                 Newx(tmpbuf, tmplen + 2, char);
1615             tmpbuf[0] = '_';
1616             tmpbuf[1] = '<';
1617             memcpy(tmpbuf + 2, cf, tmplen);
1618             tmplen += 2;
1619             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1620             if (gvp) {
1621                 char *tmpbuf2;
1622                 GV *gv2;
1623
1624                 if (tmplen2 + 2 <= sizeof smallbuf)
1625                     tmpbuf2 = smallbuf;
1626                 else
1627                     Newx(tmpbuf2, tmplen2 + 2, char);
1628
1629                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1630                     /* Either they malloc'd it, or we malloc'd it,
1631                        so no prefix is present in ours.  */
1632                     tmpbuf2[0] = '_';
1633                     tmpbuf2[1] = '<';
1634                 }
1635
1636                 memcpy(tmpbuf2 + 2, s, tmplen2);
1637                 tmplen2 += 2;
1638
1639                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1640                 if (!isGV(gv2)) {
1641                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1642                     /* adjust ${"::_<newfilename"} to store the new file name */
1643                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1644                     /* The line number may differ. If that is the case,
1645                        alias the saved lines that are in the array.
1646                        Otherwise alias the whole array. */
1647                     if (CopLINE(PL_curcop) == line_num) {
1648                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1649                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1650                     }
1651                     else if (GvAV(*gvp)) {
1652                         AV * const av = GvAV(*gvp);
1653                         const I32 start = CopLINE(PL_curcop)+1;
1654                         I32 items = AvFILLp(av) - start;
1655                         if (items > 0) {
1656                             AV * const av2 = GvAVn(gv2);
1657                             SV **svp = AvARRAY(av) + start;
1658                             I32 l = (I32)line_num+1;
1659                             while (items--)
1660                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1661                         }
1662                     }
1663                 }
1664
1665                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1666             }
1667             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1668         }
1669         CopFILE_free(PL_curcop);
1670         CopFILE_setn(PL_curcop, s, len);
1671     }
1672     CopLINE_set(PL_curcop, line_num);
1673 }
1674
1675 #ifdef PERL_MAD
1676 /* skip space before PL_thistoken */
1677
1678 STATIC char *
1679 S_skipspace0(pTHX_ register char *s)
1680 {
1681     PERL_ARGS_ASSERT_SKIPSPACE0;
1682
1683     s = skipspace(s);
1684     if (!PL_madskills)
1685         return s;
1686     if (PL_skipwhite) {
1687         if (!PL_thiswhite)
1688             PL_thiswhite = newSVpvs("");
1689         sv_catsv(PL_thiswhite, PL_skipwhite);
1690         sv_free(PL_skipwhite);
1691         PL_skipwhite = 0;
1692     }
1693     PL_realtokenstart = s - SvPVX(PL_linestr);
1694     return s;
1695 }
1696
1697 /* skip space after PL_thistoken */
1698
1699 STATIC char *
1700 S_skipspace1(pTHX_ register char *s)
1701 {
1702     const char *start = s;
1703     I32 startoff = start - SvPVX(PL_linestr);
1704
1705     PERL_ARGS_ASSERT_SKIPSPACE1;
1706
1707     s = skipspace(s);
1708     if (!PL_madskills)
1709         return s;
1710     start = SvPVX(PL_linestr) + startoff;
1711     if (!PL_thistoken && PL_realtokenstart >= 0) {
1712         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1713         PL_thistoken = newSVpvn(tstart, start - tstart);
1714     }
1715     PL_realtokenstart = -1;
1716     if (PL_skipwhite) {
1717         if (!PL_nextwhite)
1718             PL_nextwhite = newSVpvs("");
1719         sv_catsv(PL_nextwhite, PL_skipwhite);
1720         sv_free(PL_skipwhite);
1721         PL_skipwhite = 0;
1722     }
1723     return s;
1724 }
1725
1726 STATIC char *
1727 S_skipspace2(pTHX_ register char *s, SV **svp)
1728 {
1729     char *start;
1730     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1731     const I32 startoff = s - SvPVX(PL_linestr);
1732
1733     PERL_ARGS_ASSERT_SKIPSPACE2;
1734
1735     s = skipspace(s);
1736     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1737     if (!PL_madskills || !svp)
1738         return s;
1739     start = SvPVX(PL_linestr) + startoff;
1740     if (!PL_thistoken && PL_realtokenstart >= 0) {
1741         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1742         PL_thistoken = newSVpvn(tstart, start - tstart);
1743         PL_realtokenstart = -1;
1744     }
1745     if (PL_skipwhite) {
1746         if (!*svp)
1747             *svp = newSVpvs("");
1748         sv_setsv(*svp, PL_skipwhite);
1749         sv_free(PL_skipwhite);
1750         PL_skipwhite = 0;
1751     }
1752
1753     return s;
1754 }
1755 #endif
1756
1757 STATIC void
1758 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1759 {
1760     AV *av = CopFILEAVx(PL_curcop);
1761     if (av) {
1762         SV * const sv = newSV_type(SVt_PVMG);
1763         if (orig_sv)
1764             sv_setsv(sv, orig_sv);
1765         else
1766             sv_setpvn(sv, buf, len);
1767         (void)SvIOK_on(sv);
1768         SvIV_set(sv, 0);
1769         av_store(av, (I32)CopLINE(PL_curcop), sv);
1770     }
1771 }
1772
1773 /*
1774  * S_skipspace
1775  * Called to gobble the appropriate amount and type of whitespace.
1776  * Skips comments as well.
1777  */
1778
1779 STATIC char *
1780 S_skipspace(pTHX_ register char *s)
1781 {
1782 #ifdef PERL_MAD
1783     char *start = s;
1784 #endif /* PERL_MAD */
1785     PERL_ARGS_ASSERT_SKIPSPACE;
1786 #ifdef PERL_MAD
1787     if (PL_skipwhite) {
1788         sv_free(PL_skipwhite);
1789         PL_skipwhite = NULL;
1790     }
1791 #endif /* PERL_MAD */
1792     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1793         while (s < PL_bufend && SPACE_OR_TAB(*s))
1794             s++;
1795     } else {
1796         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1797         PL_bufptr = s;
1798         lex_read_space(LEX_KEEP_PREVIOUS |
1799                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1800                     LEX_NO_NEXT_CHUNK : 0));
1801         s = PL_bufptr;
1802         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1803         if (PL_linestart > PL_bufptr)
1804             PL_bufptr = PL_linestart;
1805         return s;
1806     }
1807 #ifdef PERL_MAD
1808     if (PL_madskills)
1809         PL_skipwhite = newSVpvn(start, s-start);
1810 #endif /* PERL_MAD */
1811     return s;
1812 }
1813
1814 /*
1815  * S_check_uni
1816  * Check the unary operators to ensure there's no ambiguity in how they're
1817  * used.  An ambiguous piece of code would be:
1818  *     rand + 5
1819  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1820  * the +5 is its argument.
1821  */
1822
1823 STATIC void
1824 S_check_uni(pTHX)
1825 {
1826     dVAR;
1827     const char *s;
1828     const char *t;
1829
1830     if (PL_oldoldbufptr != PL_last_uni)
1831         return;
1832     while (isSPACE(*PL_last_uni))
1833         PL_last_uni++;
1834     s = PL_last_uni;
1835     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1836         s++;
1837     if ((t = strchr(s, '(')) && t < PL_bufptr)
1838         return;
1839
1840     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1841                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1842                      (int)(s - PL_last_uni), PL_last_uni);
1843 }
1844
1845 /*
1846  * LOP : macro to build a list operator.  Its behaviour has been replaced
1847  * with a subroutine, S_lop() for which LOP is just another name.
1848  */
1849
/* NB: expands to a complete 'return' statement and names a variable 's'
 * that is not among its arguments, so it can only be used inside a
 * function that has 's' in scope and is meant to return at that point. */
#define LOP(f,x) return lop(f,x,s)
1851
1852 /*
1853  * S_lop
1854  * Build a list operator (or something that might be one).  The rules:
1855  *  - if we have a next token, then it's a list operator [why?]
1856  *  - if the next thing is an opening paren, then it's a function
1857  *  - else it's a list operator
1858  */
1859
1860 STATIC I32
1861 S_lop(pTHX_ I32 f, int x, char *s)
1862 {
1863     dVAR;
1864
1865     PERL_ARGS_ASSERT_LOP;
1866
1867     pl_yylval.ival = f;
1868     CLINE;
1869     PL_expect = x;
1870     PL_bufptr = s;
1871     PL_last_lop = PL_oldbufptr;
1872     PL_last_lop_op = (OPCODE)f;
1873 #ifdef PERL_MAD
1874     if (PL_lasttoke)
1875         goto lstop;
1876 #else
1877     if (PL_nexttoke)
1878         goto lstop;
1879 #endif
1880     if (*s == '(')
1881         return REPORT(FUNC);
1882     s = PEEKSPACE(s);
1883     if (*s == '(')
1884         return REPORT(FUNC);
1885     else {
1886         lstop:
1887         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1888             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1889         return REPORT(LSTOP);
1890     }
1891 }
1892
1893 #ifdef PERL_MAD
1894  /*
1895  * S_start_force
1896  * Sets up for an eventual force_next().  start_force(0) basically does
1897  * an unshift, while start_force(-1) does a push.  yylex removes items
1898  * on the "pop" end.
1899  */
1900
1901 STATIC void
1902 S_start_force(pTHX_ int where)
1903 {
1904     int i;
1905
1906     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1907         where = PL_lasttoke;
1908     assert(PL_curforce < 0 || PL_curforce == where);
1909     if (PL_curforce != where) {
1910         for (i = PL_lasttoke; i > where; --i) {
1911             PL_nexttoke[i] = PL_nexttoke[i-1];
1912         }
1913         PL_lasttoke++;
1914     }
1915     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1916         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1917     PL_curforce = where;
1918     if (PL_nextwhite) {
1919         if (PL_madskills)
1920             curmad('^', newSVpvs(""));
1921         CURMAD('_', PL_nextwhite);
1922     }
1923 }
1924
1925 STATIC void
1926 S_curmad(pTHX_ char slot, SV *sv)
1927 {
1928     MADPROP **where;
1929
1930     if (!sv)
1931         return;
1932     if (PL_curforce < 0)
1933         where = &PL_thismad;
1934     else
1935         where = &PL_nexttoke[PL_curforce].next_mad;
1936
1937     if (PL_faketokens)
1938         sv_setpvs(sv, "");
1939     else {
1940         if (!IN_BYTES) {
1941             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1942                 SvUTF8_on(sv);
1943             else if (PL_encoding) {
1944                 sv_recode_to_utf8(sv, PL_encoding);
1945             }
1946         }
1947     }
1948
1949     /* keep a slot open for the head of the list? */
1950     if (slot != '_' && *where && (*where)->mad_key == '^') {
1951         (*where)->mad_key = slot;
1952         sv_free(MUTABLE_SV(((*where)->mad_val)));
1953         (*where)->mad_val = (void*)sv;
1954     }
1955     else
1956         addmad(newMADsv(slot, sv), where, 0);
1957 }
1958 #else
1959 #  define start_force(where)    NOOP
1960 #  define curmad(slot, sv)      NOOP
1961 #endif
1962
1963 /*
1964  * S_force_next
1965  * When the lexer realizes it knows the next token (for instance,
 * it is reordering tokens for the parser), it can call S_force_next
 * to record which token to return the next time the lexer is called.  Caller
1968  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1969  * and possibly PL_expect to ensure the lexer handles the token correctly.
1970  */
1971
1972 STATIC void
1973 S_force_next(pTHX_ I32 type)
1974 {
1975     dVAR;
1976 #ifdef DEBUGGING
1977     if (DEBUG_T_TEST) {
1978         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1979         tokereport(type, &NEXTVAL_NEXTTOKE);
1980     }
1981 #endif
1982 #ifdef PERL_MAD
1983     if (PL_curforce < 0)
1984         start_force(PL_lasttoke);
1985     PL_nexttoke[PL_curforce].next_type = type;
1986     if (PL_lex_state != LEX_KNOWNEXT)
1987         PL_lex_defer = PL_lex_state;
1988     PL_lex_state = LEX_KNOWNEXT;
1989     PL_lex_expect = PL_expect;
1990     PL_curforce = -1;
1991 #else
1992     PL_nexttype[PL_nexttoke] = type;
1993     PL_nexttoke++;
1994     if (PL_lex_state != LEX_KNOWNEXT) {
1995         PL_lex_defer = PL_lex_state;
1996         PL_lex_expect = PL_expect;
1997         PL_lex_state = LEX_KNOWNEXT;
1998     }
1999 #endif
2000 }
2001
2002 void
2003 Perl_yyunlex(pTHX)
2004 {
2005     int yyc = PL_parser->yychar;
2006     if (yyc != YYEMPTY) {
2007         if (yyc) {
2008             start_force(-1);
2009             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2010             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2011                 PL_lex_allbrackets--;
2012                 PL_lex_brackets--;
2013                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2014             } else if (yyc == '('/*)*/) {
2015                 PL_lex_allbrackets--;
2016                 yyc |= (2<<24);
2017             }
2018             force_next(yyc);
2019         }
2020         PL_parser->yychar = YYEMPTY;
2021     }
2022 }
2023
2024 STATIC SV *
2025 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2026 {
2027     dVAR;
2028     SV * const sv = newSVpvn_utf8(start, len,
2029                                   !IN_BYTES
2030                                   && UTF
2031                                   && !is_ascii_string((const U8*)start, len)
2032                                   && is_utf8_string((const U8*)start, len));
2033     return sv;
2034 }
2035
2036 /*
2037  * S_force_word
 * When the lexer knows the next thing is a word (for instance, it has
 * just seen -> and it knows that the next char is a word char), then
 * it calls S_force_word to stick the next word into the PL_nexttoke/val
 * lookahead.
2042  *
2043  * Arguments:
2044  *   char *start : buffer position (must be within PL_linestr)
2045  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2046  *   int check_keyword : if true, Perl checks to make sure the word isn't
2047  *       a keyword (do this if the word is a label, e.g. goto FOO)
2048  *   int allow_pack : if true, : characters will also be allowed (require,
2049  *       use, etc. do this)
2050  *   int allow_initial_tick : used by the "sub" lexer only.
2051  */
2052
2053 STATIC char *
2054 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2055 {
2056     dVAR;
2057     register char *s;
2058     STRLEN len;
2059
2060     PERL_ARGS_ASSERT_FORCE_WORD;
2061
2062     start = SKIPSPACE1(start);
2063     s = start;
2064     if (isIDFIRST_lazy_if(s,UTF) ||
2065         (allow_pack && *s == ':') ||
2066         (allow_initial_tick && *s == '\'') )
2067     {
2068         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2069         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2070             return start;
2071         start_force(PL_curforce);
2072         if (PL_madskills)
2073             curmad('X', newSVpvn(start,s-start));
2074         if (token == METHOD) {
2075             s = SKIPSPACE1(s);
2076             if (*s == '(')
2077                 PL_expect = XTERM;
2078             else {
2079                 PL_expect = XOPERATOR;
2080             }
2081         }
2082         if (PL_madskills)
2083             curmad('g', newSVpvs( "forced" ));
2084         NEXTVAL_NEXTTOKE.opval
2085             = (OP*)newSVOP(OP_CONST,0,
2086                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2087         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2088         force_next(token);
2089     }
2090     return s;
2091 }
2092
2093 /*
2094  * S_force_ident
2095  * Called when the lexer wants $foo *foo &foo etc, but the program
2096  * text only contains the "foo" portion.  The first argument is a pointer
2097  * to the "foo", and the second argument is the type symbol to prefix.
2098  * Forces the next token to be a "WORD".
2099  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2100  */
2101
2102 STATIC void
2103 S_force_ident(pTHX_ register const char *s, int kind)
2104 {
2105     dVAR;
2106
2107     PERL_ARGS_ASSERT_FORCE_IDENT;
2108
2109     if (*s) {
2110         const STRLEN len = strlen(s);
2111         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2112                                                                 UTF ? SVf_UTF8 : 0));
2113         start_force(PL_curforce);
2114         NEXTVAL_NEXTTOKE.opval = o;
2115         force_next(WORD);
2116         if (kind) {
2117             o->op_private = OPpCONST_ENTERED;
2118             /* XXX see note in pp_entereval() for why we forgo typo
2119                warnings if the symbol must be introduced in an eval.
2120                GSAR 96-10-12 */
2121             gv_fetchpvn_flags(s, len,
2122                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2123                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2124                               kind == '$' ? SVt_PV :
2125                               kind == '@' ? SVt_PVAV :
2126                               kind == '%' ? SVt_PVHV :
2127                               SVt_PVGV
2128                               );
2129         }
2130     }
2131 }
2132
2133 NV
2134 Perl_str_to_version(pTHX_ SV *sv)
2135 {
2136     NV retval = 0.0;
2137     NV nshift = 1.0;
2138     STRLEN len;
2139     const char *start = SvPV_const(sv,len);
2140     const char * const end = start + len;
2141     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2142
2143     PERL_ARGS_ASSERT_STR_TO_VERSION;
2144
2145     while (start < end) {
2146         STRLEN skip;
2147         UV n;
2148         if (utf)
2149             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2150         else {
2151             n = *(U8*)start;
2152             skip = 1;
2153         }
2154         retval += ((NV)n)/nshift;
2155         start += skip;
2156         nshift *= 1000;
2157     }
2158     return retval;
2159 }
2160
2161 /*
2162  * S_force_version
2163  * Forces the next token to be a version number.
2164  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2165  * and if "guessing" is TRUE, then no new token is created (and the caller
2166  * must use an alternative parsing method).
2167  */
2168
STATIC char *
S_force_version(pTHX_ char *s, int guessing)
{
    dVAR;
    OP *version = NULL;     /* stays NULL when no version number is found */
    char *d;
#ifdef PERL_MAD
    /* entry position as an offset, so it can be re-derived from
     * PL_linestr later on */
    I32 startoff = s - SvPVX(PL_linestr);
#endif

    PERL_ARGS_ASSERT_FORCE_VERSION;

    s = SKIPSPACE1(s);

    /* Look ahead with d: an optional 'v', then a digit, then any run of
     * digits, underscores and dots. */
    d = s;
    if (*d == 'v')
        d++;
    if (isDIGIT(*d)) {
        while (isDIGIT(*d) || *d == '_' || *d == '.')
            d++;
#ifdef PERL_MAD
        if (PL_madskills) {
            start_force(PL_curforce);
            curmad('X', newSVpvn(s,d-s));
        }
#endif
        /* Only accept it as a version if it is properly terminated. */
        if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
            SV *ver;
#ifdef USE_LOCALE_NUMERIC
            /* scan the number under the "C" numeric locale, then put the
             * previous locale back */
            char *loc = savepv(setlocale(LC_NUMERIC, NULL));
            setlocale(LC_NUMERIC, "C");
#endif
            s = scan_num(s, &pl_yylval);
#ifdef USE_LOCALE_NUMERIC
            setlocale(LC_NUMERIC, loc);
            Safefree(loc);
#endif
            version = pl_yylval.opval;
            ver = cSVOPx(version)->op_sv;
            /* a string-only result also gets a numeric value computed by
             * str_to_version() */
            if (SvPOK(ver) && !SvNIOK(ver)) {
                SvUPGRADE(ver, SVt_PVNV);
                SvNV_set(ver, str_to_version(ver));
                SvNOK_on(ver);          /* hint that it is a version */
            }
        }
        else if (guessing) {
            /* Not a valid version and the caller was only guessing:
             * return without forcing any token. */
#ifdef PERL_MAD
            if (PL_madskills) {
                sv_free(PL_nextwhite);  /* let next token collect whitespace */
                PL_nextwhite = 0;
                s = SvPVX(PL_linestr) + startoff;
            }
#endif
            return s;
        }
    }

#ifdef PERL_MAD
    if (PL_madskills && !version) {
        sv_free(PL_nextwhite);  /* let next token collect whitespace */
        PL_nextwhite = 0;
        s = SvPVX(PL_linestr) + startoff;
    }
#endif
    /* NOTE: The parser sees the package name and the VERSION swapped */
    /* A WORD token is forced in every remaining case; its opval is NULL
     * when no version was scanned. */
    start_force(PL_curforce);
    NEXTVAL_NEXTTOKE.opval = version;
    force_next(WORD);

    return s;
}
2240
2241 /*
2242  * S_force_strict_version
2243  * Forces the next token to be a version number using strict syntax rules.
2244  */
2245
2246 STATIC char *
2247 S_force_strict_version(pTHX_ char *s)
2248 {
2249     dVAR;
2250     OP *version = NULL;
2251 #ifdef PERL_MAD
2252     I32 startoff = s - SvPVX(PL_linestr);
2253 #endif
2254     const char *errstr = NULL;
2255
2256     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2257
2258     while (isSPACE(*s)) /* leading whitespace */
2259         s++;
2260
2261     if (is_STRICT_VERSION(s,&errstr)) {
2262         SV *ver = newSV(0);
2263         s = (char *)scan_version(s, ver, 0);
2264         version = newSVOP(OP_CONST, 0, ver);
2265     }
2266     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2267             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2268     {
2269         PL_bufptr = s;
2270         if (errstr)
2271             yyerror(errstr); /* version required */
2272         return s;
2273     }
2274
2275 #ifdef PERL_MAD
2276     if (PL_madskills && !version) {
2277         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2278         PL_nextwhite = 0;
2279         s = SvPVX(PL_linestr) + startoff;
2280     }
2281 #endif
2282     /* NOTE: The parser sees the package name and the VERSION swapped */
2283     start_force(PL_curforce);
2284     NEXTVAL_NEXTTOKE.opval = version;
2285     force_next(WORD);
2286
2287     return s;
2288 }
2289
2290 /*
2291  * S_tokeq
2292  * Tokenize a quoted string passed in as an SV.  It finds the next
2293  * chunk, up to end of string or a backslash.  It may make a new
2294  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2295  * turns \\ into \.
2296  */
2297
2298 STATIC SV *
2299 S_tokeq(pTHX_ SV *sv)
2300 {
2301     dVAR;
2302     register char *s;
2303     register char *send;
2304     register char *d;
2305     STRLEN len = 0;
2306     SV *pv = sv;
2307
2308     PERL_ARGS_ASSERT_TOKEQ;
2309
2310     if (!SvLEN(sv))
2311         goto finish;
2312
2313     s = SvPV_force(sv, len);
2314     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2315         goto finish;
2316     send = s + len;
2317     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2318     while (s < send && !(*s == '\\' && s[1] == '\\'))
2319         s++;
2320     if (s == send)
2321         goto finish;
2322     d = s;
2323     if ( PL_hints & HINT_NEW_STRING ) {
2324         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2325     }
2326     while (s < send) {
2327         if (*s == '\\') {
2328             if (s + 1 < send && (s[1] == '\\'))
2329                 s++;            /* all that, just for this */
2330         }
2331         *d++ = *s++;
2332     }
2333     *d = '\0';
2334     SvCUR_set(sv, d - SvPVX_const(sv));
2335   finish:
2336     if ( PL_hints & HINT_NEW_STRING )
2337        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2338     return sv;
2339 }
2340
2341 /*
2342  * Now come three functions related to double-quote context,
2343  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2344  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2345  * interact with PL_lex_state, and create fake ( ... ) argument lists
2346  * to handle functions and concatenation.
2347  * For example,
2348  *   "foo\lbar"
2349  * is tokenised as
2350  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2351  */
2352
2353 /*
2354  * S_sublex_start
2355  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2356  *
2357  * Pattern matching will set PL_lex_op to the pattern-matching op to
2358  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2359  *
2360  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2361  *
2362  * Everything else becomes a FUNC.
2363  *
2364  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2365  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2366  * call to S_sublex_push().
2367  */
2368
2369 STATIC I32
2370 S_sublex_start(pTHX)
2371 {
2372     dVAR;
2373     register const I32 op_type = pl_yylval.ival;
2374
2375     if (op_type == OP_NULL) {
2376         pl_yylval.opval = PL_lex_op;
2377         PL_lex_op = NULL;
2378         return THING;
2379     }
2380     if (op_type == OP_CONST || op_type == OP_READLINE) {
2381         SV *sv = tokeq(PL_lex_stuff);
2382
2383         if (SvTYPE(sv) == SVt_PVIV) {
2384             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2385             STRLEN len;
2386             const char * const p = SvPV_const(sv, len);
2387             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2388             SvREFCNT_dec(sv);
2389             sv = nsv;
2390         }
2391         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2392         PL_lex_stuff = NULL;
2393         /* Allow <FH> // "foo" */
2394         if (op_type == OP_READLINE)
2395             PL_expect = XTERMORDORDOR;
2396         return THING;
2397     }
2398     else if (op_type == OP_BACKTICK && PL_lex_op) {
2399         /* readpipe() vas overriden */
2400         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2401         pl_yylval.opval = PL_lex_op;
2402         PL_lex_op = NULL;
2403         PL_lex_stuff = NULL;
2404         return THING;
2405     }
2406
2407     PL_sublex_info.super_state = PL_lex_state;
2408     PL_sublex_info.sub_inwhat = (U16)op_type;
2409     PL_sublex_info.sub_op = PL_lex_op;
2410     PL_lex_state = LEX_INTERPPUSH;
2411
2412     PL_expect = XTERM;
2413     if (PL_lex_op) {
2414         pl_yylval.opval = PL_lex_op;
2415         PL_lex_op = NULL;
2416         return PMFUNC;
2417     }
2418     else
2419         return FUNC;
2420 }
2421
2422 /*
2423  * S_sublex_push
2424  * Create a new scope to save the lexing state.  The scope will be
2425  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2426  * to the uc, lc, etc. found before.
2427  * Sets PL_lex_state to LEX_INTERPCONCAT.
2428  */
2429
STATIC I32
S_sublex_push(pTHX)
{
    dVAR;
    ENTER;

    /* PL_lex_state is set to the outer ("super") state first, so that the
     * SAVEI8(PL_lex_state) below records that value as the one to restore. */
    PL_lex_state = PL_sublex_info.super_state;
    SAVEBOOL(PL_lex_dojoin);
    SAVEI32(PL_lex_brackets);
    SAVEI32(PL_lex_allbrackets);
    SAVEI8(PL_lex_fakeeof);
    SAVEI32(PL_lex_casemods);
    SAVEI32(PL_lex_starts);
    SAVEI8(PL_lex_state);
    SAVEPPTR(PL_sublex_info.re_eval_start);
    SAVEVPTR(PL_lex_inpat);
    SAVEI16(PL_lex_inwhat);
    SAVECOPLINE(PL_curcop);
    SAVEPPTR(PL_bufptr);
    SAVEPPTR(PL_bufend);
    SAVEPPTR(PL_oldbufptr);
    SAVEPPTR(PL_oldoldbufptr);
    SAVEPPTR(PL_last_lop);
    SAVEPPTR(PL_last_uni);
    SAVEPPTR(PL_linestart);
    SAVESPTR(PL_linestr);
    SAVEGENERICPV(PL_lex_brackstack);
    SAVEGENERICPV(PL_lex_casestack);

    /* The string collected in PL_lex_stuff becomes the line being lexed. */
    PL_linestr = PL_lex_stuff;
    PL_lex_stuff = NULL;
    PL_sublex_info.re_eval_start = NULL;

    /* Point every buffer position at the start of the new string, then
     * move PL_bufend to its end. */
    PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
        = SvPVX(PL_linestr);
    PL_bufend += SvCUR(PL_linestr);
    PL_last_lop = PL_last_uni = NULL;
    SAVEFREESV(PL_linestr);

    /* Start the inner lexing context from a clean slate, with freshly
     * allocated bracket and case-modifier stacks. */
    PL_lex_dojoin = FALSE;
    PL_lex_brackets = 0;
    PL_lex_allbrackets = 0;
    PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
    Newx(PL_lex_brackstack, 120, char);
    Newx(PL_lex_casestack, 12, char);
    PL_lex_casemods = 0;
    *PL_lex_casestack = '\0';
    PL_lex_starts = 0;
    PL_lex_state = LEX_INTERPCONCAT;
    CopLINE_set(PL_curcop, (line_t)PL_multi_start);

    /* OP_TRANSR is folded into OP_TRANS here; PL_lex_inpat is set only
     * for match, qr and substitution ops. */
    PL_lex_inwhat = PL_sublex_info.sub_inwhat;
    if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
    if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
        PL_lex_inpat = PL_sublex_info.sub_op;
    else
        PL_lex_inpat = NULL;

    return '(';
}
2490
2491 /*
2492  * S_sublex_done
2493  * Restores lexer state after a S_sublex_push.
      *
      * Takes only the interpreter context and returns a token code (I32).
      * One of four outcomes is chosen, tested in this order:
      *
      *   THING     PL_lex_starts was still 0 on entry (it is incremented as
      *             a side effect of the test).  An empty-string OP_CONST is
      *             stored in pl_yylval.opval, flagged UTF-8 when PL_linestr
      *             is, and PL_expect becomes XOPERATOR.
      *   yylex()   PL_lex_casemods is non-zero.  PL_lex_state is switched to
      *             LEX_INTERPCASEMOD and whatever yylex() returns is passed
      *             straight back.
      *   ','       PL_lex_repl is set and PL_lex_inwhat is OP_SUBST or
      *             OP_TRANS.  PL_lex_repl becomes the new PL_linestr, the
      *             buffer cursors and the bracket/casemod bookkeeping are
      *             reset, and PL_lex_state is set for the right-hand side.
      *   ')'       Anything else.  LEAVE is executed, PL_bufend is recomputed
      *             from PL_linestr, PL_expect becomes XOPERATOR and
      *             PL_sublex_info.sub_inwhat is cleared.
      *
      * NOTE(review): the ')' presumably pairs with the '(' returned by the
      * routine directly above, and LEAVE presumably unwinds a save scope that
      * routine opened.  Neither is visible from this function alone, so
      * confirm against S_sublex_push.
2494  */
2495
2496 STATIC I32
2497 S_sublex_done(pTHX)
2498 {
2499     dVAR;
         /* Post-increment: the branch is taken only when the counter was 0,
          * but the counter is bumped on every call that reaches this test. */
2500     if (!PL_lex_starts++) {
2501         SV * const sv = newSVpvs("");
2502         if (SvUTF8(PL_linestr))
2503             SvUTF8_on(sv);              /* mirror the UTF-8 flag of PL_linestr */
2504         PL_expect = XOPERATOR;
2505         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2506         return THING;
2507     }
2508
         /* NOTE(review): "parens" in the remark below presumably means case
          * modifiers that are still open (the counter tested is
          * PL_lex_casemods), not literal parentheses -- confirm. */
2509     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2510         PL_lex_state = LEX_INTERPCASEMOD;
2511         return yylex();
2512     }
2513
2514     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2515     assert(PL_lex_inwhat != OP_TRANSR);     /* the routine above rewrites OP_TRANSR to OP_TRANS */
2516     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2517         PL_linestr = PL_lex_repl;
2518         PL_lex_inpat = 0;
             /* All cursors, PL_bufend included, start at the head of the new
              * buffer; PL_bufend is then advanced by the string length. */
2519         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2520         PL_bufend += SvCUR(PL_linestr);
2521         PL_last_lop = PL_last_uni = NULL;
2522         SAVEFREESV(PL_linestr);         /* per perlguts: refcount dropped when the save scope ends */
2523         PL_lex_dojoin = FALSE;
2524         PL_lex_brackets = 0;
2525         PL_lex_allbrackets = 0;
2526         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2527         PL_lex_casemods = 0;
2528         *PL_lex_casestack = '\0';
2529         PL_lex_starts = 0;
2530         if (SvEVALED(PL_lex_repl)) {
2531             PL_lex_state = LEX_INTERPNORMAL;
2532             PL_lex_starts++;            /* 1 rather than 0 from here on */
2533             /*  we don't clear PL_lex_repl here, so that we can check later
2534                 whether this is an evalled subst; that means we rely on the
2535                 logic to ensure sublex_done() is called again only via the
2536                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2537         }
2538         else {
2539             PL_lex_state = LEX_INTERPCONCAT;
2540             PL_lex_repl = NULL;         /* so the next call falls through to the ')' branch */
2541         }
2542         return ',';
2543     }
2544     else {
2545 #ifdef PERL_MAD
             /* MAD builds only: fold any pending PL_thiswhite into PL_endwhite
              * (created on demand) and clear it, then blank PL_thistoken if
              * there is one, otherwise set PL_realtokenstart to -1. */
2546         if (PL_madskills) {
2547             if (PL_thiswhite) {
2548                 if (!PL_endwhite)
2549                     PL_endwhite = newSVpvs("");
2550                 sv_catsv(PL_endwhite, PL_thiswhite);
2551                 PL_thiswhite = 0;
2552             }
2553             if (PL_thistoken)
2554                 sv_setpvs(PL_thistoken,"");
2555             else
2556                 PL_realtokenstart = -1;
2557         }
2558 #endif
2559         LEAVE;
             /* PL_bufend is rebuilt from PL_linestr only after LEAVE.
              * NOTE(review): presumably LEAVE puts the outer PL_linestr back,
              * which is why this cannot be done earlier -- confirm. */
2560         PL_bufend = SvPVX(PL_linestr);
2561         PL_bufend += SvCUR(PL_linestr);
2562         PL_expect = XOPERATOR;
2563         PL_sublex_info.sub_inwhat = 0;
2564         return ')';
2565     }
2566 }
2567
2568 /*
2569   scan_const
2570
2571   Extracts the next constant part of a pattern, double-quoted string,
2572   or transliteration.  This is terrifying code.
2573
2574   For example, in parsing the double-quoted string "ab\x63$d", it would
2575   stop at the '$' and return an OP_CONST containing 'abc'.
2576
2577   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2578   processing a pattern (PL_lex_inpat is true), a transliteration
2579   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2580
2581   Returns a pointer to the character scanned up to. If this is
2582   advanced from the start pointer supplied (i.e. if anything was
2583   successfully parsed), will leave an OP_CONST for the substring scanned
2584   in pl_yylval. Caller must intuit reason for not parsing further
2585   by looking at the next characters herself.
2586
2587   In patterns:
2588     expand:
2589       \N{ABC}  => \N{U+41.42.43}
2590
2591     pass through:
2592         all other \-char, including \N and \N{ apart from \N{ABC}
2593
2594     stops on:
2595         @ and $ where it appears to be a var, but not for $ as tail anchor
2596         \l \L \u \U \Q \E \F
2597         (?{  or  (??{
2598
2599
2600   In transliterations:
2601     characters are VERY literal, except for - not at the start or end
2602     of the string, which indicates a range. If the range is in bytes,
2603     scan_const expands the range to the full set of intermediate
2604     characters. If the range is in utf8, the hyphen is replaced with
2605     a certain range mark which will be handled by pmtrans() in op.c.
2606
2607   In double-quoted strings:
2608     backslashes:
2609       double-quoted style: \r and \n
2610       constants: \x31, etc.
2611       deprecated backrefs: \1 (in substitution replacements)
2612       case and quoting: \U \Q \E
2613     stops on @ and $
2614
2615   scan_const does *not* construct ops to handle interpolated strings.
2616   It stops processing as soon as it finds an embedded $ or @ variable
2617   and leaves it to the caller to work out what's going on.
2618
2619   embedded arrays (whether in pattern or not) could be:
2620       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2621
2622   $ in double-quoted strings must be the symbol of an embedded scalar.
2623
2624   $ in pattern could be $foo or could be tail anchor.  Assumption:
2625   it's a tail anchor if $ is the last thing in the string, or if it's
2626   followed by one of "()| \r\n\t"
2627
2628   \1 (backreferences) are turned into $1 in substitutions
2629
2630   The structure of the code is
2631       while (there's a character to process) {
2632           handle transliteration ranges
2633           skip regexp comments /(?#comment)/; stop at code blocks /(?{code})/ and /(??{code})/
2634           skip #-initiated comments in //x patterns
2635           check for embedded arrays
2636           check for embedded scalars
2637           if (backslash) {
2638               deprecate \1 in substitution replacements
2639               handle string-changing backslashes \l \U \Q \E, etc.
2640               switch (what was escaped) {
2641                   handle \- in a transliteration (becomes a literal -)
2642                   if a pattern and not \N{, go treat as regular character
2643                   handle \132 and \o{132} (octal characters)
2644                   handle \x15 and \x{1234} (hex characters)
2645                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2646                   handle \cV (control characters)
2647                   handle printf-style backslashes (\f, \r, \n, etc)
2648               } (end switch)
2649               continue
2650           } (end if backslash)
2651           handle regular character
2652     } (end while character to read)
2653
2654 */
2655
2656 STATIC char *
2657 S_scan_const(pTHX_ char *start)
2658 {
2659     dVAR;
2660     register char *send = PL_bufend;            /* end of the constant */
2661     SV *sv = newSV(send - start);               /* sv for the constant.  See
2662                                                    note below on sizing. */
2663     register char *s = start;                   /* start of the constant */
2664     register char *d = SvPVX(sv);               /* destination for copies */
2665     bool dorange = FALSE;                       /* are we in a translit range? */
2666     bool didrange = FALSE;                      /* did we just finish a range? */
2667     bool in_charclass = FALSE;                  /* within /[...]/ */
2668     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2669     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2670                                                    to be UTF8?  But, this can
2671                                                    show as true when the source
2672                                                    isn't utf8, as for example
2673                                                    when it is entirely composed
2674                                                    of hex constants */
2675
2676     /* Note on sizing:  The scanned constant is placed into sv, which is
2677      * initialized by newSV() assuming one byte of output for every byte of
2678      * input.  This routine expects newSV() to allocate an extra byte for a
2679      * trailing NUL, which this routine will append if it gets to the end of
2680      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2681      * CAPITAL LETTER A}), or more output than input if the constant ends up
2682      * recoded to utf8, but each time a construct is found that might increase
2683      * the needed size, SvGROW() is called.  Its size parameter each time is
2684      * based on the best guess estimate at the time, namely the length used so
2685      * far, plus the length the current construct will occupy, plus room for
2686      * the trailing NUL, plus one byte for every input byte still unscanned */
2687
2688     UV uv;
2689 #ifdef EBCDIC
2690     UV literal_endpoint = 0;
2691     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2692 #endif
2693
2694     PERL_ARGS_ASSERT_SCAN_CONST;
2695
2696     assert(PL_lex_inwhat != OP_TRANSR);
2697     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2698         /* If we are doing a trans and we know we want UTF8 set expectation */
2699         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2700         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2701     }
2702
2703
2704     while (s < send || dorange) {
2705
2706         /* get transliterations out of the way (they're most literal) */
2707         if (PL_lex_inwhat == OP_TRANS) {
2708             /* expand a range A-Z to the full set of characters.  AIE! */
2709             if (dorange) {
2710                 I32 i;                          /* current expanded character */
2711                 I32 min;                        /* first character in range */
2712                 I32 max;                        /* last character in range */
2713
2714 #ifdef EBCDIC
2715                 UV uvmax = 0;
2716 #endif
2717
2718                 if (has_utf8
2719 #ifdef EBCDIC
2720                     && !native_range
2721 #endif
2722                     ) {
2723                     char * const c = (char*)utf8_hop((U8*)d, -1);
2724                     char *e = d++;
2725                     while (e-- > c)
2726                         *(e + 1) = *e;
2727                     *c = (char)UTF_TO_NATIVE(0xff);
2728                     /* mark the range as done, and continue */
2729                     dorange = FALSE;
2730                     didrange = TRUE;
2731                     continue;
2732                 }
2733
2734                 i = d - SvPVX_const(sv);                /* remember current offset */
2735 #ifdef EBCDIC
2736                 SvGROW(sv,
2737                        SvLEN(sv) + (has_utf8 ?
2738                                     (512 - UTF_CONTINUATION_MARK +
2739                                      UNISKIP(0x100))
2740                                     : 256));
2741                 /* How many two-byte within 0..255: 128 in UTF-8,
2742                  * 96 in UTF-8-mod. */
2743 #else
2744                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2745 #endif
2746                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2747 #ifdef EBCDIC
2748                 if (has_utf8) {
2749                     int j;
2750                     for (j = 0; j <= 1; j++) {
2751                         char * const c = (char*)utf8_hop((U8*)d, -1);
2752                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2753                         if (j)
2754                             min = (U8)uv;
2755                         else if (uv < 256)
2756                             max = (U8)uv;
2757                         else {
2758                             max = (U8)0xff; /* only to \xff */
2759                             uvmax = uv; /* \x{100} to uvmax */
2760                         }
2761                         d = c; /* eat endpoint chars */
2762                      }
2763                 }
2764                else {
2765 #endif
2766                    d -= 2;              /* eat the first char and the - */
2767                    min = (U8)*d;        /* first char in range */
2768                    max = (U8)d[1];      /* last char in range  */
2769 #ifdef EBCDIC
2770                }
2771 #endif
2772
2773                 if (min > max) {
2774                     Perl_croak(aTHX_
2775                                "Invalid range \"%c-%c\" in transliteration operator",
2776                                (char)min, (char)max);
2777                 }
2778
2779 #ifdef EBCDIC
2780                 if (literal_endpoint == 2 &&
2781                     ((isLOWER(min) && isLOWER(max)) ||
2782                      (isUPPER(min) && isUPPER(max)))) {
2783                     if (isLOWER(min)) {
2784                         for (i = min; i <= max; i++)
2785                             if (isLOWER(i))
2786                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2787                     } else {
2788                         for (i = min; i <= max; i++)
2789                             if (isUPPER(i))
2790                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2791                     }
2792                 }
2793                 else
2794 #endif
2795                     for (i = min; i <= max; i++)
2796 #ifdef EBCDIC
2797                         if (has_utf8) {
2798                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2799                             if (UNI_IS_INVARIANT(ch))
2800                                 *d++ = (U8)i;
2801                             else {
2802                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2803                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2804                             }
2805                         }
2806                         else
2807 #endif
2808                             *d++ = (char)i;
2809
2810 #ifdef EBCDIC
2811                 if (uvmax) {
2812                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2813                     if (uvmax > 0x101)
2814                         *d++ = (char)UTF_TO_NATIVE(0xff);
2815                     if (uvmax > 0x100)
2816                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2817                 }
2818 #endif
2819
2820                 /* mark the range as done, and continue */
2821                 dorange = FALSE;
2822                 didrange = TRUE;
2823 #ifdef EBCDIC
2824                 literal_endpoint = 0;
2825 #endif
2826                 continue;
2827             }
2828
2829             /* range begins (ignore - as first or last char) */
2830             else if (*s == '-' && s+1 < send  && s != start) {
2831                 if (didrange) {
2832                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2833                 }
2834                 if (has_utf8
2835 #ifdef EBCDIC
2836                     && !native_range
2837 #endif
2838                     ) {
2839                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2840                     s++;
2841                     continue;
2842                 }
2843                 dorange = TRUE;
2844                 s++;
2845             }
2846             else {
2847                 didrange = FALSE;
2848 #ifdef EBCDIC
2849                 literal_endpoint = 0;
2850                 native_range = TRUE;
2851 #endif
2852             }
2853         }
2854
2855         /* if we get here, we're not doing a transliteration */
2856
2857         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2858             char *s1 = s-1;
2859             int esc = 0;
2860             while (s1 >= start && *s1-- == '\\')
2861                 esc = !esc;
2862             if (!esc)
2863                 in_charclass = TRUE;
2864         }
2865
2866         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
2867             char *s1 = s-1;
2868             int esc = 0;
2869             while (s1 >= start && *s1-- == '\\')
2870                 esc = !esc;
2871             if (!esc)
2872                 in_charclass = FALSE;
2873         }
2874
2875         /* skip for regexp comments /(?#comment)/, except for the last
2876          * char, which will be done separately.
2877          * Stop on (?{..}) and friends */
2878
2879         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2880             if (s[2] == '#') {
2881                 while (s+1 < send && *s != ')')
2882                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2883             }
2884             else if (!PL_lex_casemods && !in_charclass &&
2885                      (    s[2] == '{' /* This should match regcomp.c */
2886                       || (s[2] == '?' && s[3] == '{')))
2887             {
2888                 break;
2889             }
2890         }
2891
2892         /* likewise skip #-initiated comments in //x patterns */
2893         else if (*s == '#' && PL_lex_inpat &&
2894           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2895             while (s+1 < send && *s != '\n')
2896                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2897         }
2898
2899         /* no further processing of single-quoted regex */
2900         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2901             goto default_action;
2902
2903         /* check for embedded arrays
2904            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2905            */
2906         else if (*s == '@' && s[1]) {
2907             if (isALNUM_lazy_if(s+1,UTF))
2908                 break;
2909             if (strchr(":'{$", s[1]))
2910                 break;
2911             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2912                 break; /* in regexp, neither @+ nor @- are interpolated */
2913         }
2914
2915         /* check for embedded scalars.  only stop if we're sure it's a
2916            variable.
2917         */
2918         else if (*s == '$') {
2919             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2920                 break;
2921             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2922                 if (s[1] == '\\') {
2923                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2924                                    "Possible unintended interpolation of $\\ in regex");
2925                 }
2926                 break;          /* in regexp, $ might be tail anchor */
2927             }
2928         }
2929
2930         /* End of else if chain - OP_TRANS rejoin rest */
2931
2932         /* backslashes */
2933         if (*s == '\\' && s+1 < send) {
2934             char* e;    /* Can be used for ending '}', etc. */
2935
2936             s++;
2937
2938             /* warn on \1 - \9 in substitution replacements, but note that \11
2939              * is an octal; and \19 is \1 followed by '9' */
2940             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2941                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2942             {
2943                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2944                 *--s = '$';
2945                 break;
2946             }
2947
2948             /* string-change backslash escapes */
2949             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
2950                 --s;
2951                 break;
2952             }
2953             /* In a pattern, process \N, but skip any other backslash escapes.
2954              * This is because we don't want to translate an escape sequence
2955              * into a meta symbol and have the regex compiler use the meta
2956              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2957              * in spite of this, we do have to process \N here while the proper
2958              * charnames handler is in scope.  See bugs #56444 and #62056.
2959              * There is a complication because \N in a pattern may also stand
2960              * for 'match a non-nl', and not mean a charname, in which case its
2961              * processing should be deferred to the regex compiler.  To be a
2962              * charname it must be followed immediately by a '{', and not look
2963              * like \N followed by a curly quantifier, i.e., not something like
2964              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2965              * quantifier */
2966             else if (PL_lex_inpat
2967                     && (*s != 'N'
2968                         || s[1] != '{'
2969                         || regcurly(s + 1)))
2970             {
2971                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2972                 goto default_action;
2973             }
2974
2975             switch (*s) {
2976
2977             /* quoted - in transliterations */
2978             case '-':
2979                 if (PL_lex_inwhat == OP_TRANS) {
2980                     *d++ = *s++;
2981                     continue;
2982                 }
2983                 /* FALL THROUGH */
2984             default:
2985                 {
2986                     if ((isALNUMC(*s)))
2987                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2988                                        "Unrecognized escape \\%c passed through",
2989                                        *s);
2990                     /* default action is to copy the quoted character */
2991                     goto default_action;
2992                 }
2993
2994             /* eg. \132 indicates the octal constant 0132 */
2995             case '0': case '1': case '2': case '3':
2996             case '4': case '5': case '6': case '7':
2997                 {
2998                     I32 flags = 0;
2999                     STRLEN len = 3;
3000                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3001                     s += len;
3002                 }
3003                 goto NUM_ESCAPE_INSERT;
3004
3005             /* eg. \o{24} indicates the octal constant \024 */
3006             case 'o':
3007                 {
3008                     STRLEN len;
3009                     const char* error;
3010
3011                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3012                     s += len;
3013                     if (! valid) {
3014                         yyerror(error);
3015                         continue;
3016                     }
3017                     goto NUM_ESCAPE_INSERT;
3018                 }
3019
3020             /* eg. \x24 indicates the hex constant 0x24 */
3021             case 'x':
3022                 {
3023                     STRLEN len;
3024                     const char* error;
3025
3026                     bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3027                     s += len;
3028                     if (! valid) {
3029                         yyerror(error);
3030                         continue;
3031                     }
3032                 }
3033
3034               NUM_ESCAPE_INSERT:
3035                 /* Insert oct or hex escaped character.  There will always be
3036                  * enough room in sv since such escapes will be longer than any
3037                  * UTF-8 sequence they can end up as, except if they force us
3038                  * to recode the rest of the string into utf8 */
3039
3040                 /* Here uv is the ordinal of the next character being added in
3041                  * unicode (converted from native). */
3042                 if (!UNI_IS_INVARIANT(uv)) {
3043                     if (!has_utf8 && uv > 255) {
3044                         /* Might need to recode whatever we have accumulated so
3045                          * far if it contains any chars variant in utf8 or
3046                          * utf-ebcdic. */
3047
3048                         SvCUR_set(sv, d - SvPVX_const(sv));
3049                         SvPOK_on(sv);
3050                         *d = '\0';
3051                         /* See Note on sizing above.  */
3052                         sv_utf8_upgrade_flags_grow(sv,
3053                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3054                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3055                         d = SvPVX(sv) + SvCUR(sv);
3056                         has_utf8 = TRUE;
3057                     }
3058
3059                     if (has_utf8) {
3060                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3061                         if (PL_lex_inwhat == OP_TRANS &&
3062                             PL_sublex_info.sub_op) {
3063                             PL_sublex_info.sub_op->op_private |=
3064                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3065                                              : OPpTRANS_TO_UTF);
3066                         }
3067 #ifdef EBCDIC
3068                         if (uv > 255 && !dorange)
3069                             native_range = FALSE;
3070 #endif
3071                     }
3072                     else {
3073                         *d++ = (char)uv;
3074                     }
3075                 }
3076                 else {
3077                     *d++ = (char) uv;
3078                 }
3079                 continue;
3080
3081             case 'N':
3082                 /* In a non-pattern \N must be a named character, like \N{LATIN
3083                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3084                  * mean to match a non-newline.  For non-patterns, named
3085                  * characters are converted to their string equivalents. In
3086                  * patterns, named characters are not converted to their
3087                  * ultimate forms for the same reasons that other escapes
3088                  * aren't.  Instead, they are converted to the \N{U+...} form
3089                  * to get the value from the charnames that is in effect right
3090                  * now, while preserving the fact that it was a named character
3091                  * so that the regex compiler knows this */
3092
3093                 /* This section of code doesn't generally use the
3094                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3095                  * a close examination of this macro and determined it is a
3096                  * no-op except on utfebcdic variant characters.  Every
3097                  * character generated by this that would normally need to be
3098                  * enclosed by this macro is invariant, so the macro is not
3099                  * needed, and would complicate use of copy().  XXX There are
3100                  * other parts of this file where the macro is used
3101                  * inconsistently, but are saved by it being a no-op */
3102
3103                 /* The structure of this section of code (besides checking for
3104                  * errors and upgrading to utf8) is:
3105                  *  Further disambiguate between the two meanings of \N, and if
3106                  *      not a charname, go process it elsewhere
3107                  *  If of form \N{U+...}, pass it through if a pattern;
3108                  *      otherwise convert to utf8
3109                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3110                  *  pattern; otherwise convert to utf8 */
3111
3112                 /* Here, s points to the 'N'; the test below is guaranteed to
3113                  * succeed if we are being called on a pattern as we already
3114                  * know from a test above that the next character is a '{'.
3115                  * On a non-pattern \N must mean 'named sequence, which
3116                  * requires braces */
3117                 s++;
3118                 if (*s != '{') {
3119                     yyerror("Missing braces on \\N{}");
3120                     continue;
3121                 }
3122                 s++;
3123
3124                 /* If there is no matching '}', it is an error. */
3125                 if (! (e = strchr(s, '}'))) {
3126                     if (! PL_lex_inpat) {
3127                         yyerror("Missing right brace on \\N{}");
3128                     } else {
3129                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3130                     }
3131                     continue;
3132                 }
3133
3134                 /* Here it looks like a named character */
3135
3136                 if (PL_lex_inpat) {
3137
3138                     /* XXX This block is temporary code.  \N{} implies that the
3139                      * pattern is to have Unicode semantics, and therefore
3140                      * currently has to be encoded in utf8.  By putting it in
3141                      * utf8 now, we save a whole pass in the regular expression
3142                      * compiler.  Once that code is changed so Unicode
3143                      * semantics doesn't necessarily have to be in utf8, this
3144                      * block should be removed.  However, the code that parses
3145                      * the output of this would have to be changed to not
3146                      * necessarily expect utf8 */
3147                     if (!has_utf8) {
3148                         SvCUR_set(sv, d - SvPVX_const(sv));
3149                         SvPOK_on(sv);
3150                         *d = '\0';
3151                         /* See Note on sizing above.  */
3152                         sv_utf8_upgrade_flags_grow(sv,
3153                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3154                                         /* 5 = '\N{' + cur char + NUL */
3155                                         (STRLEN)(send - s) + 5);
3156                         d = SvPVX(sv) + SvCUR(sv);
3157                         has_utf8 = TRUE;
3158                     }
3159                 }
3160
3161                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3162                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3163                                 | PERL_SCAN_DISALLOW_PREFIX;
3164                     STRLEN len;
3165
3166                     /* For \N{U+...}, the '...' is a unicode value even on
3167                      * EBCDIC machines */
3168                     s += 2;         /* Skip to next char after the 'U+' */
3169                     len = e - s;
3170                     uv = grok_hex(s, &len, &flags, NULL);
3171                     if (len == 0 || len != (STRLEN)(e - s)) {
3172                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3173                         s = e + 1;
3174                         continue;
3175                     }
3176
3177                     if (PL_lex_inpat) {
3178
3179                         /* On non-EBCDIC platforms, pass through to the regex
3180                          * compiler unchanged.  The reason we evaluated the
3181                          * number above is to make sure there wasn't a syntax
3182                          * error.  But on EBCDIC we convert to native so
3183                          * downstream code can continue to assume it's native
3184                          */
3185                         s -= 5;     /* Include the '\N{U+' */
3186 #ifdef EBCDIC
3187                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3188                                                                and the \0 */
3189                                     "\\N{U+%X}",
3190                                     (unsigned int) UNI_TO_NATIVE(uv));
3191 #else
3192                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3193                         d += e - s + 1;
3194 #endif
3195                     }
3196                     else {  /* Not a pattern: convert the hex to string */
3197
3198                          /* If destination is not in utf8, unconditionally
3199                           * recode it to be so.  This is because \N{} implies
3200                           * Unicode semantics, and scalars have to be in utf8
3201                           * to guarantee those semantics */
3202                         if (! has_utf8) {
3203                             SvCUR_set(sv, d - SvPVX_const(sv));
3204                             SvPOK_on(sv);
3205                             *d = '\0';
3206                             /* See Note on sizing above.  */
3207                             sv_utf8_upgrade_flags_grow(
3208                                         sv,
3209                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3210                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3211                             d = SvPVX(sv) + SvCUR(sv);
3212                             has_utf8 = TRUE;
3213                         }
3214
3215                         /* Add the string to the output */
3216                         if (UNI_IS_INVARIANT(uv)) {
3217                             *d++ = (char) uv;
3218                         }
3219                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3220                     }
3221                 }
3222                 else { /* Here is \N{NAME} but not \N{U+...}. */
3223
3224                     SV *res;            /* result from charnames */
3225                     const char *str;    /* the string in 'res' */
3226                     STRLEN len;         /* its length */
3227
3228                     /* Get the value for NAME */
3229                     res = newSVpvn(s, e - s);
3230                     res = new_constant( NULL, 0, "charnames",
3231                                         /* includes all of: \N{...} */
3232                                         res, NULL, s - 3, e - s + 4 );
3233
3234                     /* Most likely res will be in utf8 already since the
3235                      * standard charnames uses pack U, but a custom translator
3236                      * can leave it otherwise, so make sure.  XXX This can be
3237                      * revisited to not have charnames use utf8 for characters
3238                      * that don't need it when regexes don't have to be in utf8
3239                      * for Unicode semantics.  If doing so, remember EBCDIC */
3240                     sv_utf8_upgrade(res);
3241                     str = SvPV_const(res, len);
3242
3243                     /* Don't accept malformed input */
3244                     if (! is_utf8_string((U8 *) str, len)) {
3245                         yyerror("Malformed UTF-8 returned by \\N");
3246                     }
3247                     else if (PL_lex_inpat) {
3248
3249                         if (! len) { /* The name resolved to an empty string */
3250                             Copy("\\N{}", d, 4, char);
3251                             d += 4;
3252                         }
3253                         else {
3254                             /* In order to not lose information for the regex
3255                             * compiler, pass the result in the specially made
3256                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3257                             * the code points in hex of each character
3258                             * returned by charnames */
3259
3260                             const char *str_end = str + len;
3261                             STRLEN char_length;     /* cur char's byte length */
3262                             STRLEN output_length;   /* and the number of bytes
3263                                                        after this is translated
3264                                                        into hex digits */
3265                             const STRLEN off = d - SvPVX_const(sv);
3266
3267                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3268                              * max('U+', '.'); and 1 for NUL */
3269                             char hex_string[2 * UTF8_MAXBYTES + 5];
3270
3271                             /* Get the first character of the result. */
3272                             U32 uv = utf8n_to_uvuni((U8 *) str,
3273                                                     len,
3274                                                     &char_length,
3275                                                     UTF8_ALLOW_ANYUV);
3276
3277                             /* The call to is_utf8_string() above hopefully
3278                              * guarantees that there won't be an error.  But
3279                              * it's easy here to make sure.  The function just
3280                              * above warns and returns 0 if invalid utf8, but
3281                              * it can also return 0 if the input is validly a
3282                              * NUL. Disambiguate */
3283                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3284                                 uv = UNICODE_REPLACEMENT;
3285                             }
3286
3287                             /* Convert first code point to hex, including the
3288                              * boiler plate before it.  For all these, we
3289                              * convert to native format so that downstream code
3290                              * can continue to assume the input is native */
3291                             output_length =
3292                                 my_snprintf(hex_string, sizeof(hex_string),
3293                                             "\\N{U+%X",
3294                                             (unsigned int) UNI_TO_NATIVE(uv));
3295
3296                             /* Make sure there is enough space to hold it */
3297                             d = off + SvGROW(sv, off
3298                                                  + output_length
3299                                                  + (STRLEN)(send - e)
3300                                                  + 2);  /* '}' + NUL */
3301                             /* And output it */
3302                             Copy(hex_string, d, output_length, char);
3303                             d += output_length;
3304
3305                             /* For each subsequent character, append dot and
3306                              * its ordinal in hex */
3307                             while ((str += char_length) < str_end) {
3308                                 const STRLEN off = d - SvPVX_const(sv);
3309                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3310                                                         str_end - str,
3311                                                         &char_length,
3312                                                         UTF8_ALLOW_ANYUV);
3313                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3314                                     uv = UNICODE_REPLACEMENT;
3315                                 }
3316
3317                                 output_length =
3318                                     my_snprintf(hex_string, sizeof(hex_string),
3319                                             ".%X",
3320                                             (unsigned int) UNI_TO_NATIVE(uv));
3321
3322                                 d = off + SvGROW(sv, off
3323                                                      + output_length
3324                                                      + (STRLEN)(send - e)
3325                                                      + 2);      /* '}' +  NUL */
3326                                 Copy(hex_string, d, output_length, char);
3327                                 d += output_length;
3328                             }
3329
3330                             *d++ = '}'; /* Done.  Add the trailing brace */
3331                         }
3332                     }
3333                     else { /* Here, not in a pattern.  Convert the name to a
3334                             * string. */
3335
3336                          /* If destination is not in utf8, unconditionally
3337                           * recode it to be so.  This is because \N{} implies
3338                           * Unicode semantics, and scalars have to be in utf8
3339                           * to guarantee those semantics */
3340                         if (! has_utf8) {
3341                             SvCUR_set(sv, d - SvPVX_const(sv));
3342                             SvPOK_on(sv);
3343                             *d = '\0';
3344                             /* See Note on sizing above.  */
3345                             sv_utf8_upgrade_flags_grow(sv,
3346                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3347                                                 len + (STRLEN)(send - s) + 1);
3348                             d = SvPVX(sv) + SvCUR(sv);
3349                             has_utf8 = TRUE;
3350                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3351
3352                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3353                              * set correctly here). */
3354                             const STRLEN off = d - SvPVX_const(sv);
3355                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3356                         }
3357                         Copy(str, d, len, char);
3358                         d += len;
3359                     }
3360                     SvREFCNT_dec(res);
3361
3362                     /* Deprecate non-approved name syntax */
3363                     if (ckWARN_d(WARN_DEPRECATED)) {
3364                         bool problematic = FALSE;
3365                         char* i = s;
3366
3367                         /* For non-ut8 input, look to see that the first
3368                          * character is an alpha, then loop through the rest
3369                          * checking that each is a continuation */
3370                         if (! this_utf8) {
3371                             if (! isALPHAU(*i)) problematic = TRUE;
3372                             else for (i = s + 1; i < e; i++) {
3373                                 if (isCHARNAME_CONT(*i)) continue;
3374                                 problematic = TRUE;
3375                                 break;
3376                             }
3377                         }
3378                         else {
3379                             /* Similarly for utf8.  For invariants can check
3380                              * directly.  We accept anything above the latin1
3381                              * range because it is immaterial to Perl if it is
3382                              * correct or not, and is expensive to check.  But
3383                              * it is fairly easy in the latin1 range to convert
3384                              * the variants into a single character and check
3385                              * those */
3386                             if (UTF8_IS_INVARIANT(*i)) {
3387                                 if (! isALPHAU(*i)) problematic = TRUE;
3388                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3389                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3390                                                                             *(i+1)))))
3391                                 {
3392                                     problematic = TRUE;
3393                                 }
3394                             }
3395                             if (! problematic) for (i = s + UTF8SKIP(s);
3396                                                     i < e;
3397                                                     i+= UTF8SKIP(i))
3398                             {
3399                                 if (UTF8_IS_INVARIANT(*i)) {
3400                                     if (isCHARNAME_CONT(*i)) continue;
3401                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3402                                     continue;
3403                                 } else if (isCHARNAME_CONT(
3404                                             UNI_TO_NATIVE(
3405                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3406                                 {
3407                                     continue;
3408                                 }
3409                                 problematic = TRUE;
3410                                 break;
3411                             }
3412                         }
3413                         if (problematic) {
3414                             /* The e-i passed to the final %.*s makes sure that
3415                              * should the trailing NUL be missing that this
3416                              * print won't run off the end of the string */
3417                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3418                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3419                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3420                         }
3421                     }
3422                 } /* End \N{NAME} */
3423 #ifdef EBCDIC
3424                 if (!dorange)
3425                     native_range = FALSE; /* \N{} is defined to be Unicode */
3426 #endif
3427                 s = e + 1;  /* Point to just after the '}' */
3428                 continue;
3429
3430             /* \c is a control character */
3431             case 'c':
3432                 s++;
3433                 if (s < send) {
3434                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3435                 }
3436                 else {
3437                     yyerror("Missing control char name in \\c");
3438                 }
3439                 continue;
3440
3441             /* printf-style backslashes, formfeeds, newlines, etc */
3442             case 'b':
3443                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3444                 break;
3445             case 'n':
3446                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3447                 break;
3448             case 'r':
3449                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3450                 break;
3451             case 'f':
3452                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3453                 break;
3454             case 't':
3455                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3456                 break;
3457             case 'e':
3458                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3459                 break;
3460             case 'a':
3461                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3462                 break;
3463             } /* end switch */
3464
3465             s++;
3466             continue;
3467         } /* end if (backslash) */
3468 #ifdef EBCDIC
3469         else
3470             literal_endpoint++;
3471 #endif
3472
3473     default_action:
3474         /* If we started with encoded form, or already know we want it,
3475            then encode the next character */
3476         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3477             STRLEN len  = 1;
3478
3479
3480             /* One might think that it is wasted effort in the case of the
3481              * source being utf8 (this_utf8 == TRUE) to take the next character
3482              * in the source, convert it to an unsigned value, and then convert
3483              * it back again.  But the source has not been validated here.  The
3484              * routine that does the conversion checks for errors like
3485              * malformed utf8 */
3486
3487             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3488             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3489             if (!has_utf8) {
3490                 SvCUR_set(sv, d - SvPVX_const(sv));
3491                 SvPOK_on(sv);
3492                 *d = '\0';
3493                 /* See Note on sizing above.  */
3494                 sv_utf8_upgrade_flags_grow(sv,
3495                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3496                                         need + (STRLEN)(send - s) + 1);
3497                 d = SvPVX(sv) + SvCUR(sv);
3498                 has_utf8 = TRUE;
3499             } else if (need > len) {
3500                 /* encoded value larger than old, may need extra space (NOTE:
3501                  * SvCUR() is not set correctly here).   See Note on sizing
3502                  * above.  */
3503                 const STRLEN off = d - SvPVX_const(sv);
3504                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3505             }
3506             s += len;
3507
3508             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3509 #ifdef EBCDIC
3510             if (uv > 255 && !dorange)
3511                 native_range = FALSE;
3512 #endif
3513         }
3514         else {
3515             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3516         }
3517     } /* while loop to process each character */
3518
3519     /* terminate the string and set up the sv */
3520     *d = '\0';
3521     SvCUR_set(sv, d - SvPVX_const(sv));
3522     if (SvCUR(sv) >= SvLEN(sv))
3523         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3524                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3525
3526     SvPOK_on(sv);
3527     if (PL_encoding && !has_utf8) {
3528         sv_recode_to_utf8(sv, PL_encoding);
3529         if (SvUTF8(sv))
3530             has_utf8 = TRUE;
3531     }
3532     if (has_utf8) {
3533         SvUTF8_on(sv);
3534         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3535             PL_sublex_info.sub_op->op_private |=
3536                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3537         }
3538     }
3539
3540     /* shrink the sv if we allocated more than we used */
3541     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3542         SvPV_shrink_to_cur(sv);
3543     }
3544
3545     /* return the substring (via pl_yylval) only if we parsed anything */
3546     if (s > PL_bufptr) {
3547         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3548             const char *const key = PL_lex_inpat ? "qr" : "q";
3549             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3550             const char *type;
3551             STRLEN typelen;
3552
3553             if (PL_lex_inwhat == OP_TRANS) {
3554                 type = "tr";
3555                 typelen = 2;
3556             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3557                 type = "s";
3558                 typelen = 1;
3559             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3560                 type = "q";
3561                 typelen = 1;
3562             } else  {
3563                 type = "qq";
3564                 typelen = 2;
3565             }
3566
3567             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3568                                 type, typelen);
3569         }
3570         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3571     } else
3572         SvREFCNT_dec(sv);
3573     return s;
3574 }
3575
3576 /* S_intuit_more
3577  * Returns TRUE if there's more to the expression (e.g., a subscript),
3578  * FALSE otherwise.
3579  *
3580  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3581  *
3582  * ->[ and ->{ return TRUE
3583  * { and [ outside a pattern are always subscripts, so return TRUE
3584  * if we're outside a pattern and it's not { or [, then return FALSE
3585  * if we're in a pattern and the first char is a {
3586  *   {4,5} (any digits around the comma) returns FALSE
3587  * if we're in a pattern and the first char is a [
3588  *   [] returns FALSE
3589  *   [SOMETHING] has a funky algorithm to decide whether it's a
3590  *      character class or not.  It has to deal with things like
3591  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3592  * anything else returns TRUE
3593  */
3594
3595 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3596
3597 STATIC int
3598 S_intuit_more(pTHX_ register char *s)
3599 {
     /* Guess whether the text at s continues the expression being lexed
      * (e.g. a subscript).  Returns TRUE for "there is more", FALSE
      * otherwise.  For a '[' met inside a pattern the answer comes from a
      * running "weight" computed over the text up to the next ']': a
      * non-negative final weight is taken to mean "character class", and
      * FALSE is returned. */
3600     dVAR;
3601
3602     PERL_ARGS_ASSERT_INTUIT_MORE;
3603
         /* Non-zero PL_lex_brackets short-circuits everything below. */
3604     if (PL_lex_brackets)
3605         return TRUE;
         /* "->[" and "->{" always count as more. */
3606     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3607         return TRUE;
         /* Anything that is not an opening '{' or '[' ends the expression. */
3608     if (*s != '{' && *s != '[')
3609         return FALSE;
         /* Outside a pattern '{' and '[' are always accepted. */
3610     if (!PL_lex_inpat)
3611         return TRUE;
3612
3613     /* In a pattern, so maybe we have {n,m}. */
3614     if (*s == '{') {
             /* regcurly() is defined elsewhere; presumably it recognises
              * the quantifier form -- confirm against its definition. */
3615         if (regcurly(s)) {
3616             return FALSE;
3617         }
3618         return TRUE;
3619     }
3620
3621     /* On the other hand, maybe we have a character class */
3622
         /* Step over the '['.  "[]" and "[^" are answered FALSE at once. */
3623     s++;
3624     if (*s == ']' || *s == '^')
3625         return FALSE;
3626     else {
3627         /* this is terrifying, and it works */
3628         int weight = 2;         /* let's weigh the evidence */
             /* seen[c] counts how often byte value c has been recorded so
              * far; it is incremented at the bottom of the loop. */
3629         char seen[256];
             /* un_char is the (possibly remapped) current byte and
              * last_un_char the previous one.  255 is the initial value
              * and is tested in the '-' case to detect "nothing before". */
3630         unsigned char un_char = 255, last_un_char;
             /* The scan stops at the first ']' found after s. */
3631         const char * const send = strchr(s,']');
3632         char tmpbuf[sizeof PL_tokenbuf * 4];
3633
3634         if (!send)              /* has to be an expression */
3635             return TRUE;
3636
3637         Zero(seen,256,char);
             /* Evidence from the first character: a leading '$' lowers the
              * weight by 3; a single digit directly followed by ']' lowers
              * it by 100; exactly two digits followed by ']' lower it by
              * 10. */
3638         if (*s == '$')
3639             weight -= 3;
3640         else if (isDIGIT(*s)) {
3641             if (s[1] != ']') {
3642                 if (isDIGIT(s[1]) && s[2] == ']')
3643                     weight -= 10;
3644             }
3645             else
3646                 weight -= 100;
3647         }
             /* Walk the bracket contents.  Note that several cases below
              * advance s themselves, in addition to the s++ here. */
3648         for (; s < send; s++) {
3649             last_un_char = un_char;
3650             un_char = (unsigned char)*s;
3651             switch (*s) {
3652             case '@':
3653             case '&':
3654             case '$':
                     /* Each earlier occurrence of the same sigil costs 10. */
3655                 weight -= seen[un_char] * 10;
3656                 if (isALNUM_lazy_if(s+1,UTF)) {
3657                     int len;
                         /* scan_ident() fills tmpbuf; its length is then
                          * taken with strlen().  A name longer than one
                          * character that gv_fetchpvn_flags() finds costs
                          * 100, anything else costs 10. */
3658                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3659                     len = (int)strlen(tmpbuf);
3660                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3661                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3662                         weight -= 100;
3663                     else
3664                         weight -= 10;
3665                 }
                     /* '$' followed by one of the listed punctuation
                      * characters. */
3666                 else if (*s == '$' && s[1] &&
3667                   strchr("[#!%*<>()-=",s[1])) {
                         /* NOTE(review): s[2] is not checked for NUL here,
                          * and strchr() also matches the terminating NUL of
                          * its first argument, so s[2] == 0 would take the
                          * -10 branch -- confirm this is intended. */
3668                     if (/*{*/ strchr("])} =",s[2]))
3669                         weight -= 10;
3670                     else
3671                         weight -= 1;
3672                 }
3673                 break;
3674             case '\\':
                     /* Backslashes are recorded under the value 254 rather
                      * than under '\\' itself. */
3675                 un_char = 254;
3676                 if (s[1]) {
                         /* \w \d \s \] add 100; otherwise, if a quote
                          * character was recorded earlier, add 1; otherwise
                          * one of the listed escape letters adds 40; a digit
                          * adds 40 and the run of digits is skipped. */
3677                     if (strchr("wds]",s[1]))
3678                         weight += 100;
3679                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3680                         weight += 1;
3681                     else if (strchr("rnftbxcav",s[1]))
3682                         weight += 40;
3683                     else if (isDIGIT(s[1])) {
3684                         weight += 40;
3685                         while (s[1] && isDIGIT(s[1]))
3686                             s++;
3687                     }
3688                 }
3689                 else
3690                     weight += 100;
3691                 break;
3692             case '-':
                     /* Range-like evidence: a backslash right after the '-'
                      * adds 50; a previous character from "aA01! " adds 30;
                      * a following character from "zZ79~" adds 30. */
3693                 if (s[1] == '\\')
3694                     weight += 50;
3695                 if (strchr("aA01! ",last_un_char))
3696                     weight += 30;
3697                 if (strchr("zZ79~",s[1]))
3698                     weight += 30;
3699                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3700                     weight -= 5;        /* cope with negative subscript */
3701                 break;
3702             default:
                     /* A run of at least two letters that does not directly
                      * follow a word character or a sigil is copied into
                      * tmpbuf and looked up with keyword(); a hit costs 150.
                      * The copy leaves s on the first non-letter, which the
                      * loop's s++ then steps over.
                      * NOTE(review): the copy loop is bounded only by
                      * isALPHA(*s), not by send or by sizeof tmpbuf --
                      * confirm that a longer run cannot reach here. */
3703                 if (!isALNUM(last_un_char)
3704                     && !(last_un_char == '$' || last_un_char == '@'
3705                          || last_un_char == '&')
3706                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3707                     char *d = tmpbuf;
3708                     while (isALPHA(*s))
3709                         *d++ = *s++;
3710                     *d = '\0';
3711                     if (keyword(tmpbuf, d - tmpbuf, 0))
3712                         weight -= 150;
3713                 }
                     /* A byte exactly one above its predecessor adds 5;
                      * each earlier occurrence of the same byte costs 1. */
3714                 if (un_char == last_un_char + 1)
3715                     weight += 5;
3716                 weight -= seen[un_char];
3717                 break;
3718             }
3719             seen[un_char]++;
3720         }
3721         if (weight >= 0)        /* probably a character class */
3722             return FALSE;
3723     }
3724
         /* Negative weight: treat the '[' as the start of a subscript. */
3725     return TRUE;
3726 }
3727
3728 /*
3729  * S_intuit_method
3730  *
3731  * Does all the checking to disambiguate
3732  *   foo bar
3733  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3734  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3735  *
3736  * First argument is the stuff after the first token, e.g. "bar".
3737  *
3738  * Not a method if foo is a filehandle.
3739  * Not a method if foo is a subroutine prototyped to take a filehandle.
3740  * Not a method if it's really "Foo $bar"
3741  * Method if it's "foo $bar"
3742  * Not a method if it's really "print foo $bar"
3743  * Method if it's really "foo package::" (interpreted as package->foo)
3744  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3745  * Not a method if bar is a filehandle or package, but is quoted with
3746  *   =>
3747  */
3748
3749 STATIC int
3750 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3751 {
     /* Decide whether the word at start (optionally preceded by '$') is the
      * object of an indirect-object method call.  Returns 0 when it is not,
      * otherwise FUNCMETH when the next non-space character is '(' and
      * METHOD when it is not.  On the method paths PL_bufptr and PL_expect
      * are updated, and for a bareword a WORD token is queued with
      * force_next(). */
3752     dVAR;
         /* Skip a leading '$' so that s points at the word itself. */
3753     char *s = start + (*start == '$');
3754     char tmpbuf[sizeof PL_tokenbuf];
3755     STRLEN len;
3756     GV* indirgv;
3757 #ifdef PERL_MAD
3758     int soff;
3759 #endif
3760
3761     PERL_ARGS_ASSERT_INTUIT_METHOD;
3762
         /* gv is a real glob with an IO slot: not a method. */
3763     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3764             return 0;
         /* cv has a prototype whose first character (after an optional
          * leading ';') is '*': not a method. */
3765     if (cv && SvPOK(cv)) {
3766                 const char *proto = CvPROTO(cv);
3767                 if (proto) {
3768                     if (*proto == ';')
3769                         proto++;
3770                     if (*proto == '*')
3771                         return 0;
3772                 }
3773     }
3774     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3775     /* start is the beginning of the possible filehandle/object,
3776      * and s is the end of it
3777      * tmpbuf is a copy of it
3778      */
3779
3780     if (*start == '$') {
             /* "$bar" form.  Rejected when cv is set, when the last list
              * operator was print or say, or when PL_tokenbuf begins with
              * an upper-case letter (presumably the preceding word, i.e.
              * the "Foo $bar" case -- confirm). */
3781         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3782                 isUPPER(*PL_tokenbuf))
3783             return 0;
             /* Under PERL_MAD start is saved as an offset into PL_linestr
              * and rebuilt after PEEKSPACE -- presumably because PEEKSPACE
              * can move the buffer in that build; confirm. */
3784 #ifdef PERL_MAD
3785         len = start - SvPVX(PL_linestr);
3786 #endif
3787         s = PEEKSPACE(s);
3788 #ifdef PERL_MAD
3789         start = SvPVX(PL_linestr) + len;
3790 #endif
             /* Lexing resumes at the '$'. */
3791         PL_bufptr = start;
3792         PL_expect = XREF;
3793         return *s == '(' ? FUNCMETH : METHOD;
3794     }
         /* Bareword form; keywords are never treated as the object. */
3795     if (!keyword(tmpbuf, len, 0)) {
             /* A word ending in "::" has the "::" removed and goes straight
              * to bare_package, bypassing the checks and the PEEKSPACE
              * below. */
3796         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3797             len -= 2;
3798             tmpbuf[len] = '\0';
3799 #ifdef PERL_MAD
3800             soff = s - SvPVX(PL_linestr);
3801 #endif
3802             goto bare_package;
3803         }
             /* A word that names an existing subroutine is not a method
              * object. */
3804         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3805         if (indirgv && GvCVu(indirgv))
3806             return 0;
3807         /* filehandle or package name makes it a method */
3808         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3809 #ifdef PERL_MAD
3810             soff = s - SvPVX(PL_linestr);
3811 #endif
3812             s = PEEKSPACE(s);
3813             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3814                 return 0;       /* no assumptions -- "=>" quotes bareword */
3815       bare_package:
                 /* Queue the word as a bareword constant (OPpCONST_BARE)
                  * to be returned as the next WORD token. */
3816             start_force(PL_curforce);
3817             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3818                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3819             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
                 /* NOTE(review): soff is declared only under PERL_MAD but
                  * is named here unconditionally; presumably curmad()
                  * expands to nothing in other builds -- confirm. */
3820             if (PL_madskills)
3821                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3822                                                             ( UTF ? SVf_UTF8 : 0 )));
3823             PL_expect = XTERM;
3824             force_next(WORD);
3825             PL_bufptr = s;
3826 #ifdef PERL_MAD
3827             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3828 #endif
3829             return *s == '(' ? FUNCMETH : METHOD;
3830         }
3831     }
3832     return 0;
3833 }
3834
3835 /* Encoded script support. filter_add() effectively inserts a
3836  * 'pre-processing' function into the current source input stream.
3837  * Note that the filter function only applies to the current source file
3838  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3839  *
3840  * The datasv parameter (which may be NULL) can be used to pass
3841  * private data to this instance of the filter. The filter function
3842  * can recover the SV using the FILTER_DATA macro and use it to
3843  * store private buffers and state information.
3844  *
3845  * The supplied datasv parameter is upgraded to a PVIO type
3846  * and the IoDIRP/IoANY field is used to store the function pointer,
3847  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3848  * Note that IoTOP_NAME, IoFMT_NAME and IoBOTTOM_NAME, if set for
3849  * private use, must be set using malloc'd pointers.
3850  */
3851
3852 SV *
3853 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3854 {
     /* Register funcp as a source filter and return the SV that carries it.
      * Returns NULL when funcp is NULL or when there is no current parser.
      * Croaks when the parser has LEX_IGNORE_UTF8_HINTS set.  If datasv is
      * NULL a fresh SV is created; either way the SV is upgraded to
      * SVt_PVIO and stored at index 0 of PL_rsfp_filters. */
3855     dVAR;
3856     if (!funcp)
3857         return NULL;
3858
3859     if (!PL_parser)
3860         return NULL;
3861
3862     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3863         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3864
         /* The filter array is created on first use. */
3865     if (!PL_rsfp_filters)
3866         PL_rsfp_filters = newAV();
3867     if (!datasv)
3868         datasv = newSV(0);
3869     SvUPGRADE(datasv, SVt_PVIO);
3870     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3871     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3872     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3873                           FPTR2DPTR(void *, IoANY(datasv)),
3874                           SvPV_nolen(datasv)));
         /* Open a slot at the front of the array and put the new filter
          * there. */
3875     av_unshift(PL_rsfp_filters, 1);
3876     av_store(PL_rsfp_filters, 0, datasv) ;
         /* Only when the parser is not yet marked as filtered, has
          * LEX_EVALBYTES set, and still has unread text in its buffer: split
          * the line buffer after the first newline found at or after
          * PL_bufptr.  If no newline is found nothing is changed and the
          * filtered flag stays clear. */
3877     if (
3878         !PL_parser->filtered
3879      && PL_parser->lex_flags & LEX_EVALBYTES
3880      && PL_bufptr < PL_bufend
3881     ) {
3882         const char *s = PL_bufptr;
3883         while (s < PL_bufend) {
3884             if (*s == '\n') {
3885                 SV *linestr = PL_parser->linestr;
3886                 char *buf = SvPVX(linestr);
                     /* Remember every parser pointer as an offset into the
                      * old buffer so it can be rebuilt against the new one.
                      * last_uni and last_lop may be NULL, hence the guards. */
3887                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3888                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3889                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3890                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3891                 STRLEN const last_uni_pos =
3892                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3893                 STRLEN const last_lop_pos =
3894                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
                     /* The old buffer SV is appended to the END of the
                      * filter array. */
3895                 av_push(PL_rsfp_filters, linestr);
                     /* The new line buffer is a copy of the old text up to
                      * and including the newline; ++s also moves s past the
                      * newline for the SvCUR assignment further down. */
3896                 PL_parser->linestr =
3897                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3898                 buf = SvPVX(PL_parser->linestr);
3899                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3900                 PL_parser->bufptr = buf + bufptr_pos;
3901                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3902                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3903                 PL_parser->linestart = buf + linestart_pos;
3904                 if (PL_parser->last_uni)
3905                     PL_parser->last_uni = buf + last_uni_pos;
3906                 if (PL_parser->last_lop)
3907                     PL_parser->last_lop = buf + last_lop_pos;
                     /* On the old buffer SV, SvLEN is overwritten with the
                      * old SvCUR and SvCUR with the offset just past the
                      * newline.  NOTE(review): presumably these mark the end
                      * and the start of the not-yet-consumed text for a
                      * later reader; the consumer is not visible here --
                      * confirm. */
3908                 SvLEN(linestr) = SvCUR(linestr);
3909                 SvCUR(linestr) = s-SvPVX(linestr);
3910                 PL_parser->filtered = 1;
3911                 break;
3912             }
3913             s++;
3914         }
3915     }
3916     return(datasv);
3917 }
3918
3919
3920 /* Delete most recently added instance of this filter function. */
3921 void
3922 Perl_filter_del(pTHX_ filter_t funcp)
3923 {
     /* Remove the filter registered with funcp, but only if it is the entry
      * at the highest index of PL_rsfp_filters; any other position ends in
      * Perl_die().  Does nothing when there is no parser, no filter array,
      * or the array is empty. */
3924     dVAR;
3925     SV *datasv;
3926
3927     PERL_ARGS_ASSERT_FILTER_DEL;
3928
3929 #ifdef DEBUGGING
3930     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3931                           FPTR2DPTR(void*, funcp)));
3932 #endif
3933     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3934         return;
3935     /* if filter is on top of stack (usual case) just pop it off */
         /* NOTE(review): "top" here is the highest array index, whereas
          * Perl_filter_add stores new filters at index 0 and may av_push a
          * saved line buffer onto the end of the same array -- confirm that
          * this is the intended element to compare. */
3936     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
         /* The function pointer stashed in IoANY identifies the filter. */
3937     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3938         sv_free(av_pop(PL_rsfp_filters));
3939
3940         return;
3941     }
3942     /* we need to search for the correct entry and clear it     */
3943     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3944 }
3945
3946
3947 /* Invoke the idxth filter function for the current rsfp.        */
3948 /* maxlen 0 = read one text line */
     /*
      * idx     index into PL_rsfp_filters of the filter to invoke
      * buf_sv  scalar that receives the data; data is appended to it
      * maxlen  0 to read one line, otherwise the maximum block size to read;
      *         a negative value is replaced by INT_MAX (0x7FFFFFFF under
      *         PERL_MICRO)
      *
      * Three sources are handled, in this order:
      *   - idx is past the last filter: read directly from PL_rsfp;
      *   - the slot holds something that is not an SVt_PVIO: the slot's own
      *     string buffer is consumed;
      *   - otherwise: the filter function stored in IoANY(datasv) is called.
      * A slot holding &PL_sv_undef is skipped by recursing to idx+1.
      *
      * Returns -1 when there is no parser or filter array, or on a read
      * error; 0 at end of input; for the first two sources the value
      * returned on success is SvCUR(buf_sv), i.e. the total length of buf_sv
      * and not just the number of bytes added.  For a filter function the
      * function's own return value is passed back.
      */
3949 I32
3950 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3951 {
3952     dVAR;
3953     filter_t funcp;
3954     SV *datasv = NULL;
3955     /* This API is bad. It should have been using unsigned int for maxlen.
3956        Not sure if we want to change the API, but if not we should sanity
3957        check the value here.  */
3958     unsigned int correct_length
3959         = maxlen < 0 ?
3960 #ifdef PERL_MICRO
3961         0x7FFFFFFF
3962 #else
3963         INT_MAX
3964 #endif
3965         : maxlen;
3966
3967     PERL_ARGS_ASSERT_FILTER_READ;
3968
3969     if (!PL_parser || !PL_rsfp_filters)
3970         return -1;
3971     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3972         /* Provide a default input filter to make life easy.    */
3973         /* Note that we append to the line. This is handy.      */
3974         DEBUG_P(PerlIO_printf(Perl_debug_log,
3975                               "filter_read %d: from rsfp\n", idx));
3976         if (correct_length) {
3977             /* Want a block */
3978             int len ;
                 /* NOTE(review): SvCUR is stored in an int here while the
                  * SvGROW argument below is cast to STRLEN -- confirm that
                  * buf_sv can never be long enough for this to truncate. */
3979             const int old_len = SvCUR(buf_sv);
3980
3981             /* ensure buf_sv is large enough */
                 /* room for the old contents, the new block and a NUL */
3982             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3983             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3984                                    correct_length)) <= 0) {
3985                 if (PerlIO_error(PL_rsfp))
3986                     return -1;          /* error */
3987                 else
3988                     return 0 ;          /* end of file */
3989             }
                 /* record the new length and NUL-terminate after the data */
3990             SvCUR_set(buf_sv, old_len + len) ;
3991             SvPVX(buf_sv)[old_len + len] = '\0';
3992         } else {
3993             /* Want a line */
                 /* append at the current end of buf_sv */
3994             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3995                 if (PerlIO_error(PL_rsfp))
3996                     return -1;          /* error */
3997                 else
3998                     return 0 ;          /* end of file */
3999             }
4000         }
4001         return SvCUR(buf_sv);
4002     }
4003     /* Skip this filter slot if filter has been deleted */
4004     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4005         DEBUG_P(PerlIO_printf(Perl_debug_log,
4006                               "filter_read %d: skipped (filter deleted)\n",
4007                               idx));
4008         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4009     }
         /* A slot that is not an SVt_PVIO is consumed as a plain buffer.  In
          * this branch SvCUR(datasv) is used as the read position and
          * SvLEN(datasv) as the end of the available data, so SvEND(datasv)
          * is where the next unread byte is taken from. */
4010     if (SvTYPE(datasv) != SVt_PVIO) {
4011         if (correct_length) {
4012             /* Want a block */
4013             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4014             if (!remainder) return 0; /* eof */
                 /* never hand out more than what is left in the buffer */
4015             if (correct_length > remainder) correct_length = remainder;
4016             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
                 /* advance the read position past the bytes just copied */
4017             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4018         } else {
4019             /* Want a line */
4020             const char *s = SvEND(datasv);
4021             const char *send = SvPVX(datasv) + SvLEN(datasv);
                 /* advance s to just past the next newline, or to send */
4022             while (s < send) {
4023                 if (*s == '\n') {
4024                     s++;
4025                     break;
4026                 }
4027                 s++;
4028             }
                 /* NOTE(review): s == send is also reached when the newline
                  * is the very last byte of the buffer, so that final line is
                  * reported as eof without being appended -- confirm this is
                  * the intended behaviour. */
4029             if (s == send) return 0; /* eof */
4030             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
                 /* advance the read position to the start of the next line */
4031             SvCUR_set(datasv, s-SvPVX(datasv));
4032         }
4033         return SvCUR(buf_sv);
4034     }
4035     /* Get function pointer hidden within datasv        */
4036     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
         /* NOTE(review): the pointer printed for "via function %p" is datasv
          * itself, not funcp -- confirm which one the message should show. */
4037     DEBUG_P(PerlIO_printf(Perl_debug_log,
4038                           "filter_read %d: via function %p (%s)\n",
4039                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4040     /* Call function. The function is expected to       */
4041     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4042     /* Return: <0:error, =0:eof, >0:not eof             */
4043     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4044 }
4045
     /*
      * S_filter_gets
      * Read the next line of input into sv, going through the source filters
      * when any are installed and straight from PL_rsfp otherwise.
      *
      * sv      scalar that receives the line
      * append  in the filtered path only tested for zero: when zero, sv is
      *         emptied before reading; in the unfiltered path it is passed
      *         unchanged as the third argument of sv_gets()
      *
      * Returns SvPVX(sv) when FILTER_READ reports a positive result, NULL when
      * it does not, or whatever sv_gets() returns in the unfiltered path.
      */
4046 STATIC char *
4047 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4048 {
4049     dVAR;
4050
4051     PERL_ARGS_ASSERT_FILTER_GETS;
4052
4053 #ifdef PERL_CR_FILTER
         /* with PERL_CR_FILTER, install S_cr_textfilter if no filter array
          * exists yet, so the filtered path below is always taken */
4054     if (!PL_rsfp_filters) {
4055         filter_add(S_cr_textfilter,NULL);
4056     }
4057 #endif
4058     if (PL_rsfp_filters) {
4059         if (!append)
4060             SvCUR_set(sv, 0);   /* start with empty line        */
             /* filter 0, line mode (maxlen 0); <= 0 means eof or error */
4061         if (FILTER_READ(0, sv, 0) > 0)
4062             return ( SvPVX(sv) ) ;
4063         else
4064             return NULL ;
4065     }
4066     else
4067         return (sv_gets(sv, PL_rsfp, append));
4068 }
4069
     /*
      * S_find_in_my_stash
      * Map a package name of len bytes at pkgname to a stash (HV *).
      *
      * Lookup order:
      *   1. the literal "__PACKAGE__" yields PL_curstash;
      *   2. a name longer than two bytes that ends in "::" is fetched as a
      *      glob of type SVt_PVHV and, if found, its GvHV is returned;
      *   3. otherwise the name is fetched as a sub; if that sub has a
      *      constant value (cv_const_sv), the string of that value replaces
      *      pkgname/len;
      *   4. the (possibly replaced) name is handed to gv_stashpvn() and its
      *      result is returned.
      * Every lookup passes SVf_UTF8 when UTF is true.
      */
4070 STATIC HV *
4071 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4072 {
4073     dVAR;
4074     GV *gv;
4075
4076     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4077
         /* NOTE(review): strEQ does not take len, so this presumably relies
          * on pkgname being NUL-terminated right after len bytes -- confirm
          * against the callers. */
4078     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4079         return PL_curstash;
4080
4081     if (len > 2 &&
4082         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4083         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4084     {
4085         return GvHV(gv);                        /* Foo:: */
4086     }
4087
4088     /* use constant CLASS => 'MyClass' */
4089     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4090     if (gv && GvCV(gv)) {
4091         SV * const sv = cv_const_sv(GvCV(gv));
             /* a constant sub: look up the stash named by its value instead */
4092         if (sv)
4093             pkgname = SvPV_const(sv, len);
4094     }
4095
4096     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4097 }
4098
4099 /*
4100  * S_readpipe_override
4101  * Check whether readpipe() is overridden, and generates the appropriate
4102  * optree, provided sublex_start() is called afterwards.
      *
      * Always sets pl_yylval.ival to OP_BACKTICK.  An override is recognised
      * when a "readpipe" glob has a sub that is flagged as imported, either
      * via the unqualified lookup (GV_NOTQUAL) or in PL_globalstash.  In that
      * case PL_lex_op is set to an OP_ENTERSUB calling that sub with a single
      * placeholder constant argument; without an override PL_lex_op is not
      * touched here.
4103  */
4104 STATIC void
4105 S_readpipe_override(pTHX)
4106 {
4107     GV **gvp;
4108     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4109     pl_yylval.ival = OP_BACKTICK;
         /* first alternative: the glob found above; second alternative: the
          * "readpipe" entry of PL_globalstash, which must be a real glob
          * (isGV_with_GP) before its CV is examined */
4110     if ((gv_readpipe
4111                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4112             ||
4113             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4114              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4115              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4116     {
4117         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4118             op_append_elem(OP_LIST,
4119                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4120                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4121     }
4122 }
4123
4124 #ifdef PERL_MAD
4125  /*
4126  * Perl_madlex
4127  * The intent of this yylex wrapper is to minimize the changes to the
4128  * tokener when we aren't interested in collecting madprops.  It remains
4129  * to be seen how successful this strategy will be...
      *
      * Outline of one call:
      *   - reset PL_thiswhite and PL_thismad; a pending identifier is handed
      *     straight to S_pending_ident();
      *   - call yylex() and, unless the token was already "forced", capture
      *     the token text from PL_linestr into PL_thistoken;
      *   - when no madprops exist yet, record token text, preceding
      *     whitespace, quoted material and closing quote via CURMAD;
      *   - token types whose value is an op get the madprops appended to that
      *     op and return immediately; all other tokens are wrapped in a
      *     TOKEN built by newTOKEN().
      * Returns the token type from yylex(), except that a 0 (EOF) that still
      * has whitespace to report is turned into PEG.
4130  */
4131
4132 int
4133 Perl_madlex(pTHX)
4134 {
4135     int optype;
4136     char *s = PL_bufptr;
4137
4138     /* make sure PL_thiswhite is initialized */
4139     PL_thiswhite = 0;
4140     PL_thismad = 0;
4141
4142     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4143     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4144         return S_pending_ident(aTHX);
4145
4146     /* previous token ate up our whitespace? */
4147     if (!PL_lasttoke && PL_nextwhite) {
4148         PL_thiswhite = PL_nextwhite;
4149         PL_nextwhite = 0;
4150     }
4151
4152     /* isolate the token, and figure out where it is without whitespace */
4153     PL_realtokenstart = -1;
4154     PL_thistoken = 0;
4155     optype = yylex();
         /* s now marks the end of the token that yylex() just consumed */
4156     s = PL_bufptr;
4157     assert(PL_curforce < 0);
4158
4159     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4160         if (!PL_thistoken) {
                 /* no known token start (or line number 0): use empty text;
                  * otherwise copy the text from the token start up to s */
4161             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4162                 PL_thistoken = newSVpvs("");
4163             else {
4164                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4165                 PL_thistoken = newSVpvn(tstart, s - tstart);
4166             }
4167         }
4168         if (PL_thismad) /* install head */
4169             CURMAD('X', PL_thistoken);
4170     }
4171
4172     /* last whitespace of a sublex? */
4173     if (optype == ')' && PL_endwhite) {
4174         CURMAD('X', PL_endwhite);
4175     }
4176
4177     if (!PL_thismad) {
4178
4179         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4180         if (!PL_thiswhite && !PL_endwhite && !optype) {
4181             sv_free(PL_thistoken);
4182             PL_thistoken = 0;
4183             return 0;
4184         }
4185
4186         /* put off final whitespace till peg */
4187         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4188             PL_nextwhite = PL_thiswhite;
4189             PL_thiswhite = 0;
4190         }
4191         else if (PL_thisopen) {
                 /* record the opening quote as madprop q and drop the
                  * separately captured token text */
4192             CURMAD('q', PL_thisopen);
4193             if (PL_thistoken)
4194                 sv_free(PL_thistoken);
4195             PL_thistoken = 0;
4196         }
4197         else {
4198             /* Store actual token text as madprop X */
4199             CURMAD('X', PL_thistoken);
4200         }
4201
4202         if (PL_thiswhite) {
4203             /* add preceding whitespace as madprop _ */
4204             CURMAD('_', PL_thiswhite);
4205         }
4206
4207         if (PL_thisstuff) {
4208             /* add quoted material as madprop = */
4209             CURMAD('=', PL_thisstuff);
4210         }
4211
4212         if (PL_thisclose) {
4213             /* add terminating quote as madprop Q */
4214             CURMAD('Q', PL_thisclose);
4215         }
4216     }
4217
4218     /* special processing based on optype */
4219
4220     switch (optype) {
4221
4222     /* opval doesn't need a TOKEN since it can already store mp */
4223     case WORD:
4224     case METHOD:
4225     case FUNCMETH:
4226     case THING:
4227     case PMFUNC:
4228     case PRIVATEREF:
4229     case FUNC0SUB:
4230     case UNIOPSUB:
4231     case LSTOPSUB:
4232     case LABEL:
             /* hand the collected madprops to the op, if there is one */
4233         if (pl_yylval.opval)
4234             append_madprops(PL_thismad, pl_yylval.opval, 0);
4235         PL_thismad = 0;
4236         return optype;
4237
4238     /* fake EOF */
4239     case 0:
4240         optype = PEG;
4241         if (PL_endwhite) {
4242             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4243             PL_endwhite = 0;
4244         }
4245         break;
4246
4247     case ']':
4248     case '}':
4249         if (PL_faketokens)
4250             break;
4251         /* remember any fake bracket that lexer is about to discard */
4252         if (PL_lex_brackets == 1 &&
4253             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4254         {
                 /* skip blanks and tabs after the token, looking for a '}' */
4255             s = PL_bufptr;
4256             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4257                 s++;
4258             if (*s == '}') {
                     /* capture the blanks plus the '}' as madprop # */
4259                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4260                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4261                 PL_thiswhite = 0;
                     /* s was advanced past the '}' above, so this leaves
                      * PL_bufptr pointing at the '}' itself */
4262                 PL_bufptr = s - 1;
4263                 break;  /* don't bother looking for trailing comment */
4264             }
4265             else
4266                 s = PL_bufptr;
4267         }
4268         if (optype == ']')
4269             break;
4270         /* FALLTHROUGH */
4271
4272     /* attach a trailing comment to its statement instead of next token */
4273     case ';':
4274         if (PL_faketokens)
4275             break;
             /* only when the byte just before PL_bufptr is the token
              * character itself */
4276         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4277             s = PL_bufptr;
4278             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4279                 s++;
4280             if (*s == '\n' || *s == '#') {
                     /* consume through the end of the line, including the
                      * newline when there is one */
4281                 while (s < PL_bufend && *s != '\n')
4282                     s++;
4283                 if (s < PL_bufend)
4284                     s++;
4285                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4286                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4287                 PL_thiswhite = 0;
4288                 PL_bufptr = s;
4289             }
4290         }
4291         break;
4292
4293     /* ival */
4294     default:
4295         break;
4296
4297     }
4298
4299     /* Create new token struct.  Note: opvals return early above. */
4300     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4301     PL_thismad = 0;
4302     return optype;
4303 }
4304 #endif
4305
     /*
      * S_tokenize_use
      * Queue the tokens that follow a "use" or "no" keyword.
      *
      * is_use  non-zero for "use", zero for "no"; used to pick the keyword
      *         named in the error message and stored in pl_yylval.ival
      * s       position in the input just after the keyword
      *
      * An error is reported through yyerror() when PL_expect is not XSTATE,
      * but processing continues.  Returns the input position after the
      * tokens that were forced.
      */
4306 STATIC char *
4307 S_tokenize_use(pTHX_ int is_use, char *s) {
4308     dVAR;
4309
4310     PERL_ARGS_ASSERT_TOKENIZE_USE;
4311
4312     if (PL_expect != XSTATE)
4313         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4314                     is_use ? "use" : "no"));
4315     s = SKIPSPACE1(s);
         /* something that looks like a version comes first: a digit, or 'v'
          * followed by a digit */
4316     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4317         s = force_version(s, TRUE);
             /* the version is followed by ';' or '}', possibly after
              * whitespace: force a WORD token with a NULL op */
4318         if (*s == ';' || *s == '}'
4319                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4320             start_force(PL_curforce);
4321             NEXTVAL_NEXTTOKE.opval = NULL;
4322             force_next(WORD);
4323         }
             /* otherwise, text starting with 'v' is forced as a word,
              * followed by a second force_version() call */
4324         else if (*s == 'v') {
4325             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4326             s = force_version(s, FALSE);
4327         }
4328     }
4329     else {
             /* no leading version: force a word, then force_version() */
4330         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4331         s = force_version(s, FALSE);
4332     }
4333     pl_yylval.ival = is_use;
4334     return s;
4335 }
4336 #ifdef DEBUGGING
         /* Printable names for the values of PL_expect.  Only defined in
          * DEBUGGING builds; the DEBUG_T trace at the top of Perl_yylex
          * indexes this table with PL_expect and prints the name after an
          * "X" prefix. */
4337     static const char* const exp_name[] =
4338         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4339           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4340         };
4341 #endif
4342
4343 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
     /*
      * S_word_takes_any_delimeter
      * Tell whether the len-byte word at p is one of m, s, y, q, tr, qq, qw,
      * qx or qr.
      *
      * p    start of the word; only p[0] (and p[1] when len is 2) is read
      * len  length of the word in bytes
      *
      * Returns true for exactly the nine words listed above.
      *
      * strchr() treats the terminating NUL of its first argument as part of
      * the string, so an unguarded strchr("msyq", '\0') succeeds.  The
      * explicit NUL checks below keep a word containing a NUL byte from
      * being accepted.
      */
4344 STATIC bool
4345 S_word_takes_any_delimeter(char *p, STRLEN len)
4346 {
4347     return (len == 1 && p[0] != '\0' && strchr("msyq", p[0])) ||
4348            (len == 2 && (
4349             (p[0] == 't' && p[1] == 'r') ||
4350             (p[0] == 'q' && p[1] != '\0' && strchr("qwxr", p[1]))));
4351 }
4352
4353 /*
4354   yylex
4355
4356   Works out what to call the token just pulled out of the input
4357   stream.  The yacc parser takes care of taking the ops we return and
4358   stitching them into a tree.
4359
4360   Returns:
4361     PRIVATEREF
4362
4363   Structure:
4364       if read an identifier
4365           if we're in a my declaration
4366               croak if they tried to say my($foo::bar)
4367               build the ops for a my() declaration
4368           if it's an access to a my() variable
4369               are we in a sort block?
4370                   croak if my($a); $a <=> $b
4371               build ops for access to a my() variable
4372           if in a dq string, and they've said @foo and we can't find @foo
4373               croak
4374           build ops for a bareword
4375       if we already built the token before, use it.
4376 */
4377
4378
4379 #ifdef __SC__
4380 #pragma segment Perl_yylex
4381 #endif
4382 int
4383 Perl_yylex(pTHX)
4384 {
4385     dVAR;
4386     register char *s = PL_bufptr;
4387     register char *d;
4388     STRLEN len;
4389     bool bof = FALSE;
4390     U32 fake_eof = 0;
4391
4392     /* orig_keyword, gvp, and gv are initialized here because
4393      * jump to the label just_a_word_zero can bypass their
4394      * initialization later. */
4395     I32 orig_keyword = 0;
4396     GV *gv = NULL;
4397     GV **gvp = NULL;
4398
4399     DEBUG_T( {
4400         SV* tmp = newSVpvs("");
4401         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4402             (IV)CopLINE(PL_curcop),
4403             lex_state_names[PL_lex_state],
4404             exp_name[PL_expect],
4405             pv_display(tmp, s, strlen(s), 0, 60));
4406         SvREFCNT_dec(tmp);
4407     } );
4408     /* check if there's an identifier for us to look at */
4409     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4410         return REPORT(S_pending_ident(aTHX));
4411
4412     /* no identifier pending identification */
4413
4414     switch (PL_lex_state) {
4415 #ifdef COMMENTARY
4416     case LEX_NORMAL:            /* Some compilers will produce faster */
4417     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4418         break;
4419 #endif
4420
4421     /* when we've already built the next token, just pull it out of the queue */
4422     case LEX_KNOWNEXT:
4423 #ifdef PERL_MAD
4424         PL_lasttoke--;
4425         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4426         if (PL_madskills) {
4427             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4428             PL_nexttoke[PL_lasttoke].next_mad = 0;
4429             if (PL_thismad && PL_thismad->mad_key == '_') {
4430                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4431                 PL_thismad->mad_val = 0;
4432                 mad_free(PL_thismad);
4433                 PL_thismad = 0;
4434             }
4435         }
4436         if (!PL_lasttoke) {
4437             PL_lex_state = PL_lex_defer;
4438             PL_expect = PL_lex_expect;
4439             PL_lex_defer = LEX_NORMAL;
4440             if (!PL_nexttoke[PL_lasttoke].next_type)
4441                 return yylex();
4442         }
4443 #else
4444         PL_nexttoke--;
4445         pl_yylval = PL_nextval[PL_nexttoke];
4446         if (!PL_nexttoke) {
4447             PL_lex_state = PL_lex_defer;
4448             PL_expect = PL_lex_expect;
4449             PL_lex_defer = LEX_NORMAL;
4450         }
4451 #endif
4452         {
4453             I32 next_type;
4454 #ifdef PERL_MAD
4455             next_type = PL_nexttoke[PL_lasttoke].next_type;
4456 #else
4457             next_type = PL_nexttype[PL_nexttoke];
4458 #endif
4459             if (next_type & (7<<24)) {
4460                 if (next_type & (1<<24)) {
4461                     if (PL_lex_brackets > 100)
4462                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4463                     PL_lex_brackstack[PL_lex_brackets++] =
4464                         (char) ((next_type >> 16) & 0xff);
4465                 }
4466                 if (next_type & (2<<24))
4467                     PL_lex_allbrackets++;
4468                 if (next_type & (4<<24))
4469                     PL_lex_allbrackets--;
4470                 next_type &= 0xffff;
4471             }
4472 #ifdef PERL_MAD
4473             /* FIXME - can these be merged?  */
4474             return next_type;
4475 #else
4476             return REPORT(next_type);
4477 #endif
4478         }
4479
4480     /* interpolated case modifiers like \L \U, including \Q and \E.
4481        when we get here, PL_bufptr is at the \
4482     */
4483     case LEX_INTERPCASEMOD:
4484 #ifdef DEBUGGING
4485         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4486             Perl_croak(aTHX_
4487                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4488                        PL_bufptr, PL_bufend, *PL_bufptr);
4489 #endif
4490         /* handle \E or end of string */
4491         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4492             /* if at a \E */
4493             if (PL_lex_casemods) {
4494                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4495                 PL_lex_casestack[PL_lex_casemods] = '\0';
4496
4497                 if (PL_bufptr != PL_bufend
4498                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4499                         || oldmod == 'F')) {
4500                     PL_bufptr += 2;
4501                     PL_lex_state = LEX_INTERPCONCAT;
4502 #ifdef PERL_MAD
4503                     if (PL_madskills)
4504                         PL_thistoken = newSVpvs("\\E");
4505 #endif
4506                 }
4507                 PL_lex_allbrackets--;
4508                 return REPORT(')');
4509             }
4510             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4511                /* Got an unpaired \E */
4512                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4513                         "Useless use of \\E");
4514             }
4515 #ifdef PERL_MAD
4516             while (PL_bufptr != PL_bufend &&
4517               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4518                 if (!PL_thiswhite)
4519                     PL_thiswhite = newSVpvs("");
4520                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4521                 PL_bufptr += 2;
4522             }
4523 #else
4524             if (PL_bufptr != PL_bufend)
4525                 PL_bufptr += 2;
4526 #endif
4527             PL_lex_state = LEX_INTERPCONCAT;
4528             return yylex();
4529         }
4530         else {
4531             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4532               "### Saw case modifier\n"); });
4533             s = PL_bufptr + 1;
4534             if (s[1] == '\\' && s[2] == 'E') {
4535 #ifdef PERL_MAD
4536                 if (!PL_thiswhite)
4537                     PL_thiswhite = newSVpvs("");
4538                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4539 #endif
4540                 PL_bufptr = s + 3;
4541                 PL_lex_state = LEX_INTERPCONCAT;
4542                 return yylex();
4543             }
4544             else {
4545                 I32 tmp;
4546                 if (!PL_madskills) /* when just compiling don't need correct */
4547                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4548                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4549                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4550                     (strchr(PL_lex_casestack, 'L')
4551                         || strchr(PL_lex_casestack, 'U')
4552                         || strchr(PL_lex_casestack, 'F'))) {
4553                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4554                     PL_lex_allbrackets--;
4555                     return REPORT(')');
4556                 }
4557                 if (PL_lex_casemods > 10)
4558                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4559                 PL_lex_casestack[PL_lex_casemods++] = *s;
4560                 PL_lex_casestack[PL_lex_casemods] = '\0';
4561                 PL_lex_state = LEX_INTERPCONCAT;
4562                 start_force(PL_curforce);
4563                 NEXTVAL_NEXTTOKE.ival = 0;
4564                 force_next((2<<24)|'(');
4565                 start_force(PL_curforce);
4566                 if (*s == 'l')
4567                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4568                 else if (*s == 'u')
4569                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4570                 else if (*s == 'L')
4571                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4572                 else if (*s == 'U')
4573                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4574                 else if (*s == 'Q')
4575                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4576                 else if (*s == 'F')
4577                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4578                 else
4579                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4580                 if (PL_madskills) {
4581                     SV* const tmpsv = newSVpvs("\\ ");
4582                     /* replace the space with the character we want to escape
4583                      */
4584                     SvPVX(tmpsv)[1] = *s;
4585                     curmad('_', tmpsv);
4586                 }
4587                 PL_bufptr = s + 1;
4588             }
4589             force_next(FUNC);
4590             if (PL_lex_starts) {
4591                 s = PL_bufptr;
4592                 PL_lex_starts = 0;
4593 #ifdef PERL_MAD
4594                 if (PL_madskills) {
4595                     if (PL_thistoken)
4596                         sv_free(PL_thistoken);
4597                     PL_thistoken = newSVpvs("");
4598                 }
4599 #endif
4600                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4601                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4602                     OPERATOR(',');
4603                 else
4604                     Aop(OP_CONCAT);
4605             }
4606             else
4607                 return yylex();
4608         }
4609
4610     case LEX_INTERPPUSH:
4611         return REPORT(sublex_push());
4612
4613     case LEX_INTERPSTART:
4614         if (PL_bufptr == PL_bufend)
4615             return REPORT(sublex_done());
4616         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4617               "### Interpolated variable\n"); });
4618         PL_expect = XTERM;
4619         PL_lex_dojoin = (*PL_bufptr == '@');
4620         PL_lex_state = LEX_INTERPNORMAL;
4621         if (PL_lex_dojoin) {
4622             start_force(PL_curforce);
4623             NEXTVAL_NEXTTOKE.ival = 0;
4624             force_next(',');
4625             start_force(PL_curforce);
4626             force_ident("\"", '$');
4627             start_force(PL_curforce);
4628             NEXTVAL_NEXTTOKE.ival = 0;
4629             force_next('$');
4630             start_force(PL_curforce);
4631             NEXTVAL_NEXTTOKE.ival = 0;
4632             force_next((2<<24)|'(');
4633             start_force(PL_curforce);
4634             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4635             force_next(FUNC);
4636         }
4637         /* Convert (?{...}) and friends to 'do {...}' */
4638         if (PL_lex_inpat && *PL_bufptr == '(') {
4639             PL_sublex_info.re_eval_start = PL_bufptr;
4640             PL_bufptr += 2;
4641             if (*PL_bufptr != '{')
4642                 PL_bufptr++;
4643             start_force(PL_curforce);
4644             /* XXX probably need a CURMAD(something) here */
4645             PL_expect = XTERMBLOCK;
4646             force_next(DO);
4647         }
4648
4649         if (PL_lex_starts++) {
4650             s = PL_bufptr;
4651 #ifdef PERL_MAD
4652             if (PL_madskills) {
4653                 if (PL_thistoken)
4654                     sv_free(PL_thistoken);
4655                 PL_thistoken = newSVpvs("");
4656             }
4657 #endif
4658             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4659             if (!PL_lex_casemods && PL_lex_inpat)
4660                 OPERATOR(',');
4661             else
4662                 Aop(OP_CONCAT);
4663         }
4664         return yylex();
4665
4666     case LEX_INTERPENDMAYBE:
4667         if (intuit_more(PL_bufptr)) {
4668             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4669             break;
4670         }
4671         /* FALL THROUGH */
4672
4673     case LEX_INTERPEND:
4674         if (PL_lex_dojoin) {
4675             PL_lex_dojoin = FALSE;
4676             PL_lex_state = LEX_INTERPCONCAT;
4677 #ifdef PERL_MAD
4678             if (PL_madskills) {
4679                 if (PL_thistoken)
4680                     sv_free(PL_thistoken);
4681                 PL_thistoken = newSVpvs("");
4682             }
4683 #endif
4684             PL_lex_allbrackets--;
4685             return REPORT(')');
4686         }
4687         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4688             && SvEVALED(PL_lex_repl))
4689         {
4690             if (PL_bufptr != PL_bufend)
4691                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4692             PL_lex_repl = NULL;
4693         }
4694         if (PL_sublex_info.re_eval_start) {
4695             if (*PL_bufptr != ')')
4696                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4697             PL_bufptr++;
4698             /* having compiled a (?{..}) expression, return the original
4699              * text too, as a const */
4700             start_force(PL_curforce);
4701             /* XXX probably need a CURMAD(something) here */
4702             NEXTVAL_NEXTTOKE.opval =
4703                     (OP*)newSVOP(OP_CONST, 0,
4704                         newSVpvn(PL_sublex_info.re_eval_start,
4705                                 PL_bufptr - PL_sublex_info.re_eval_start));
4706             force_next(THING);
4707             PL_sublex_info.re_eval_start = NULL;
4708             PL_expect = XTERM;
4709             return REPORT(',');
4710         }
4711
4712         /* FALLTHROUGH */
4713     case LEX_INTERPCONCAT:
4714 #ifdef DEBUGGING
4715         if (PL_lex_brackets)
4716             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4717                        (long) PL_lex_brackets);
4718 #endif
4719         if (PL_bufptr == PL_bufend)
4720             return REPORT(sublex_done());
4721
4722         /* m'foo' still needs to be parsed for possible (?{...}) */
4723         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4724             SV *sv = newSVsv(PL_linestr);
4725             sv = tokeq(sv);
4726             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4727             s = PL_bufend;
4728         }
4729         else {
4730             s = scan_const(PL_bufptr);
4731             if (*s == '\\')
4732                 PL_lex_state = LEX_INTERPCASEMOD;
4733             else
4734                 PL_lex_state = LEX_INTERPSTART;
4735         }
4736
4737         if (s != PL_bufptr) {
4738             start_force(PL_curforce);
4739             if (PL_madskills) {
4740                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4741             }
4742             NEXTVAL_NEXTTOKE = pl_yylval;
4743             PL_expect = XTERM;
4744             force_next(THING);
4745             if (PL_lex_starts++) {
4746 #ifdef PERL_MAD
4747                 if (PL_madskills) {
4748                     if (PL_thistoken)
4749                         sv_free(PL_thistoken);
4750                     PL_thistoken = newSVpvs("");
4751                 }
4752 #endif
4753                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4754                 if (!PL_lex_casemods && PL_lex_inpat)
4755                     OPERATOR(',');
4756                 else
4757                     Aop(OP_CONCAT);
4758             }
4759             else {
4760                 PL_bufptr = s;
4761                 return yylex();
4762             }
4763         }
4764
4765         return yylex();
4766     case LEX_FORMLINE:
4767         PL_lex_state = LEX_NORMAL;
4768         s = scan_formline(PL_bufptr);
4769         if (!PL_lex_formbrack)
4770             goto rightbracket;
4771         OPERATOR(';');
4772     }
4773
4774     s = PL_bufptr;
4775     PL_oldoldbufptr = PL_oldbufptr;
4776     PL_oldbufptr = s;
4777
4778   retry:
4779 #ifdef PERL_MAD
4780     if (PL_thistoken) {
4781         sv_free(PL_thistoken);
4782         PL_thistoken = 0;
4783     }
4784     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4785 #endif
4786     switch (*s) {
4787     default:
4788         if (isIDFIRST_lazy_if(s,UTF))
4789             goto keylookup;
4790         {
4791         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4792         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4793                                                     UTF8SKIP(s),
4794                                                     SVs_TEMP | SVf_UTF8),
4795                                             10, UNI_DISPLAY_ISPRINT))
4796                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4797         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4798         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4799             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4800         } else {
4801             d = PL_linestart;
4802         }
4803         *s = '\0';
4804         sv_setpv(dsv, d);
4805         if (UTF)
4806             SvUTF8_on(dsv);
4807         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
4808     }
4809     case 4:
4810     case 26:
4811         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4812     case 0:
4813 #ifdef PERL_MAD
4814         if (PL_madskills)
4815             PL_faketokens = 0;
4816 #endif
4817         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4818             PL_last_uni = 0;
4819             PL_last_lop = 0;
4820             if (PL_lex_brackets &&
4821                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4822                 yyerror((const char *)
4823                         (PL_lex_formbrack
4824                          ? "Format not terminated"
4825                          : "Missing right curly or square bracket"));
4826             }
4827             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4828                         "### Tokener got EOF\n");
4829             } );
4830             TOKEN(0);
4831         }
4832         if (s++ < PL_bufend)
4833             goto retry;                 /* ignore stray nulls */
4834         PL_last_uni = 0;
4835         PL_last_lop = 0;
4836         if (!PL_in_eval && !PL_preambled) {
4837             PL_preambled = TRUE;
4838 #ifdef PERL_MAD
4839             if (PL_madskills)
4840                 PL_faketokens = 1;
4841 #endif
4842             if (PL_perldb) {
4843                 /* Generate a string of Perl code to load the debugger.
4844                  * If PERL5DB is set, it will return the contents of that,
4845                  * otherwise a compile-time require of perl5db.pl.  */
4846
4847                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4848
4849                 if (pdb) {
4850                     sv_setpv(PL_linestr, pdb);
4851                     sv_catpvs(PL_linestr,";");
4852                 } else {
4853                     SETERRNO(0,SS_NORMAL);
4854                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4855                 }
4856             } else
4857                 sv_setpvs(PL_linestr,"");
4858             if (PL_preambleav) {
4859                 SV **svp = AvARRAY(PL_preambleav);
4860                 SV **const end = svp + AvFILLp(PL_preambleav);
4861                 while(svp <= end) {
4862                     sv_catsv(PL_linestr, *svp);
4863                     ++svp;
4864                     sv_catpvs(PL_linestr, ";");
4865                 }
4866                 sv_free(MUTABLE_SV(PL_preambleav));
4867                 PL_preambleav = NULL;
4868             }
4869             if (PL_minus_E)
4870                 sv_catpvs(PL_linestr,
4871                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4872             if (PL_minus_n || PL_minus_p) {
4873                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4874                 if (PL_minus_l)
4875                     sv_catpvs(PL_linestr,"chomp;");
4876                 if (PL_minus_a) {
4877                     if (PL_minus_F) {
4878                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4879                              || *PL_splitstr == '"')
4880                               && strchr(PL_splitstr + 1, *PL_splitstr))
4881                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4882                         else {
4883                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4884                                bytes can be used as quoting characters.  :-) */
4885                             const char *splits = PL_splitstr;
4886                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4887                             do {
4888                                 /* Need to \ \s  */
4889                                 if (*splits == '\\')
4890                                     sv_catpvn(PL_linestr, splits, 1);
4891                                 sv_catpvn(PL_linestr, splits, 1);
4892                             } while (*splits++);
4893                             /* This loop will embed the trailing NUL of
4894                                PL_linestr as the last thing it does before
4895                                terminating.  */
4896                             sv_catpvs(PL_linestr, ");");
4897                         }
4898                     }
4899                     else
4900                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4901                 }
4902             }
4903             sv_catpvs(PL_linestr, "\n");
4904             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4905             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4906             PL_last_lop = PL_last_uni = NULL;
4907             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4908                 update_debugger_info(PL_linestr, NULL, 0);
4909             goto retry;
4910         }
4911         do {
4912             fake_eof = 0;
4913             bof = PL_rsfp ? TRUE : FALSE;
4914             if (0) {
4915               fake_eof:
4916                 fake_eof = LEX_FAKE_EOF;
4917             }
4918             PL_bufptr = PL_bufend;
4919             CopLINE_inc(PL_curcop);
4920             if (!lex_next_chunk(fake_eof)) {
4921                 CopLINE_dec(PL_curcop);
4922                 s = PL_bufptr;
4923                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4924             }
4925             CopLINE_dec(PL_curcop);
4926 #ifdef PERL_MAD
4927             if (!PL_rsfp)
4928                 PL_realtokenstart = -1;
4929 #endif
4930             s = PL_bufptr;
4931             /* If it looks like the start of a BOM or raw UTF-16,
4932              * check if it in fact is. */
4933             if (bof && PL_rsfp &&
4934                      (*s == 0 ||
4935                       *(U8*)s == 0xEF ||
4936                       *(U8*)s >= 0xFE ||
4937                       s[1] == 0)) {
4938                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4939                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4940 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4941                 /* offset may include swallowed CR */
4942                 if (!bof)
4943                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4944 #endif
4945                 if (bof) {
4946                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4947                     s = swallow_bom((U8*)s);
4948                 }
4949             }
4950             if (PL_parser->in_pod) {
4951                 /* Incest with pod. */
4952 #ifdef PERL_MAD
4953                 if (PL_madskills)
4954                     sv_catsv(PL_thiswhite, PL_linestr);
4955 #endif
4956                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4957                     sv_setpvs(PL_linestr, "");
4958                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4959                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4960                     PL_last_lop = PL_last_uni = NULL;
4961                     PL_parser->in_pod = 0;
4962                 }
4963             }
4964             if (PL_rsfp || PL_parser->filtered)
4965                 incline(s);
4966         } while (PL_parser->in_pod);
4967         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4968         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4969         PL_last_lop = PL_last_uni = NULL;
4970         if (CopLINE(PL_curcop) == 1) {
4971             while (s < PL_bufend && isSPACE(*s))
4972                 s++;
4973             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4974                 s++;
4975 #ifdef PERL_MAD
4976             if (PL_madskills)
4977                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4978 #endif
4979             d = NULL;
4980             if (!PL_in_eval) {
4981                 if (*s == '#' && *(s+1) == '!')
4982                     d = s + 2;
4983 #ifdef ALTERNATE_SHEBANG
4984                 else {
4985                     static char const as[] = ALTERNATE_SHEBANG;
4986                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4987                         d = s + (sizeof(as) - 1);
4988                 }
4989 #endif /* ALTERNATE_SHEBANG */
4990             }
4991             if (d) {
4992                 char *ipath;
4993                 char *ipathend;
4994
4995                 while (isSPACE(*d))
4996                     d++;
4997                 ipath = d;
4998                 while (*d && !isSPACE(*d))
4999                     d++;
5000                 ipathend = d;
5001
5002 #ifdef ARG_ZERO_IS_SCRIPT
5003                 if (ipathend > ipath) {
5004                     /*
5005                      * HP-UX (at least) sets argv[0] to the script name,
5006                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5007                      * at least, set argv[0] to the basename of the Perl
5008                      * interpreter. So, having found "#!", we'll set it right.
5009                      */
5010                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5011                                                     SVt_PV)); /* $^X */
5012                     assert(SvPOK(x) || SvGMAGICAL(x));
5013                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5014                         sv_setpvn(x, ipath, ipathend - ipath);
5015                         SvSETMAGIC(x);
5016                     }
5017                     else {
5018                         STRLEN blen;
5019                         STRLEN llen;
5020                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5021                         const char * const lstart = SvPV_const(x,llen);
5022                         if (llen < blen) {
5023                             bstart += blen - llen;
5024                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5025                                 sv_setpvn(x, ipath, ipathend - ipath);
5026                                 SvSETMAGIC(x);
5027                             }
5028                         }
5029                     }
5030                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5031                 }
5032 #endif /* ARG_ZERO_IS_SCRIPT */
5033
5034                 /*
5035                  * Look for options.
5036                  */
5037                 d = instr(s,"perl -");
5038                 if (!d) {
5039                     d = instr(s,"perl");
5040 #if defined(DOSISH)
5041                     /* avoid getting into infinite loops when shebang
5042                      * line contains "Perl" rather than "perl" */
5043                     if (!d) {
5044                         for (d = ipathend-4; d >= ipath; --d) {
5045                             if ((*d == 'p' || *d == 'P')
5046                                 && !ibcmp(d, "perl", 4))
5047                             {
5048                                 break;
5049                             }
5050                         }
5051                         if (d < ipath)
5052                             d = NULL;
5053                     }
5054 #endif
5055                 }
5056 #ifdef ALTERNATE_SHEBANG
5057                 /*
5058                  * If the ALTERNATE_SHEBANG on this system starts with a
5059                  * character that can be part of a Perl expression, then if
5060                  * we see it but not "perl", we're probably looking at the
5061                  * start of Perl code, not a request to hand off to some
5062                  * other interpreter.  Similarly, if "perl" is there, but
5063                  * not in the first 'word' of the line, we assume the line
5064                  * contains the start of the Perl program.
5065                  */
5066                 if (d && *s != '#') {
5067                     const char *c = ipath;
5068                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5069                         c++;
5070                     if (c < d)
5071                         d = NULL;       /* "perl" not in first word; ignore */
5072                     else
5073                         *s = '#';       /* Don't try to parse shebang line */
5074                 }
5075 #endif /* ALTERNATE_SHEBANG */
5076                 if (!d &&
5077                     *s == '#' &&
5078                     ipathend > ipath &&
5079                     !PL_minus_c &&
5080                     !instr(s,"indir") &&
5081                     instr(PL_origargv[0],"perl"))
5082                 {
5083                     dVAR;
5084                     char **newargv;
5085
5086                     *ipathend = '\0';
5087                     s = ipathend + 1;
5088                     while (s < PL_bufend && isSPACE(*s))
5089                         s++;
5090                     if (s < PL_bufend) {
5091                         Newx(newargv,PL_origargc+3,char*);
5092                         newargv[1] = s;
5093                         while (s < PL_bufend && !isSPACE(*s))
5094                             s++;
5095                         *s = '\0';
5096                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5097                     }
5098                     else
5099                         newargv = PL_origargv;
5100                     newargv[0] = ipath;
5101                     PERL_FPU_PRE_EXEC
5102                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5103                     PERL_FPU_POST_EXEC
5104                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5105                 }
5106                 if (d) {
5107                     while (*d && !isSPACE(*d))
5108                         d++;
5109                     while (SPACE_OR_TAB(*d))
5110                         d++;
5111
5112                     if (*d++ == '-') {
5113                         const bool switches_done = PL_doswitches;
5114                         const U32 oldpdb = PL_perldb;
5115                         const bool oldn = PL_minus_n;
5116                         const bool oldp = PL_minus_p;
5117                         const char *d1 = d;
5118
5119                         do {
5120                             bool baduni = FALSE;
5121                             if (*d1 == 'C') {
5122                                 const char *d2 = d1 + 1;
5123                                 if (parse_unicode_opts((const char **)&d2)
5124                                     != PL_unicode)
5125                                     baduni = TRUE;
5126                             }
5127                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5128                                 const char * const m = d1;
5129                                 while (*d1 && !isSPACE(*d1))
5130                                     d1++;
5131                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5132                                       (int)(d1 - m), m);
5133                             }
5134                             d1 = moreswitches(d1);
5135                         } while (d1);
5136                         if (PL_doswitches && !switches_done) {
5137                             int argc = PL_origargc;
5138                             char **argv = PL_origargv;
5139                             do {
5140                                 argc--,argv++;
5141                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5142                             init_argv_symbols(argc,argv);
5143                         }
5144                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5145                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5146                               /* if we have already added "LINE: while (<>) {",
5147                                  we must not do it again */
5148                         {
5149                             sv_setpvs(PL_linestr, "");
5150                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5151                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5152                             PL_last_lop = PL_last_uni = NULL;
5153                             PL_preambled = FALSE;
5154                             if (PERLDB_LINE || PERLDB_SAVESRC)
5155                                 (void)gv_fetchfile(PL_origfilename);
5156                             goto retry;
5157                         }
5158                     }
5159                 }
5160             }
5161         }
5162         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5163             PL_bufptr = s;
5164             PL_lex_state = LEX_FORMLINE;
5165             return yylex();
5166         }
5167         goto retry;
5168     case '\r':
5169 #ifdef PERL_STRICT_CR
5170         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5171         Perl_croak(aTHX_
5172       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5173 #endif
5174     case ' ': case '\t': case '\f': case 013:
5175 #ifdef PERL_MAD
5176         PL_realtokenstart = -1;
5177         if (!PL_thiswhite)
5178             PL_thiswhite = newSVpvs("");
5179         sv_catpvn(PL_thiswhite, s, 1);
5180 #endif
5181         s++;
5182         goto retry;
5183     case '#':
5184     case '\n':
5185 #ifdef PERL_MAD
5186         PL_realtokenstart = -1;
5187         if (PL_madskills)
5188             PL_faketokens = 0;
5189 #endif
5190         if (PL_lex_state != LEX_NORMAL ||
5191              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5192             if (*s == '#' && s == PL_linestart && PL_in_eval
5193              && !PL_rsfp && !PL_parser->filtered) {
5194                 /* handle eval qq[#line 1 "foo"\n ...] */
5195                 CopLINE_dec(PL_curcop);
5196                 incline(s);
5197             }
5198             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5199                 s = SKIPSPACE0(s);
5200                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5201                     incline(s);
5202             }
5203             else {
5204                 d = s;
5205                 while (d < PL_bufend && *d != '\n')
5206                     d++;
5207                 if (d < PL_bufend)
5208                     d++;
5209                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5210                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5211                                d, PL_bufend);
5212 #ifdef PERL_MAD
5213                 if (PL_madskills)
5214                     PL_thiswhite = newSVpvn(s, d - s);
5215 #endif
5216                 s = d;
5217                 incline(s);
5218             }
5219             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5220                 PL_bufptr = s;
5221                 PL_lex_state = LEX_FORMLINE;
5222                 return yylex();
5223             }
5224         }
5225         else {
5226 #ifdef PERL_MAD
5227             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5228                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5229                     PL_faketokens = 0;
5230                     s = SKIPSPACE0(s);
5231                     TOKEN(PEG); /* make sure any #! line is accessible */
5232                 }
5233                 s = SKIPSPACE0(s);
5234             }
5235             else {
5236 /*              if (PL_madskills && PL_lex_formbrack) { */
5237                     d = s;
5238                     while (d < PL_bufend && *d != '\n')
5239                         d++;
5240                     if (d < PL_bufend)
5241                         d++;
5242                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5243                       Perl_croak(aTHX_ "panic: input overflow");
5244                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5245                         if (!PL_thiswhite)
5246                             PL_thiswhite = newSVpvs("");
5247                         if (CopLINE(PL_curcop) == 1) {
5248                             sv_setpvs(PL_thiswhite, "");
5249                             PL_faketokens = 0;
5250                         }
5251                         sv_catpvn(PL_thiswhite, s, d - s);
5252                     }
5253                     s = d;
5254 /*              }
5255                 *s = '\0';
5256                 PL_bufend = s; */
5257             }
5258 #else
5259             *s = '\0';
5260             PL_bufend = s;
5261 #endif
5262         }
5263         goto retry;
5264     case '-':
5265         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5266             I32 ftst = 0;
5267             char tmp;
5268
5269             s++;
5270             PL_bufptr = s;
5271             tmp = *s++;
5272
5273             while (s < PL_bufend && SPACE_OR_TAB(*s))
5274                 s++;
5275
5276             if (strnEQ(s,"=>",2)) {
5277                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5278                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5279                 OPERATOR('-');          /* unary minus */
5280             }
5281             PL_last_uni = PL_oldbufptr;
5282             switch (tmp) {
5283             case 'r': ftst = OP_FTEREAD;        break;
5284             case 'w': ftst = OP_FTEWRITE;       break;
5285             case 'x': ftst = OP_FTEEXEC;        break;
5286             case 'o': ftst = OP_FTEOWNED;       break;
5287             case 'R': ftst = OP_FTRREAD;        break;
5288             case 'W': ftst = OP_FTRWRITE;       break;
5289             case 'X': ftst = OP_FTREXEC;        break;
5290             case 'O': ftst = OP_FTROWNED;       break;
5291             case 'e': ftst = OP_FTIS;           break;
5292             case 'z': ftst = OP_FTZERO;         break;
5293             case 's': ftst = OP_FTSIZE;         break;
5294             case 'f': ftst = OP_FTFILE;         break;
5295             case 'd': ftst = OP_FTDIR;          break;
5296             case 'l': ftst = OP_FTLINK;         break;
5297             case 'p': ftst = OP_FTPIPE;         break;
5298             case 'S': ftst = OP_FTSOCK;         break;
5299             case 'u': ftst = OP_FTSUID;         break;
5300             case 'g': ftst = OP_FTSGID;         break;
5301             case 'k': ftst = OP_FTSVTX;         break;
5302             case 'b': ftst = OP_FTBLK;          break;
5303             case 'c': ftst = OP_FTCHR;          break;
5304             case 't': ftst = OP_FTTTY;          break;
5305             case 'T': ftst = OP_FTTEXT;         break;
5306             case 'B': ftst = OP_FTBINARY;       break;
5307             case 'M': case 'A': case 'C':
5308                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5309                 switch (tmp) {
5310                 case 'M': ftst = OP_FTMTIME;    break;
5311                 case 'A': ftst = OP_FTATIME;    break;
5312                 case 'C': ftst = OP_FTCTIME;    break;
5313                 default:                        break;
5314                 }
5315                 break;
5316             default:
5317                 break;
5318             }
5319             if (ftst) {
5320                 PL_last_lop_op = (OPCODE)ftst;
5321                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5322                         "### Saw file test %c\n", (int)tmp);
5323                 } );
5324                 FTST(ftst);
5325             }
5326             else {
5327                 /* Assume it was a minus followed by a one-letter named
5328                  * subroutine call (or a -bareword), then. */
5329                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5330                         "### '-%c' looked like a file test but was not\n",
5331                         (int) tmp);
5332                 } );
5333                 s = --PL_bufptr;
5334             }
5335         }
5336         {
5337             const char tmp = *s++;
5338             if (*s == tmp) {
5339                 s++;
5340                 if (PL_expect == XOPERATOR)
5341                     TERM(POSTDEC);
5342                 else
5343                     OPERATOR(PREDEC);
5344             }
5345             else if (*s == '>') {
5346                 s++;
5347                 s = SKIPSPACE1(s);
5348                 if (isIDFIRST_lazy_if(s,UTF)) {
5349                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5350                     TOKEN(ARROW);
5351                 }
5352                 else if (*s == '$')
5353                     OPERATOR(ARROW);
5354                 else
5355                     TERM(ARROW);
5356             }
5357             if (PL_expect == XOPERATOR) {
5358                 if (*s == '=' && !PL_lex_allbrackets &&
5359                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5360                     s--;
5361                     TOKEN(0);
5362                 }
5363                 Aop(OP_SUBTRACT);
5364             }
5365             else {
5366                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5367                     check_uni();
5368                 OPERATOR('-');          /* unary minus */
5369             }
5370         }
5371
5372     case '+':
5373         {
5374             const char tmp = *s++;
5375             if (*s == tmp) {
5376                 s++;
5377                 if (PL_expect == XOPERATOR)
5378                     TERM(POSTINC);
5379                 else
5380                     OPERATOR(PREINC);
5381             }
5382             if (PL_expect == XOPERATOR) {
5383                 if (*s == '=' && !PL_lex_allbrackets &&
5384                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5385                     s--;
5386                     TOKEN(0);
5387                 }
5388                 Aop(OP_ADD);
5389             }
5390             else {
5391                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5392                     check_uni();
5393                 OPERATOR('+');
5394             }
5395         }
5396
5397     case '*':
5398         if (PL_expect != XOPERATOR) {
5399             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5400             PL_expect = XOPERATOR;
5401             force_ident(PL_tokenbuf, '*');
5402             if (!*PL_tokenbuf)
5403                 PREREF('*');
5404             TERM('*');
5405         }
5406         s++;
5407         if (*s == '*') {
5408             s++;
5409             if (*s == '=' && !PL_lex_allbrackets &&
5410                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5411                 s -= 2;
5412                 TOKEN(0);
5413             }
5414             PWop(OP_POW);
5415         }
5416         if (*s == '=' && !PL_lex_allbrackets &&
5417                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5418             s--;
5419             TOKEN(0);
5420         }
5421         Mop(OP_MULTIPLY);
5422
5423     case '%':
5424         if (PL_expect == XOPERATOR) {
5425             if (s[1] == '=' && !PL_lex_allbrackets &&
5426                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5427                 TOKEN(0);
5428             ++s;
5429             Mop(OP_MODULO);
5430         }
5431         PL_tokenbuf[0] = '%';
5432         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5433                 sizeof PL_tokenbuf - 1, FALSE);
5434         if (!PL_tokenbuf[1]) {
5435             PREREF('%');
5436         }
5437         PL_pending_ident = '%';
5438         TERM('%');
5439
5440     case '^':
5441         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5442                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5443             TOKEN(0);
5444         s++;
5445         BOop(OP_BIT_XOR);
5446     case '[':
5447         if (PL_lex_brackets > 100)
5448             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5449         PL_lex_brackstack[PL_lex_brackets++] = 0;
5450         PL_lex_allbrackets++;
5451         {
5452             const char tmp = *s++;
5453             OPERATOR(tmp);
5454         }
5455     case '~':
5456         if (s[1] == '~'
5457             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5458         {
5459             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5460                 TOKEN(0);
5461             s += 2;
5462             Eop(OP_SMARTMATCH);
5463         }
5464         s++;
5465         OPERATOR('~');
5466     case ',':
5467         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5468             TOKEN(0);
5469         s++;
5470         OPERATOR(',');
5471     case ':':
5472         if (s[1] == ':') {
5473             len = 0;
5474             goto just_a_word_zero_gv;
5475         }
5476         s++;
5477         switch (PL_expect) {
5478             OP *attrs;
5479 #ifdef PERL_MAD
5480             I32 stuffstart;
5481 #endif
5482         case XOPERATOR:
5483             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5484                 break;
5485             PL_bufptr = s;      /* update in case we back off */
5486             if (*s == '=') {
5487                 Perl_croak(aTHX_
5488                            "Use of := for an empty attribute list is not allowed");
5489             }
5490             goto grabattrs;
5491         case XATTRBLOCK:
5492             PL_expect = XBLOCK;
5493             goto grabattrs;
5494         case XATTRTERM:
5495             PL_expect = XTERMBLOCK;
5496          grabattrs:
5497 #ifdef PERL_MAD
5498             stuffstart = s - SvPVX(PL_linestr) - 1;
5499 #endif
5500             s = PEEKSPACE(s);
5501             attrs = NULL;
5502             while (isIDFIRST_lazy_if(s,UTF)) {
5503                 I32 tmp;
5504                 SV *sv;
5505                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5506                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5507                     if (tmp < 0) tmp = -tmp;
5508                     switch (tmp) {
5509                     case KEY_or:
5510                     case KEY_and:
5511                     case KEY_for:
5512                     case KEY_foreach:
5513                     case KEY_unless:
5514                     case KEY_if:
5515                     case KEY_while:
5516                     case KEY_until:
5517                         goto got_attrs;
5518                     default:
5519                         break;
5520                     }
5521                 }
5522                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5523                 if (*d == '(') {
5524                     d = scan_str(d,TRUE,TRUE,FALSE);
5525                     if (!d) {
5526                         /* MUST advance bufptr here to avoid bogus
5527                            "at end of line" context messages from yyerror().
5528                          */
5529                         PL_bufptr = s + len;
5530                         yyerror("Unterminated attribute parameter in attribute list");
5531                         if (attrs)
5532                             op_free(attrs);
5533                         sv_free(sv);
5534                         return REPORT(0);       /* EOF indicator */
5535                     }
5536                 }
5537                 if (PL_lex_stuff) {
5538                     sv_catsv(sv, PL_lex_stuff);
5539                     attrs = op_append_elem(OP_LIST, attrs,
5540                                         newSVOP(OP_CONST, 0, sv));
5541                     SvREFCNT_dec(PL_lex_stuff);
5542                     PL_lex_stuff = NULL;
5543                 }
5544                 else {
5545                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5546                         sv_free(sv);
5547                         if (PL_in_my == KEY_our) {
5548                             deprecate(":unique");
5549                         }
5550                         else
5551                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5552                     }
5553
5554                     /* NOTE: any CV attrs applied here need to be part of
5555                        the CVf_BUILTIN_ATTRS define in cv.h! */
5556                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5557                         sv_free(sv);
5558                         CvLVALUE_on(PL_compcv);
5559                     }
5560                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5561                         sv_free(sv);
5562                         deprecate(":locked");
5563                     }
5564                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5565                         sv_free(sv);
5566                         CvMETHOD_on(PL_compcv);
5567                     }
5568                     /* After we've set the flags, it could be argued that
5569                        we don't need to do the attributes.pm-based setting
5570                        process, and shouldn't bother appending recognized
5571                        flags.  To experiment with that, uncomment the
5572                        following "else".  (Note that's already been
5573                        uncommented.  That keeps the above-applied built-in
5574                        attributes from being intercepted (and possibly
5575                        rejected) by a package's attribute routines, but is
5576                        justified by the performance win for the common case
5577                        of applying only built-in attributes.) */
5578                     else
5579                         attrs = op_append_elem(OP_LIST, attrs,
5580                                             newSVOP(OP_CONST, 0,
5581                                                     sv));
5582                 }
5583                 s = PEEKSPACE(d);
5584                 if (*s == ':' && s[1] != ':')
5585                     s = PEEKSPACE(s+1);
5586                 else if (s == d)
5587                     break;      /* require real whitespace or :'s */
5588                 /* XXX losing whitespace on sequential attributes here */
5589             }
5590             {
5591                 const char tmp
5592                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5593                 if (*s != ';' && *s != '}' && *s != tmp
5594                     && (tmp != '=' || *s != ')')) {
5595                     const char q = ((*s == '\'') ? '"' : '\'');
5596                     /* If here for an expression, and parsed no attrs, back
5597                        off. */
5598                     if (tmp == '=' && !attrs) {
5599                         s = PL_bufptr;
5600                         break;
5601                     }
5602                     /* MUST advance bufptr here to avoid bogus "at end of line"
5603                        context messages from yyerror().
5604                     */
5605                     PL_bufptr = s;
5606                     yyerror( (const char *)
5607                              (*s
5608                               ? Perl_form(aTHX_ "Invalid separator character "
5609                                           "%c%c%c in attribute list", q, *s, q)
5610                               : "Unterminated attribute list" ) );
5611                     if (attrs)
5612                         op_free(attrs);
5613                     OPERATOR(':');
5614                 }
5615             }
5616         got_attrs:
5617             if (attrs) {
5618                 start_force(PL_curforce);
5619                 NEXTVAL_NEXTTOKE.opval = attrs;
5620                 CURMAD('_', PL_nextwhite);
5621                 force_next(THING);
5622             }
5623 #ifdef PERL_MAD
5624             if (PL_madskills) {
5625                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5626                                      (s - SvPVX(PL_linestr)) - stuffstart);
5627             }
5628 #endif
5629             TOKEN(COLONATTR);
5630         }
5631         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5632             s--;
5633             TOKEN(0);
5634         }
5635         PL_lex_allbrackets--;
5636         OPERATOR(':');
5637     case '(':
5638         s++;
5639         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5640             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5641         else
5642             PL_expect = XTERM;
5643         s = SKIPSPACE1(s);
5644         PL_lex_allbrackets++;
5645         TOKEN('(');
5646     case ';':
5647         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5648             TOKEN(0);
5649         CLINE;
5650         s++;
5651         OPERATOR(';');
5652     case ')':
5653         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5654             TOKEN(0);
5655         s++;
5656         PL_lex_allbrackets--;
5657         s = SKIPSPACE1(s);
5658         if (*s == '{')
5659             PREBLOCK(')');
5660         TERM(')');
5661     case ']':
5662         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5663             TOKEN(0);
5664         s++;
5665         if (PL_lex_brackets <= 0)
5666             yyerror("Unmatched right square bracket");
5667         else
5668             --PL_lex_brackets;
5669         PL_lex_allbrackets--;
5670         if (PL_lex_state == LEX_INTERPNORMAL) {
5671             if (PL_lex_brackets == 0) {
5672                 if (*s == '-' && s[1] == '>')
5673                     PL_lex_state = LEX_INTERPENDMAYBE;
5674                 else if (*s != '[' && *s != '{')
5675                     PL_lex_state = LEX_INTERPEND;
5676             }
5677         }
5678         TERM(']');
5679     case '{':
5680       leftbracket:
5681         s++;
5682         if (PL_lex_brackets > 100) {
5683             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5684         }
5685         switch (PL_expect) {
5686         case XTERM:
5687             if (PL_lex_formbrack) {
5688                 s--;
5689                 PRETERMBLOCK(DO);
5690             }
5691             if (PL_oldoldbufptr == PL_last_lop)
5692                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5693             else
5694                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5695             PL_lex_allbrackets++;
5696             OPERATOR(HASHBRACK);
5697         case XOPERATOR:
5698             while (s < PL_bufend && SPACE_OR_TAB(*s))
5699                 s++;
5700             d = s;
5701             PL_tokenbuf[0] = '\0';
5702             if (d < PL_bufend && *d == '-') {
5703                 PL_tokenbuf[0] = '-';
5704                 d++;
5705                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5706                     d++;
5707             }
5708             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5709                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5710                               FALSE, &len);
5711                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5712                     d++;
5713                 if (*d == '}') {
5714                     const char minus = (PL_tokenbuf[0] == '-');
5715                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5716                     if (minus)
5717                         force_next('-');
5718                 }
5719             }
5720             /* FALL THROUGH */
5721         case XATTRBLOCK:
5722         case XBLOCK:
5723             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5724             PL_lex_allbrackets++;
5725             PL_expect = XSTATE;
5726             break;
5727         case XATTRTERM:
5728         case XTERMBLOCK:
5729             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5730             PL_lex_allbrackets++;
5731             PL_expect = XSTATE;
5732             break;
5733         default: {
5734                 const char *t;
5735                 if (PL_oldoldbufptr == PL_last_lop)
5736                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5737                 else
5738                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5739                 PL_lex_allbrackets++;
5740                 s = SKIPSPACE1(s);
5741                 if (*s == '}') {
5742                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5743                         PL_expect = XTERM;
5744                         /* This hack is to get the ${} in the message. */
5745                         PL_bufptr = s+1;
5746                         yyerror("syntax error");
5747                         break;
5748                     }
5749                     OPERATOR(HASHBRACK);
5750                 }
5751                 /* This hack serves to disambiguate a pair of curlies
5752                  * as being a block or an anon hash.  Normally, expectation
5753                  * determines that, but in cases where we're not in a
5754                  * position to expect anything in particular (like inside
5755                  * eval"") we have to resolve the ambiguity.  This code
5756                  * covers the case where the first term in the curlies is a
5757                  * quoted string.  Most other cases need to be explicitly
5758                  * disambiguated by prepending a "+" before the opening
5759                  * curly in order to force resolution as an anon hash.
5760                  *
5761                  * XXX should probably propagate the outer expectation
5762                  * into eval"" to rely less on this hack, but that could
5763                  * potentially break current behavior of eval"".
5764                  * GSAR 97-07-21
5765                  */
5766                 t = s;
5767                 if (*s == '\'' || *s == '"' || *s == '`') {
5768                     /* common case: get past first string, handling escapes */
5769                     for (t++; t < PL_bufend && *t != *s;)
5770                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5771                             t++;
5772                     t++;
5773                 }
5774                 else if (*s == 'q') {
5775                     if (++t < PL_bufend
5776                         && (!isALNUM(*t)
5777                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5778                                 && !isALNUM(*t))))
5779                     {
5780                         /* skip q//-like construct */
5781                         const char *tmps;
5782                         char open, close, term;
5783                         I32 brackets = 1;
5784
5785                         while (t < PL_bufend && isSPACE(*t))
5786                             t++;
5787                         /* check for q => */
5788                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5789                             OPERATOR(HASHBRACK);
5790                         }
5791                         term = *t;
5792                         open = term;
5793                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5794                             term = tmps[5];
5795                         close = term;
5796                         if (open == close)
5797                             for (t++; t < PL_bufend; t++) {
5798                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5799                                     t++;
5800                                 else if (*t == open)
5801                                     break;
5802                             }
5803                         else {
5804                             for (t++; t < PL_bufend; t++) {
5805                                 if (*t == '\\' && t+1 < PL_bufend)
5806                                     t++;
5807                                 else if (*t == close && --brackets <= 0)
5808                                     break;
5809                                 else if (*t == open)
5810                                     brackets++;
5811                             }
5812                         }
5813                         t++;
5814                     }
5815                     else
5816                         /* skip plain q word */
5817                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5818                              t += UTF8SKIP(t);
5819                 }
5820                 else if (isALNUM_lazy_if(t,UTF)) {
5821                     t += UTF8SKIP(t);
5822                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5823                          t += UTF8SKIP(t);
5824                 }
5825                 while (t < PL_bufend && isSPACE(*t))
5826                     t++;
5827                 /* if comma follows first term, call it an anon hash */
5828                 /* XXX it could be a comma expression with loop modifiers */
5829                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5830                                    || (*t == '=' && t[1] == '>')))
5831                     OPERATOR(HASHBRACK);
5832                 if (PL_expect == XREF)
5833                     PL_expect = XTERM;
5834                 else {
5835                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5836                     PL_expect = XSTATE;
5837                 }
5838             }
5839             break;
5840         }
5841         pl_yylval.ival = CopLINE(PL_curcop);
5842         if (isSPACE(*s) || *s == '#')
5843             PL_copline = NOLINE;   /* invalidate current command line number */
5844         TOKEN('{');
5845     case '}':
5846         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5847             TOKEN(0);
5848       rightbracket:
5849         s++;
5850         if (PL_lex_brackets <= 0)
5851             yyerror("Unmatched right curly bracket");
5852         else
5853             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5854         PL_lex_allbrackets--;
5855         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5856             PL_lex_formbrack = 0;
5857         if (PL_lex_state == LEX_INTERPNORMAL) {
5858             if (PL_lex_brackets == 0) {
5859                 if (PL_expect & XFAKEBRACK) {
5860                     PL_expect &= XENUMMASK;
5861                     PL_lex_state = LEX_INTERPEND;
5862                     PL_bufptr = s;
5863 #if 0
5864                     if (PL_madskills) {
5865                         if (!PL_thiswhite)
5866                             PL_thiswhite = newSVpvs("");
5867                         sv_catpvs(PL_thiswhite,"}");
5868                     }
5869 #endif
5870                     return yylex();     /* ignore fake brackets */
5871                 }
5872                 if (*s == '-' && s[1] == '>')
5873                     PL_lex_state = LEX_INTERPENDMAYBE;
5874                 else if (*s != '[' && *s != '{')
5875                     PL_lex_state = LEX_INTERPEND;
5876             }
5877         }
5878         if (PL_expect & XFAKEBRACK) {
5879             PL_expect &= XENUMMASK;
5880             PL_bufptr = s;
5881             return yylex();             /* ignore fake brackets */
5882         }
5883         start_force(PL_curforce);
5884         if (PL_madskills) {
5885             curmad('X', newSVpvn(s-1,1));
5886             CURMAD('_', PL_thiswhite);
5887         }
5888         force_next('}');
5889 #ifdef PERL_MAD
5890         if (!PL_thistoken)
5891             PL_thistoken = newSVpvs("");
5892 #endif
5893         TOKEN(';');
5894     case '&':
5895         s++;
5896         if (*s++ == '&') {
5897             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5898                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5899                 s -= 2;
5900                 TOKEN(0);
5901             }
5902             AOPERATOR(ANDAND);
5903         }
5904         s--;
5905         if (PL_expect == XOPERATOR) {
5906             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5907                 && isIDFIRST_lazy_if(s,UTF))
5908             {
5909                 CopLINE_dec(PL_curcop);
5910                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5911                 CopLINE_inc(PL_curcop);
5912             }
5913             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5914                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5915                 s--;
5916                 TOKEN(0);
5917             }
5918             BAop(OP_BIT_AND);
5919         }
5920
5921         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5922         if (*PL_tokenbuf) {
5923             PL_expect = XOPERATOR;
5924             force_ident(PL_tokenbuf, '&');
5925         }
5926         else
5927             PREREF('&');
5928         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5929         TERM('&');
5930
5931     case '|':
5932         s++;
5933         if (*s++ == '|') {
5934             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5935                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5936                 s -= 2;
5937                 TOKEN(0);
5938             }
5939             AOPERATOR(OROR);
5940         }
5941         s--;
5942         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5943                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5944             s--;
5945             TOKEN(0);
5946         }
5947         BOop(OP_BIT_OR);
5948     case '=':
5949         s++;
5950         {
5951             const char tmp = *s++;
5952             if (tmp == '=') {
5953                 if (!PL_lex_allbrackets &&
5954                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5955                     s -= 2;
5956                     TOKEN(0);
5957                 }
5958                 Eop(OP_EQ);
5959             }
5960             if (tmp == '>') {
5961                 if (!PL_lex_allbrackets &&
5962                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5963                     s -= 2;
5964                     TOKEN(0);
5965                 }
5966                 OPERATOR(',');
5967             }
5968             if (tmp == '~')
5969                 PMop(OP_MATCH);
5970             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5971                 && strchr("+-*/%.^&|<",tmp))
5972                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5973                             "Reversed %c= operator",(int)tmp);
5974             s--;
5975             if (PL_expect == XSTATE && isALPHA(tmp) &&
5976                 (s == PL_linestart+1 || s[-2] == '\n') )
5977                 {
5978                     if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
5979                         d = PL_bufend;
5980                         while (s < d) {
5981                             if (*s++ == '\n') {
5982                                 incline(s);
5983                                 if (strnEQ(s,"=cut",4)) {
5984                                     s = strchr(s,'\n');
5985                                     if (s)
5986                                         s++;
5987                                     else
5988                                         s = d;
5989                                     incline(s);
5990                                     goto retry;
5991                                 }
5992                             }
5993                         }
5994                         goto retry;
5995                     }
5996 #ifdef PERL_MAD
5997                     if (PL_madskills) {
5998                         if (!PL_thiswhite)
5999                             PL_thiswhite = newSVpvs("");
6000                         sv_catpvn(PL_thiswhite, PL_linestart,
6001                                   PL_bufend - PL_linestart);
6002                     }
6003 #endif
6004                     s = PL_bufend;
6005                     PL_parser->in_pod = 1;
6006                     goto retry;
6007                 }
6008         }
6009         if (PL_lex_brackets < PL_lex_formbrack) {
6010             const char *t = s;
6011 #ifdef PERL_STRICT_CR
6012             while (SPACE_OR_TAB(*t))
6013 #else
6014             while (SPACE_OR_TAB(*t) || *t == '\r')
6015 #endif
6016                 t++;
6017             if (*t == '\n' || *t == '#') {
6018                 s--;
6019                 PL_expect = XBLOCK;
6020                 goto leftbracket;
6021             }
6022         }
6023         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6024             s--;
6025             TOKEN(0);
6026         }
6027         pl_yylval.ival = 0;
6028         OPERATOR(ASSIGNOP);
6029     case '!':
6030         s++;
6031         {
6032             const char tmp = *s++;
6033             if (tmp == '=') {
6034                 /* was this !=~ where !~ was meant?
6035                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6036
6037                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6038                     const char *t = s+1;
6039
6040                     while (t < PL_bufend && isSPACE(*t))
6041                         ++t;
6042
6043                     if (*t == '/' || *t == '?' ||
6044                         ((*t == 'm' || *t == 's' || *t == 'y')
6045                          && !isALNUM(t[1])) ||
6046                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6047                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6048                                     "!=~ should be !~");
6049                 }
6050                 if (!PL_lex_allbrackets &&
6051                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6052                     s -= 2;
6053                     TOKEN(0);
6054                 }
6055                 Eop(OP_NE);
6056             }
6057             if (tmp == '~')
6058                 PMop(OP_NOT);
6059         }
6060         s--;
6061         OPERATOR('!');
6062     case '<':
6063         if (PL_expect != XOPERATOR) {
6064             if (s[1] != '<' && !strchr(s,'>'))
6065                 check_uni();
6066             if (s[1] == '<')
6067                 s = scan_heredoc(s);
6068             else
6069                 s = scan_inputsymbol(s);
6070             TERM(sublex_start());
6071         }
6072         s++;
6073         {
6074             char tmp = *s++;
6075             if (tmp == '<') {
6076                 if (*s == '=' && !PL_lex_allbrackets &&
6077                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6078                     s -= 2;
6079                     TOKEN(0);
6080                 }
6081                 SHop(OP_LEFT_SHIFT);
6082             }
6083             if (tmp == '=') {
6084                 tmp = *s++;
6085                 if (tmp == '>') {
6086                     if (!PL_lex_allbrackets &&
6087                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6088                         s -= 3;
6089                         TOKEN(0);
6090                     }
6091                     Eop(OP_NCMP);
6092                 }
6093                 s--;
6094                 if (!PL_lex_allbrackets &&
6095                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6096                     s -= 2;
6097                     TOKEN(0);
6098                 }
6099                 Rop(OP_LE);
6100             }
6101         }
6102         s--;
6103         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6104             s--;
6105             TOKEN(0);
6106         }
6107         Rop(OP_LT);
6108     case '>':
6109         s++;
6110         {
6111             const char tmp = *s++;
6112             if (tmp == '>') {
6113                 if (*s == '=' && !PL_lex_allbrackets &&
6114                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6115                     s -= 2;
6116                     TOKEN(0);
6117                 }
6118                 SHop(OP_RIGHT_SHIFT);
6119             }
6120             else if (tmp == '=') {
6121                 if (!PL_lex_allbrackets &&
6122                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6123                     s -= 2;
6124                     TOKEN(0);
6125                 }
6126                 Rop(OP_GE);
6127             }
6128         }
6129         s--;
6130         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6131             s--;
6132             TOKEN(0);
6133         }
6134         Rop(OP_GT);
6135
6136     case '$':
6137         CLINE;
6138
6139         if (PL_expect == XOPERATOR) {
6140             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6141                 return deprecate_commaless_var_list();
6142             }
6143         }
6144
6145         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6146             PL_tokenbuf[0] = '@';
6147             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6148                            sizeof PL_tokenbuf - 1, FALSE);
6149             if (PL_expect == XOPERATOR)
6150                 no_op("Array length", s);
6151             if (!PL_tokenbuf[1])
6152                 PREREF(DOLSHARP);
6153             PL_expect = XOPERATOR;
6154             PL_pending_ident = '#';
6155             TOKEN(DOLSHARP);
6156         }
6157
6158         PL_tokenbuf[0] = '$';
6159         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6160                        sizeof PL_tokenbuf - 1, FALSE);
6161         if (PL_expect == XOPERATOR)
6162             no_op("Scalar", s);
6163         if (!PL_tokenbuf[1]) {
6164             if (s == PL_bufend)
6165                 yyerror("Final $ should be \\$ or $name");
6166             PREREF('$');
6167         }
6168
6169         d = s;
6170         {
6171             const char tmp = *s;
6172             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6173                 s = SKIPSPACE1(s);
6174
6175             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6176                 && intuit_more(s)) {
6177                 if (*s == '[') {
6178                     PL_tokenbuf[0] = '@';
6179                     if (ckWARN(WARN_SYNTAX)) {
6180                         char *t = s+1;
6181
6182                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6183                             t++;
6184                         if (*t++ == ',') {
6185                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6186                             while (t < PL_bufend && *t != ']')
6187                                 t++;
6188                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6189                                         "Multidimensional syntax %.*s not supported",
6190                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6191                         }
6192                     }
6193                 }
6194                 else if (*s == '{') {
6195                     char *t;
6196                     PL_tokenbuf[0] = '%';
6197                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6198                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6199                         {
6200                             char tmpbuf[sizeof PL_tokenbuf];
6201                             do {
6202                                 t++;
6203                             } while (isSPACE(*t));
6204                             if (isIDFIRST_lazy_if(t,UTF)) {
6205                                 STRLEN len;
6206                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6207                                               &len);
6208                                 while (isSPACE(*t))
6209                                     t++;
6210                                 if (*t == ';'
6211                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6212                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6213                                                 "You need to quote \"%"SVf"\"",
6214                                                   SVfARG(newSVpvn_flags(tmpbuf, len,
6215                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6216                             }
6217                         }
6218                 }
6219             }
6220
6221             PL_expect = XOPERATOR;
6222             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6223                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6224                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6225                     PL_expect = XOPERATOR;
6226                 else if (strchr("$@\"'`q", *s))
6227                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6228                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6229                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6230                 else if (isIDFIRST_lazy_if(s,UTF)) {
6231                     char tmpbuf[sizeof PL_tokenbuf];
6232                     int t2;
6233                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6234                     if ((t2 = keyword(tmpbuf, len, 0))) {
6235                         /* binary operators exclude handle interpretations */
6236                         switch (t2) {
6237                         case -KEY_x:
6238                         case -KEY_eq:
6239                         case -KEY_ne:
6240                         case -KEY_gt:
6241                         case -KEY_lt:
6242                         case -KEY_ge:
6243                         case -KEY_le:
6244                         case -KEY_cmp:
6245                             break;
6246                         default:
6247                             PL_expect = XTERM;  /* e.g. print $fh length() */
6248                             break;
6249                         }
6250                     }
6251                     else {
6252                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6253                     }
6254                 }
6255                 else if (isDIGIT(*s))
6256                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6257                 else if (*s == '.' && isDIGIT(s[1]))
6258                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6259                 else if ((*s == '?' || *s == '-' || *s == '+')
6260                          && !isSPACE(s[1]) && s[1] != '=')
6261                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6262                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6263                          && s[1] != '/')
6264                     PL_expect = XTERM;          /* e.g. print $fh /.../
6265                                                    XXX except DORDOR operator
6266                                                 */
6267                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6268                          && s[2] != '=')
6269                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6270             }
6271         }
6272         PL_pending_ident = '$';
6273         TOKEN('$');
6274
6275     case '@':
6276         if (PL_expect == XOPERATOR)
6277             no_op("Array", s);
6278         PL_tokenbuf[0] = '@';
6279         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6280         if (!PL_tokenbuf[1]) {
6281             PREREF('@');
6282         }
6283         if (PL_lex_state == LEX_NORMAL)
6284             s = SKIPSPACE1(s);
6285         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6286             if (*s == '{')
6287                 PL_tokenbuf[0] = '%';
6288
6289             /* Warn about @ where they meant $. */
6290             if (*s == '[' || *s == '{') {
6291                 if (ckWARN(WARN_SYNTAX)) {
6292                     const char *t = s + 1;
6293                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6294                         t += UTF ? UTF8SKIP(t) : 1;
6295                     if (*t == '}' || *t == ']') {
6296                         t++;
6297                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6298        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6299                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6300                             "Scalar value %"SVf" better written as $%"SVf,
6301                             SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6302                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6303                             SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6304                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6305                     }
6306                 }
6307             }
6308         }
6309         PL_pending_ident = '@';
6310         TERM('@');
6311
6312      case '/':                  /* may be division, defined-or, or pattern */
6313         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6314             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6315                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6316                 TOKEN(0);
6317             s += 2;
6318             AOPERATOR(DORDOR);
6319         }
6320      case '?':                  /* may either be conditional or pattern */
6321         if (PL_expect == XOPERATOR) {
6322              char tmp = *s++;
6323              if(tmp == '?') {
6324                 if (!PL_lex_allbrackets &&
6325                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6326                     s--;
6327                     TOKEN(0);
6328                 }
6329                 PL_lex_allbrackets++;
6330                 OPERATOR('?');
6331              }
6332              else {
6333                  tmp = *s++;
6334                  if(tmp == '/') {
6335                      /* A // operator. */
6336                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6337                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6338                                             LEX_FAKEEOF_LOGIC)) {
6339                         s -= 2;
6340                         TOKEN(0);
6341                     }
6342                     AOPERATOR(DORDOR);
6343                  }
6344                  else {
6345                      s--;
6346                      if (*s == '=' && !PL_lex_allbrackets &&
6347                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6348                          s--;
6349                          TOKEN(0);
6350                      }
6351                      Mop(OP_DIVIDE);
6352                  }
6353              }
6354          }
6355          else {
6356              /* Disable warning on "study /blah/" */
6357              if (PL_oldoldbufptr == PL_last_uni
6358               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6359                   || memNE(PL_last_uni, "study", 5)
6360                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6361               ))
6362                  check_uni();
6363              if (*s == '?')
6364                  deprecate("?PATTERN? without explicit operator");
6365              s = scan_pat(s,OP_MATCH);
6366              TERM(sublex_start());
6367          }
6368
6369     case '.':
6370         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6371 #ifdef PERL_STRICT_CR
6372             && s[1] == '\n'
6373 #else
6374             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6375 #endif
6376             && (s == PL_linestart || s[-1] == '\n') )
6377         {
6378             PL_lex_formbrack = 0;
6379             PL_expect = XSTATE;
6380             goto rightbracket;
6381         }
6382         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6383             s += 3;
6384             OPERATOR(YADAYADA);
6385         }
6386         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6387             char tmp = *s++;
6388             if (*s == tmp) {
6389                 if (!PL_lex_allbrackets &&
6390                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6391                     s--;
6392                     TOKEN(0);
6393                 }
6394                 s++;
6395                 if (*s == tmp) {
6396                     s++;
6397                     pl_yylval.ival = OPf_SPECIAL;
6398                 }
6399                 else
6400                     pl_yylval.ival = 0;
6401                 OPERATOR(DOTDOT);
6402             }
6403             if (*s == '=' && !PL_lex_allbrackets &&
6404                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6405                 s--;
6406                 TOKEN(0);
6407             }
6408             Aop(OP_CONCAT);
6409         }
6410         /* FALL THROUGH */
6411     case '0': case '1': case '2': case '3': case '4':
6412     case '5': case '6': case '7': case '8': case '9':
6413         s = scan_num(s, &pl_yylval);
6414         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6415         if (PL_expect == XOPERATOR)
6416             no_op("Number",s);
6417         TERM(THING);
6418
6419     case '\'':
6420         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6421         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6422         if (PL_expect == XOPERATOR) {
6423             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6424                 return deprecate_commaless_var_list();
6425             }
6426             else
6427                 no_op("String",s);
6428         }
6429         if (!s)
6430             missingterm(NULL);
6431         pl_yylval.ival = OP_CONST;
6432         TERM(sublex_start());
6433
6434     case '"':
6435         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6436         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6437         if (PL_expect == XOPERATOR) {
6438             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6439                 return deprecate_commaless_var_list();
6440             }
6441             else
6442                 no_op("String",s);
6443         }
6444         if (!s)
6445             missingterm(NULL);
6446         pl_yylval.ival = OP_CONST;
6447         /* FIXME. I think that this can be const if char *d is replaced by
6448            more localised variables.  */
6449         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6450             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6451                 pl_yylval.ival = OP_STRINGIFY;
6452                 break;
6453             }
6454         }
6455         TERM(sublex_start());
6456
6457     case '`':
6458         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6459         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6460         if (PL_expect == XOPERATOR)
6461             no_op("Backticks",s);
6462         if (!s)
6463             missingterm(NULL);
6464         readpipe_override();
6465         TERM(sublex_start());
6466
6467     case '\\':
6468         s++;
6469         if (PL_lex_inwhat && isDIGIT(*s))
6470             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6471                            *s, *s);
6472         if (PL_expect == XOPERATOR)
6473             no_op("Backslash",s);
6474         OPERATOR(REFGEN);
6475
6476     case 'v':
6477         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6478             char *start = s + 2;
6479             while (isDIGIT(*start) || *start == '_')
6480                 start++;
6481             if (*start == '.' && isDIGIT(start[1])) {
6482                 s = scan_num(s, &pl_yylval);
6483                 TERM(THING);
6484             }
6485             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6486             else if (!isALPHA(*start) && (PL_expect == XTERM
6487                         || PL_expect == XREF || PL_expect == XSTATE
6488                         || PL_expect == XTERMORDORDOR)) {
6489                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6490                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6491                 if (!gv) {
6492                     s = scan_num(s, &pl_yylval);
6493                     TERM(THING);
6494                 }
6495             }
6496         }
6497         goto keylookup;
6498     case 'x':
6499         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6500             s++;
6501             Mop(OP_REPEAT);
6502         }
6503         goto keylookup;
6504
6505     case '_':
6506     case 'a': case 'A':
6507     case 'b': case 'B':
6508     case 'c': case 'C':
6509     case 'd': case 'D':
6510     case 'e': case 'E':
6511     case 'f': case 'F':
6512     case 'g': case 'G':
6513     case 'h': case 'H':
6514     case 'i': case 'I':
6515     case 'j': case 'J':
6516     case 'k': case 'K':
6517     case 'l': case 'L':
6518     case 'm': case 'M':
6519     case 'n': case 'N':
6520     case 'o': case 'O':
6521     case 'p': case 'P':
6522     case 'q': case 'Q':
6523     case 'r': case 'R':
6524     case 's': case 'S':
6525     case 't': case 'T':
6526     case 'u': case 'U':
6527               case 'V':
6528     case 'w': case 'W':
6529               case 'X':
6530     case 'y': case 'Y':
6531     case 'z': case 'Z':
6532
6533       keylookup: {
6534         bool anydelim;
6535         I32 tmp;
6536
6537         orig_keyword = 0;
6538         gv = NULL;
6539         gvp = NULL;
6540
6541         PL_bufptr = s;
6542         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6543
6544         /* Some keywords can be followed by any delimiter, including ':' */
6545         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6546
6547         /* x::* is just a word, unless x is "CORE" */
6548         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6549             goto just_a_word;
6550
6551         d = s;
6552         while (d < PL_bufend && isSPACE(*d))
6553                 d++;    /* no comments skipped here, or s### is misparsed */
6554
6555         /* Is this a word before a => operator? */
6556         if (*d == '=' && d[1] == '>') {
6557             CLINE;
6558             pl_yylval.opval
6559                 = (OP*)newSVOP(OP_CONST, 0,
6560                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6561             pl_yylval.opval->op_private = OPpCONST_BARE;
6562             TERM(WORD);
6563         }
6564
6565         /* Check for plugged-in keyword */
6566         {
6567             OP *o;
6568             int result;
6569             char *saved_bufptr = PL_bufptr;
6570             PL_bufptr = s;
6571             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6572             s = PL_bufptr;
6573             if (result == KEYWORD_PLUGIN_DECLINE) {
6574                 /* not a plugged-in keyword */
6575                 PL_bufptr = saved_bufptr;
6576             } else if (result == KEYWORD_PLUGIN_STMT) {
6577                 pl_yylval.opval = o;
6578                 CLINE;
6579                 PL_expect = XSTATE;
6580                 return REPORT(PLUGSTMT);
6581             } else if (result == KEYWORD_PLUGIN_EXPR) {
6582                 pl_yylval.opval = o;
6583                 CLINE;
6584                 PL_expect = XOPERATOR;
6585                 return REPORT(PLUGEXPR);
6586             } else {
6587                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6588                                         PL_tokenbuf);
6589             }
6590         }
6591
6592         /* Check for built-in keyword */
6593         tmp = keyword(PL_tokenbuf, len, 0);
6594
6595         /* Is this a label? */
6596         if (!anydelim && PL_expect == XSTATE
6597               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6598             s = d + 1;
6599             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6600                                             newSVpvn_flags(PL_tokenbuf,
6601                                                         len, UTF ? SVf_UTF8 : 0));
6602             CLINE;
6603             TOKEN(LABEL);
6604         }
6605
6606         if (tmp < 0) {                  /* second-class keyword? */
6607             GV *ogv = NULL;     /* override (winner) */
6608             GV *hgv = NULL;     /* hidden (loser) */
6609             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6610                 CV *cv;
6611                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6612                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6613                     (cv = GvCVu(gv)))
6614                 {
6615                     if (GvIMPORTED_CV(gv))
6616                         ogv = gv;
6617                     else if (! CvMETHOD(cv))
6618                         hgv = gv;
6619                 }
6620                 if (!ogv &&
6621                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6622                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
6623                     (gv = *gvp) && isGV_with_GP(gv) &&
6624                     GvCVu(gv) && GvIMPORTED_CV(gv))
6625                 {
6626                     ogv = gv;
6627                 }
6628             }
6629             if (ogv) {
6630                 orig_keyword = tmp;
6631                 tmp = 0;                /* overridden by import or by GLOBAL */
6632             }
6633             else if (gv && !gvp
6634                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6635                      && GvCVu(gv))
6636             {
6637                 tmp = 0;                /* any sub overrides "weak" keyword */
6638             }
6639             else {                      /* no override */
6640                 tmp = -tmp;
6641                 if (tmp == KEY_dump) {
6642                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6643                                    "dump() better written as CORE::dump()");
6644                 }
6645                 gv = NULL;
6646                 gvp = 0;
6647                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6648                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6649                                    "Ambiguous call resolved as CORE::%s(), "
6650                                    "qualify as such or use &",
6651                                    GvENAME(hgv));
6652             }
6653         }
6654
6655       reserved_word:
6656         switch (tmp) {
6657
6658         default:                        /* not a keyword */
6659             /* Trade off - by using this evil construction we can pull the
6660                variable gv into the block labelled keylookup. If not, then
6661                we have to give it function scope so that the goto from the
6662                earlier ':' case doesn't bypass the initialisation.  */
6663             if (0) {
6664             just_a_word_zero_gv:
6665                 gv = NULL;
6666                 gvp = NULL;
6667                 orig_keyword = 0;
6668             }
6669           just_a_word: {
6670                 SV *sv;
6671                 int pkgname = 0;
6672                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6673                 OP *rv2cv_op;
6674                 CV *cv;
6675 #ifdef PERL_MAD
6676                 SV *nextPL_nextwhite = 0;
6677 #endif
6678
6679
6680                 /* Get the rest if it looks like a package qualifier */
6681
6682                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6683                     STRLEN morelen;
6684                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6685                                   TRUE, &morelen);
6686                     if (!morelen)
6687                         Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6688                                         SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6689                                             (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6690                                 *s == '\'' ? "'" : "::");
6691                     len += morelen;
6692                     pkgname = 1;
6693                 }
6694
6695                 if (PL_expect == XOPERATOR) {
6696                     if (PL_bufptr == PL_linestart) {
6697                         CopLINE_dec(PL_curcop);
6698                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6699                         CopLINE_inc(PL_curcop);
6700                     }
6701                     else
6702                         no_op("Bareword",s);
6703                 }
6704
6705                 /* Look for a subroutine with this name in current package,
6706                    unless name is "Foo::", in which case Foo is a bareword
6707                    (and a package name). */
6708
6709                 if (len > 2 && !PL_madskills &&
6710                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6711                 {
6712                     if (ckWARN(WARN_BAREWORD)
6713                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6714                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6715                             "Bareword \"%"SVf"\" refers to nonexistent package",
6716                              SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6717                                         (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6718                     len -= 2;
6719                     PL_tokenbuf[len] = '\0';
6720                     gv = NULL;
6721                     gvp = 0;
6722                 }
6723                 else {
6724                     if (!gv) {
6725                         /* Mustn't actually add anything to a symbol table.
6726                            But also don't want to "initialise" any placeholder
6727                            constants that might already be there into full
6728                            blown PVGVs with attached PVCV.  */
6729                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6730                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6731                                                SVt_PVCV);
6732                     }
6733                     len = 0;
6734                 }
6735
6736                 /* if we saw a global override before, get the right name */
6737
6738                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6739                     len ? len : strlen(PL_tokenbuf));
6740                 if (gvp) {
6741                     SV * const tmp_sv = sv;
6742                     sv = newSVpvs("CORE::GLOBAL::");
6743                     sv_catsv(sv, tmp_sv);
6744                     SvREFCNT_dec(tmp_sv);
6745                 }
6746
6747 #ifdef PERL_MAD
6748                 if (PL_madskills && !PL_thistoken) {
6749                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6750                     PL_thistoken = newSVpvn(start,s - start);
6751                     PL_realtokenstart = s - SvPVX(PL_linestr);
6752                 }
6753 #endif
6754
6755                 /* Presume this is going to be a bareword of some sort. */
6756                 CLINE;
6757                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6758                 pl_yylval.opval->op_private = OPpCONST_BARE;
6759
6760                 /* And if "Foo::", then that's what it certainly is. */
6761                 if (len)
6762                     goto safe_bareword;
6763
6764                 {
6765                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6766                     const_op->op_private = OPpCONST_BARE;
6767                     rv2cv_op = newCVREF(0, const_op);
6768                 }
6769                 cv = rv2cv_op_cv(rv2cv_op, 0);
6770
6771                 /* See if it's the indirect object for a list operator. */
6772
6773                 if (PL_oldoldbufptr &&
6774                     PL_oldoldbufptr < PL_bufptr &&
6775                     (PL_oldoldbufptr == PL_last_lop
6776                      || PL_oldoldbufptr == PL_last_uni) &&
6777                     /* NO SKIPSPACE BEFORE HERE! */
6778                     (PL_expect == XREF ||
6779                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6780                 {
6781                     bool immediate_paren = *s == '(';
6782
6783                     /* (Now we can afford to cross potential line boundary.) */
6784                     s = SKIPSPACE2(s,nextPL_nextwhite);
6785 #ifdef PERL_MAD
6786                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6787 #endif
6788
6789                     /* Two barewords in a row may indicate method call. */
6790
6791                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6792                         (tmp = intuit_method(s, gv, cv))) {
6793                         op_free(rv2cv_op);
6794                         if (tmp == METHOD && !PL_lex_allbrackets &&
6795                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6796                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6797                         return REPORT(tmp);
6798                     }
6799
6800                     /* If not a declared subroutine, it's an indirect object. */
6801                     /* (But it's an indir obj regardless for sort.) */
6802                     /* Also, if "_" follows a filetest operator, it's a bareword */
6803
6804                     if (
6805                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6806                          (!cv &&
6807                         (PL_last_lop_op != OP_MAPSTART &&
6808                          PL_last_lop_op != OP_GREPSTART))))
6809                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6810                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6811                        )
6812                     {
6813                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6814                         goto bareword;
6815                     }
6816                 }
6817
6818                 PL_expect = XOPERATOR;
6819 #ifdef PERL_MAD
6820                 if (isSPACE(*s))
6821                     s = SKIPSPACE2(s,nextPL_nextwhite);
6822                 PL_nextwhite = nextPL_nextwhite;
6823 #else
6824                 s = skipspace(s);
6825 #endif
6826
6827                 /* Is this a word before a => operator? */
6828                 if (*s == '=' && s[1] == '>' && !pkgname) {
6829                     op_free(rv2cv_op);
6830                     CLINE;
6831                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6832                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6833                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6834                     TERM(WORD);
6835                 }
6836
6837                 /* If followed by a paren, it's certainly a subroutine. */
6838                 if (*s == '(') {
6839                     CLINE;
6840                     if (cv) {
6841                         d = s + 1;
6842                         while (SPACE_OR_TAB(*d))
6843                             d++;
6844                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6845                             s = d + 1;
6846                             goto its_constant;
6847                         }
6848                     }
6849 #ifdef PERL_MAD
6850                     if (PL_madskills) {
6851                         PL_nextwhite = PL_thiswhite;
6852                         PL_thiswhite = 0;
6853                     }
6854                     start_force(PL_curforce);
6855 #endif
6856                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6857                     PL_expect = XOPERATOR;
6858 #ifdef PERL_MAD
6859                     if (PL_madskills) {
6860                         PL_nextwhite = nextPL_nextwhite;
6861                         curmad('X', PL_thistoken);
6862                         PL_thistoken = newSVpvs("");
6863                     }
6864 #endif
6865                     op_free(rv2cv_op);
6866                     force_next(WORD);
6867                     pl_yylval.ival = 0;
6868                     TOKEN('&');
6869                 }
6870
6871                 /* If followed by var or block, call it a method (unless sub) */
6872
6873                 if ((*s == '$' || *s == '{') && !cv) {
6874                     op_free(rv2cv_op);
6875                     PL_last_lop = PL_oldbufptr;
6876                     PL_last_lop_op = OP_METHOD;
6877                     if (!PL_lex_allbrackets &&
6878                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6879                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6880                     PREBLOCK(METHOD);
6881                 }
6882
6883                 /* If followed by a bareword, see if it looks like indir obj. */
6884
6885                 if (!orig_keyword
6886                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6887                         && (tmp = intuit_method(s, gv, cv))) {
6888                     op_free(rv2cv_op);
6889                     if (tmp == METHOD && !PL_lex_allbrackets &&
6890                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6891                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6892                     return REPORT(tmp);
6893                 }
6894
6895                 /* Not a method, so call it a subroutine (if defined) */
6896
6897                 if (cv) {
6898                     if (lastchar == '-') {
6899                         const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
6900                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6901                                 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
6902                                 SVfARG(tmpsv), SVfARG(tmpsv));
6903                     }
6904                     /* Check for a constant sub */
6905                     if ((sv = cv_const_sv(cv))) {
6906                   its_constant:
6907                         op_free(rv2cv_op);
6908                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6909                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6910                         pl_yylval.opval->op_private = OPpCONST_FOLDED;
6911                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6912                         TOKEN(WORD);
6913                     }
6914
6915                     op_free(pl_yylval.opval);
6916                     pl_yylval.opval = rv2cv_op;
6917                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6918                     PL_last_lop = PL_oldbufptr;
6919                     PL_last_lop_op = OP_ENTERSUB;
6920                     /* Is there a prototype? */
6921                     if (
6922 #ifdef PERL_MAD
6923                         cv &&
6924 #endif
6925                         SvPOK(cv))
6926                     {
6927                         STRLEN protolen = CvPROTOLEN(cv);
6928                         const char *proto = CvPROTO(cv);
6929                         bool optional;
6930                         if (!protolen)
6931                             TERM(FUNC0SUB);
6932                         if ((optional = *proto == ';'))
6933                           do
6934                             proto++;
6935                           while (*proto == ';');
6936                         if (
6937                             (
6938                                 (
6939                                     *proto == '$' || *proto == '_'
6940                                  || *proto == '*' || *proto == '+'
6941                                 )
6942                              && proto[1] == '\0'
6943                             )
6944                          || (
6945                              *proto == '\\' && proto[1] && proto[2] == '\0'
6946                             )
6947                         )
6948                             UNIPROTO(UNIOPSUB,optional);
6949                         if (*proto == '\\' && proto[1] == '[') {
6950                             const char *p = proto + 2;
6951                             while(*p && *p != ']')
6952                                 ++p;
6953                             if(*p == ']' && !p[1])
6954                                 UNIPROTO(UNIOPSUB,optional);
6955                         }
6956                         if (*proto == '&' && *s == '{') {
6957                             if (PL_curstash)
6958                                 sv_setpvs(PL_subname, "__ANON__");
6959                             else
6960                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6961                             if (!PL_lex_allbrackets &&
6962                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6963                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6964                             PREBLOCK(LSTOPSUB);
6965                         }
6966                     }
6967 #ifdef PERL_MAD
6968                     {
6969                         if (PL_madskills) {
6970                             PL_nextwhite = PL_thiswhite;
6971                             PL_thiswhite = 0;
6972                         }
6973                         start_force(PL_curforce);
6974                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6975                         PL_expect = XTERM;
6976                         if (PL_madskills) {
6977                             PL_nextwhite = nextPL_nextwhite;
6978                             curmad('X', PL_thistoken);
6979                             PL_thistoken = newSVpvs("");
6980                         }
6981                         force_next(WORD);
6982                         if (!PL_lex_allbrackets &&
6983                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6984                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6985                         TOKEN(NOAMP);
6986                     }
6987                 }
6988
6989                 /* Guess harder when madskills require "best effort". */
6990                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6991                     int probable_sub = 0;
6992                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6993                         probable_sub = 1;
6994                     else if (isALPHA(*s)) {
6995                         char tmpbuf[1024];
6996                         STRLEN tmplen;
6997                         d = s;
6998                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6999                         if (!keyword(tmpbuf, tmplen, 0))
7000                             probable_sub = 1;
7001                         else {
7002                             while (d < PL_bufend && isSPACE(*d))
7003                                 d++;
7004                             if (*d == '=' && d[1] == '>')
7005                                 probable_sub = 1;
7006                         }
7007                     }
7008                     if (probable_sub) {
7009                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7010                                         SVt_PVCV);
7011                         op_free(pl_yylval.opval);
7012                         pl_yylval.opval = rv2cv_op;
7013                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7014                         PL_last_lop = PL_oldbufptr;
7015                         PL_last_lop_op = OP_ENTERSUB;
7016                         PL_nextwhite = PL_thiswhite;
7017                         PL_thiswhite = 0;
7018                         start_force(PL_curforce);
7019                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7020                         PL_expect = XTERM;
7021                         PL_nextwhite = nextPL_nextwhite;
7022                         curmad('X', PL_thistoken);
7023                         PL_thistoken = newSVpvs("");
7024                         force_next(WORD);
7025                         if (!PL_lex_allbrackets &&
7026                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7027                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7028                         TOKEN(NOAMP);
7029                     }
7030 #else
7031                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7032                     PL_expect = XTERM;
7033                     force_next(WORD);
7034                     if (!PL_lex_allbrackets &&
7035                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7036                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7037                     TOKEN(NOAMP);
7038 #endif
7039                 }
7040
7041                 /* Call it a bare word */
7042
7043                 if (PL_hints & HINT_STRICT_SUBS)
7044                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7045                 else {
7046                 bareword:
7047                     /* after "print" and similar functions (corresponding to
7048                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7049                      * a filehandle should be subject to "strict subs".
7050                      * Likewise for the optional indirect-object argument to system
7051                      * or exec, which can't be a bareword */
7052                     if ((PL_last_lop_op == OP_PRINT
7053                             || PL_last_lop_op == OP_PRTF
7054                             || PL_last_lop_op == OP_SAY
7055                             || PL_last_lop_op == OP_SYSTEM
7056                             || PL_last_lop_op == OP_EXEC)
7057                             && (PL_hints & HINT_STRICT_SUBS))
7058                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7059                     if (lastchar != '-') {
7060                         if (ckWARN(WARN_RESERVED)) {
7061                             d = PL_tokenbuf;
7062                             while (isLOWER(*d))
7063                                 d++;
7064                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7065                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7066                                        PL_tokenbuf);
7067                         }
7068                     }
7069                 }
7070                 op_free(rv2cv_op);
7071
7072             safe_bareword:
7073                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7074                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7075                                      "Operator or semicolon missing before %c%"SVf,
7076                                      lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7077                                                     strlen(PL_tokenbuf),
7078                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7079                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7080                                      "Ambiguous use of %c resolved as operator %c",
7081                                      lastchar, lastchar);
7082                 }
7083                 TOKEN(WORD);
7084             }
7085
7086         case KEY___FILE__:
7087             FUN0OP(
7088                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7089             );
7090
7091         case KEY___LINE__:
7092             FUN0OP(
7093                 (OP*)newSVOP(OP_CONST, 0,
7094                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7095             );
7096
7097         case KEY___PACKAGE__:
7098             FUN0OP(
7099                 (OP*)newSVOP(OP_CONST, 0,
7100                                         (PL_curstash
7101                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7102                                          : &PL_sv_undef))
7103             );
7104
7105         case KEY___DATA__:
7106         case KEY___END__: {
7107             GV *gv;
7108             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7109                 const char *pname = "main";
7110                 STRLEN plen = 4;
7111                 U32 putf8 = 0;
7112                 if (PL_tokenbuf[2] == 'D')
7113                 {
7114                     HV * const stash =
7115                         PL_curstash ? PL_curstash : PL_defstash;
7116                     pname = HvNAME_get(stash);
7117                     plen  = HvNAMELEN (stash);
7118                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7119                 }
7120                 gv = gv_fetchpvn_flags(
7121                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7122                         plen+6, GV_ADD|putf8, SVt_PVIO
7123                 );
7124                 GvMULTI_on(gv);
7125                 if (!GvIO(gv))
7126                     GvIOp(gv) = newIO();
7127                 IoIFP(GvIOp(gv)) = PL_rsfp;
7128 #if defined(HAS_FCNTL) && defined(F_SETFD)
7129                 {
7130                     const int fd = PerlIO_fileno(PL_rsfp);
7131                     fcntl(fd,F_SETFD,fd >= 3);
7132                 }
7133 #endif
7134                 /* Mark this internal pseudo-handle as clean */
7135                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7136                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7137                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7138                 else
7139                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7140 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7141                 /* if the script was opened in binmode, we need to revert
7142                  * it to text mode for compatibility; but only if it has CRs
7143                  * XXX this is a questionable hack at best. */
7144                 if (PL_bufend-PL_bufptr > 2
7145                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7146                 {
7147                     Off_t loc = 0;
7148                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7149                         loc = PerlIO_tell(PL_rsfp);
7150                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7151                     }
7152 #ifdef NETWARE
7153                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7154 #else
7155                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7156 #endif  /* NETWARE */
7157                         if (loc > 0)
7158                             PerlIO_seek(PL_rsfp, loc, 0);
7159                     }
7160                 }
7161 #endif
7162 #ifdef PERLIO_LAYERS
7163                 if (!IN_BYTES) {
7164                     if (UTF)
7165                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7166                     else if (PL_encoding) {
7167                         SV *name;
7168                         dSP;
7169                         ENTER;
7170                         SAVETMPS;
7171                         PUSHMARK(sp);
7172                         EXTEND(SP, 1);
7173                         XPUSHs(PL_encoding);
7174                         PUTBACK;
7175                         call_method("name", G_SCALAR);
7176                         SPAGAIN;
7177                         name = POPs;
7178                         PUTBACK;
7179                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7180                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7181                                                       SVfARG(name)));
7182                         FREETMPS;
7183                         LEAVE;
7184                     }
7185                 }
7186 #endif
7187 #ifdef PERL_MAD
7188                 if (PL_madskills) {
7189                     if (PL_realtokenstart >= 0) {
7190                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7191                         if (!PL_endwhite)
7192                             PL_endwhite = newSVpvs("");
7193                         sv_catsv(PL_endwhite, PL_thiswhite);
7194                         PL_thiswhite = 0;
7195                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7196                         PL_realtokenstart = -1;
7197                     }
7198                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7199                            != NULL) ;
7200                 }
7201 #endif
7202                 PL_rsfp = NULL;
7203             }
7204             goto fake_eof;
7205         }
7206
7207         case KEY___SUB__:
7208             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7209
7210         case KEY_AUTOLOAD:
7211         case KEY_DESTROY:
7212         case KEY_BEGIN:
7213         case KEY_UNITCHECK:
7214         case KEY_CHECK:
7215         case KEY_INIT:
7216         case KEY_END:
7217             if (PL_expect == XSTATE) {
7218                 s = PL_bufptr;
7219                 goto really_sub;
7220             }
7221             goto just_a_word;
7222
7223         case KEY_CORE:
7224             if (*s == ':' && s[1] == ':') {
7225                 STRLEN olen = len;
7226                 d = s;
7227                 s += 2;
7228                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7229                 if ((*s == ':' && s[1] == ':')
7230                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7231                 {
7232                     s = d;
7233                     len = olen;
7234                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7235                     goto just_a_word;
7236                 }
7237                 if (!tmp)
7238                     Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7239                                     SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7240                                                 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7241                 if (tmp < 0)
7242                     tmp = -tmp;
7243                 else if (tmp == KEY_require || tmp == KEY_do
7244                       || tmp == KEY_glob)
7245                     /* that's a way to remember we saw "CORE::" */
7246                     orig_keyword = tmp;
7247                 goto reserved_word;
7248             }
7249             goto just_a_word;
7250
7251         case KEY_abs:
7252             UNI(OP_ABS);
7253
7254         case KEY_alarm:
7255             UNI(OP_ALARM);
7256
7257         case KEY_accept:
7258             LOP(OP_ACCEPT,XTERM);
7259
7260         case KEY_and:
7261             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7262                 return REPORT(0);
7263             OPERATOR(ANDOP);
7264
7265         case KEY_atan2:
7266             LOP(OP_ATAN2,XTERM);
7267
7268         case KEY_bind:
7269             LOP(OP_BIND,XTERM);
7270
7271         case KEY_binmode:
7272             LOP(OP_BINMODE,XTERM);
7273
7274         case KEY_bless:
7275             LOP(OP_BLESS,XTERM);
7276
7277         case KEY_break:
7278             FUN0(OP_BREAK);
7279
7280         case KEY_chop:
7281             UNI(OP_CHOP);
7282
7283         case KEY_continue:
7284                     /* We have to disambiguate the two senses of
7285                       "continue". If the next token is a '{' then
7286                       treat it as the start of a continue block;
7287                       otherwise treat it as a control operator.
7288                      */
7289                     s = skipspace(s);
7290                     if (*s == '{')
7291             PREBLOCK(CONTINUE);
7292                     else
7293                         FUN0(OP_CONTINUE);
7294
7295         case KEY_chdir:
7296             /* may use HOME */
7297             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7298             UNI(OP_CHDIR);
7299
7300         case KEY_close:
7301             UNI(OP_CLOSE);
7302
7303         case KEY_closedir:
7304             UNI(OP_CLOSEDIR);
7305
7306         case KEY_cmp:
7307             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7308                 return REPORT(0);
7309             Eop(OP_SCMP);
7310
7311         case KEY_caller:
7312             UNI(OP_CALLER);
7313
7314         case KEY_crypt:
7315 #ifdef FCRYPT
7316             if (!PL_cryptseen) {
7317                 PL_cryptseen = TRUE;
7318                 init_des();
7319             }
7320 #endif
7321             LOP(OP_CRYPT,XTERM);
7322
7323         case KEY_chmod:
7324             LOP(OP_CHMOD,XTERM);
7325
7326         case KEY_chown:
7327             LOP(OP_CHOWN,XTERM);
7328
7329         case KEY_connect:
7330             LOP(OP_CONNECT,XTERM);
7331
7332         case KEY_chr:
7333             UNI(OP_CHR);
7334
7335         case KEY_cos:
7336             UNI(OP_COS);
7337
7338         case KEY_chroot:
7339             UNI(OP_CHROOT);
7340
7341         case KEY_default:
7342             PREBLOCK(DEFAULT);
7343
7344         case KEY_do:
7345             s = SKIPSPACE1(s);
7346             if (*s == '{')
7347                 PRETERMBLOCK(DO);
7348             if (*s != '\'') {
7349                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
7350                 if (len) {
7351                     d = SKIPSPACE1(d);
7352                     if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
7353                 }
7354             }
7355             if (orig_keyword == KEY_do) {
7356                 orig_keyword = 0;
7357                 pl_yylval.ival = 1;
7358             }
7359             else
7360                 pl_yylval.ival = 0;
7361             OPERATOR(DO);
7362
7363         case KEY_die:
7364             PL_hints |= HINT_BLOCK_SCOPE;
7365             LOP(OP_DIE,XTERM);
7366
7367         case KEY_defined:
7368             UNI(OP_DEFINED);
7369
7370         case KEY_delete:
7371             UNI(OP_DELETE);
7372
7373         case KEY_dbmopen:
7374             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7375                               STR_WITH_LEN("NDBM_File::"),
7376                               STR_WITH_LEN("DB_File::"),
7377                               STR_WITH_LEN("GDBM_File::"),
7378                               STR_WITH_LEN("SDBM_File::"),
7379                               STR_WITH_LEN("ODBM_File::"),
7380                               NULL);
7381             LOP(OP_DBMOPEN,XTERM);
7382
7383         case KEY_dbmclose:
7384             UNI(OP_DBMCLOSE);
7385
7386         case KEY_dump:
7387             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7388             LOOPX(OP_DUMP);
7389
7390         case KEY_else:
7391             PREBLOCK(ELSE);
7392
7393         case KEY_elsif:
7394             pl_yylval.ival = CopLINE(PL_curcop);
7395             OPERATOR(ELSIF);
7396
7397         case KEY_eq:
7398             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7399                 return REPORT(0);
7400             Eop(OP_SEQ);
7401
7402         case KEY_exists:
7403             UNI(OP_EXISTS);
7404
7405         case KEY_exit:
7406             if (PL_madskills)
7407                 UNI(OP_INT);
7408             UNI(OP_EXIT);
7409
7410         case KEY_eval:
7411             s = SKIPSPACE1(s);
7412             if (*s == '{') { /* block eval */
7413                 PL_expect = XTERMBLOCK;
7414                 UNIBRACK(OP_ENTERTRY);
7415             }
7416             else { /* string eval */
7417                 PL_expect = XTERM;
7418                 UNIBRACK(OP_ENTEREVAL);
7419             }
7420
7421         case KEY_evalbytes:
7422             PL_expect = XTERM;
7423             UNIBRACK(-OP_ENTEREVAL);
7424
7425         case KEY_eof:
7426             UNI(OP_EOF);
7427
7428         case KEY_exp:
7429             UNI(OP_EXP);
7430
7431         case KEY_each:
7432             UNI(OP_EACH);
7433
7434         case KEY_exec:
7435             LOP(OP_EXEC,XREF);
7436
7437         case KEY_endhostent:
7438             FUN0(OP_EHOSTENT);
7439
7440         case KEY_endnetent:
7441             FUN0(OP_ENETENT);
7442
7443         case KEY_endservent:
7444             FUN0(OP_ESERVENT);
7445
7446         case KEY_endprotoent:
7447             FUN0(OP_EPROTOENT);
7448
7449         case KEY_endpwent:
7450             FUN0(OP_EPWENT);
7451
7452         case KEY_endgrent:
7453             FUN0(OP_EGRENT);
7454
7455         case KEY_for:
7456         case KEY_foreach:
7457             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7458                 return REPORT(0);
7459             pl_yylval.ival = CopLINE(PL_curcop);
7460             s = SKIPSPACE1(s);
7461             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7462                 char *p = s;
7463 #ifdef PERL_MAD
7464                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7465 #endif
7466
7467                 if ((PL_bufend - p) >= 3 &&
7468                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7469                     p += 2;
7470                 else if ((PL_bufend - p) >= 4 &&
7471                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7472                     p += 3;
7473                 p = PEEKSPACE(p);
7474                 if (isIDFIRST_lazy_if(p,UTF)) {
7475                     p = scan_ident(p, PL_bufend,
7476                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7477                     p = PEEKSPACE(p);
7478                 }
7479                 if (*p != '$')
7480                     Perl_croak(aTHX_ "Missing $ on loop variable");
7481 #ifdef PERL_MAD
7482                 s = SvPVX(PL_linestr) + soff;
7483 #endif
7484             }
7485             OPERATOR(FOR);
7486
7487         case KEY_formline:
7488             LOP(OP_FORMLINE,XTERM);
7489
7490         case KEY_fork:
7491             FUN0(OP_FORK);
7492
7493         case KEY_fc:
7494             UNI(OP_FC);
7495
7496         case KEY_fcntl:
7497             LOP(OP_FCNTL,XTERM);
7498
7499         case KEY_fileno:
7500             UNI(OP_FILENO);
7501
7502         case KEY_flock:
7503             LOP(OP_FLOCK,XTERM);
7504
7505         case KEY_gt:
7506             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7507                 return REPORT(0);
7508             Rop(OP_SGT);
7509
7510         case KEY_ge:
7511             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7512                 return REPORT(0);
7513             Rop(OP_SGE);
7514
7515         case KEY_grep:
7516             LOP(OP_GREPSTART, XREF);
7517
7518         case KEY_goto:
7519             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7520             LOOPX(OP_GOTO);
7521
7522         case KEY_gmtime:
7523             UNI(OP_GMTIME);
7524
7525         case KEY_getc:
7526             UNIDOR(OP_GETC);
7527
7528         case KEY_getppid:
7529             FUN0(OP_GETPPID);
7530
7531         case KEY_getpgrp:
7532             UNI(OP_GETPGRP);
7533
7534         case KEY_getpriority:
7535             LOP(OP_GETPRIORITY,XTERM);
7536
7537         case KEY_getprotobyname:
7538             UNI(OP_GPBYNAME);
7539
7540         case KEY_getprotobynumber:
7541             LOP(OP_GPBYNUMBER,XTERM);
7542
7543         case KEY_getprotoent:
7544             FUN0(OP_GPROTOENT);
7545
7546         case KEY_getpwent:
7547             FUN0(OP_GPWENT);
7548
7549         case KEY_getpwnam:
7550             UNI(OP_GPWNAM);
7551
7552         case KEY_getpwuid:
7553             UNI(OP_GPWUID);
7554
7555         case KEY_getpeername:
7556             UNI(OP_GETPEERNAME);
7557
7558         case KEY_gethostbyname:
7559             UNI(OP_GHBYNAME);
7560
7561         case KEY_gethostbyaddr:
7562             LOP(OP_GHBYADDR,XTERM);
7563
7564         case KEY_gethostent:
7565             FUN0(OP_GHOSTENT);
7566
7567         case KEY_getnetbyname:
7568             UNI(OP_GNBYNAME);
7569
7570         case KEY_getnetbyaddr:
7571             LOP(OP_GNBYADDR,XTERM);
7572
7573         case KEY_getnetent:
7574             FUN0(OP_GNETENT);
7575
7576         case KEY_getservbyname:
7577             LOP(OP_GSBYNAME,XTERM);
7578
7579         case KEY_getservbyport:
7580             LOP(OP_GSBYPORT,XTERM);
7581
7582         case KEY_getservent:
7583             FUN0(OP_GSERVENT);
7584
7585         case KEY_getsockname:
7586             UNI(OP_GETSOCKNAME);
7587
7588         case KEY_getsockopt:
7589             LOP(OP_GSOCKOPT,XTERM);
7590
7591         case KEY_getgrent:
7592             FUN0(OP_GGRENT);
7593
7594         case KEY_getgrnam:
7595             UNI(OP_GGRNAM);
7596
7597         case KEY_getgrgid:
7598             UNI(OP_GGRGID);
7599
7600         case KEY_getlogin:
7601             FUN0(OP_GETLOGIN);
7602
7603         case KEY_given:
7604             pl_yylval.ival = CopLINE(PL_curcop);
7605             OPERATOR(GIVEN);
7606
7607         case KEY_glob:
7608             LOP(
7609              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7610              XTERM
7611             );
7612
7613         case KEY_hex:
7614             UNI(OP_HEX);
7615
7616         case KEY_if:
7617             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7618                 return REPORT(0);
7619             pl_yylval.ival = CopLINE(PL_curcop);
7620             OPERATOR(IF);
7621
7622         case KEY_index:
7623             LOP(OP_INDEX,XTERM);
7624
7625         case KEY_int:
7626             UNI(OP_INT);
7627
7628         case KEY_ioctl:
7629             LOP(OP_IOCTL,XTERM);
7630
7631         case KEY_join:
7632             LOP(OP_JOIN,XTERM);
7633
7634         case KEY_keys:
7635             UNI(OP_KEYS);
7636
7637         case KEY_kill:
7638             LOP(OP_KILL,XTERM);
7639
7640         case KEY_last:
7641             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7642             LOOPX(OP_LAST);
7643
7644         case KEY_lc:
7645             UNI(OP_LC);
7646
7647         case KEY_lcfirst:
7648             UNI(OP_LCFIRST);
7649
7650         case KEY_local:
7651             pl_yylval.ival = 0;
7652             OPERATOR(LOCAL);
7653
7654         case KEY_length:
7655             UNI(OP_LENGTH);
7656
7657         case KEY_lt:
7658             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7659                 return REPORT(0);
7660             Rop(OP_SLT);
7661
7662         case KEY_le:
7663             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7664                 return REPORT(0);
7665             Rop(OP_SLE);
7666
7667         case KEY_localtime:
7668             UNI(OP_LOCALTIME);
7669
7670         case KEY_log:
7671             UNI(OP_LOG);
7672
7673         case KEY_link:
7674             LOP(OP_LINK,XTERM);
7675
7676         case KEY_listen:
7677             LOP(OP_LISTEN,XTERM);
7678
7679         case KEY_lock:
7680             UNI(OP_LOCK);
7681
7682         case KEY_lstat:
7683             UNI(OP_LSTAT);
7684
7685         case KEY_m:
7686             s = scan_pat(s,OP_MATCH);
7687             TERM(sublex_start());
7688
7689         case KEY_map:
7690             LOP(OP_MAPSTART, XREF);
7691
7692         case KEY_mkdir:
7693             LOP(OP_MKDIR,XTERM);
7694
7695         case KEY_msgctl:
7696             LOP(OP_MSGCTL,XTERM);
7697
7698         case KEY_msgget:
7699             LOP(OP_MSGGET,XTERM);
7700
7701         case KEY_msgrcv:
7702             LOP(OP_MSGRCV,XTERM);
7703
7704         case KEY_msgsnd:
7705             LOP(OP_MSGSND,XTERM);
7706
7707         case KEY_our:
7708         case KEY_my:
7709         case KEY_state:
7710             PL_in_my = (U16)tmp;
7711             s = SKIPSPACE1(s);
7712             if (isIDFIRST_lazy_if(s,UTF)) {
7713 #ifdef PERL_MAD
7714                 char* start = s;
7715 #endif
7716                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7717                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7718                     goto really_sub;
7719                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7720                 if (!PL_in_my_stash) {
7721                     char tmpbuf[1024];
7722                     PL_bufptr = s;
7723                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7724                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7725                 }
7726 #ifdef PERL_MAD
7727                 if (PL_madskills) {     /* just add type to declarator token */
7728                     sv_catsv(PL_thistoken, PL_nextwhite);
7729                     PL_nextwhite = 0;
7730                     sv_catpvn(PL_thistoken, start, s - start);
7731                 }
7732 #endif
7733             }
7734             pl_yylval.ival = 1;
7735             OPERATOR(MY);
7736
7737         case KEY_next:
7738             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7739             LOOPX(OP_NEXT);
7740
7741         case KEY_ne:
7742             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7743                 return REPORT(0);
7744             Eop(OP_SNE);
7745
7746         case KEY_no:
7747             s = tokenize_use(0, s);
7748             OPERATOR(USE);
7749
7750         case KEY_not:
7751             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7752                 FUN1(OP_NOT);
7753             else {
7754                 if (!PL_lex_allbrackets &&
7755                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7756                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7757                 OPERATOR(NOTOP);
7758             }
7759
7760         case KEY_open:
7761             s = SKIPSPACE1(s);
7762             if (isIDFIRST_lazy_if(s,UTF)) {
7763                 const char *t;
7764                 for (d = s; isALNUM_lazy_if(d,UTF);) {
7765                     d += UTF ? UTF8SKIP(d) : 1;
7766                     if (UTF) {
7767                         while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7768                             d += UTF ? UTF8SKIP(d) : 1;
7769                         }
7770                     }
7771                 }
7772                 for (t=d; isSPACE(*t);)
7773                     t++;
7774                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7775                     /* [perl #16184] */
7776                     && !(t[0] == '=' && t[1] == '>')
7777                     && !(t[0] == ':' && t[1] == ':')
7778                     && !keyword(s, d-s, 0)
7779                 ) {
7780                     SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7781                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
7782                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7783                            "Precedence problem: open %"SVf" should be open(%"SVf")",
7784                             SVfARG(tmpsv), SVfARG(tmpsv));
7785                 }
7786             }
7787             LOP(OP_OPEN,XTERM);
7788
7789         case KEY_or:
7790             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7791                 return REPORT(0);
7792             pl_yylval.ival = OP_OR;
7793             OPERATOR(OROP);
7794
7795         case KEY_ord:
7796             UNI(OP_ORD);
7797
7798         case KEY_oct:
7799             UNI(OP_OCT);
7800
7801         case KEY_opendir:
7802             LOP(OP_OPEN_DIR,XTERM);
7803
7804         case KEY_print:
7805             checkcomma(s,PL_tokenbuf,"filehandle");
7806             LOP(OP_PRINT,XREF);
7807
7808         case KEY_printf:
7809             checkcomma(s,PL_tokenbuf,"filehandle");
7810             LOP(OP_PRTF,XREF);
7811
7812         case KEY_prototype:
7813             UNI(OP_PROTOTYPE);
7814
7815         case KEY_push:
7816             LOP(OP_PUSH,XTERM);
7817
7818         case KEY_pop:
7819             UNIDOR(OP_POP);
7820
7821         case KEY_pos:
7822             UNIDOR(OP_POS);
7823
7824         case KEY_pack:
7825             LOP(OP_PACK,XTERM);
7826
7827         case KEY_package:
7828             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7829             s = SKIPSPACE1(s);
7830             s = force_strict_version(s);
7831             PL_lex_expect = XBLOCK;
7832             OPERATOR(PACKAGE);
7833
7834         case KEY_pipe:
7835             LOP(OP_PIPE_OP,XTERM);
7836
7837         case KEY_q:
7838             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7839             if (!s)
7840                 missingterm(NULL);
7841             pl_yylval.ival = OP_CONST;
7842             TERM(sublex_start());
7843
7844         case KEY_quotemeta:
7845             UNI(OP_QUOTEMETA);
7846
7847         case KEY_qw: {
7848             OP *words = NULL;
7849             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7850             if (!s)
7851                 missingterm(NULL);
7852             PL_expect = XOPERATOR;
7853             if (SvCUR(PL_lex_stuff)) {
7854                 int warned_comma = !ckWARN(WARN_QW);
7855                 int warned_comment = warned_comma;
7856                 d = SvPV_force(PL_lex_stuff, len);
7857                 while (len) {
7858                     for (; isSPACE(*d) && len; --len, ++d)
7859                         /**/;
7860                     if (len) {
7861                         SV *sv;
7862                         const char *b = d;
7863                         if (!warned_comma || !warned_comment) {
7864                             for (; !isSPACE(*d) && len; --len, ++d) {
7865                                 if (!warned_comma && *d == ',') {
7866                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7867                                         "Possible attempt to separate words with commas");
7868                                     ++warned_comma;
7869                                 }
7870                                 else if (!warned_comment && *d == '#') {
7871                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7872                                         "Possible attempt to put comments in qw() list");
7873                                     ++warned_comment;
7874                                 }
7875                             }
7876                         }
7877                         else {
7878                             for (; !isSPACE(*d) && len; --len, ++d)
7879                                 /**/;
7880                         }
7881                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7882                         words = op_append_elem(OP_LIST, words,
7883                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7884                     }
7885                 }
7886             }
7887             if (!words)
7888                 words = newNULLLIST();
7889             if (PL_lex_stuff) {
7890                 SvREFCNT_dec(PL_lex_stuff);
7891                 PL_lex_stuff = NULL;
7892             }
7893             PL_expect = XOPERATOR;
7894             pl_yylval.opval = sawparens(words);
7895             TOKEN(QWLIST);
7896         }
7897
7898         case KEY_qq:
7899             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7900             if (!s)
7901                 missingterm(NULL);
7902             pl_yylval.ival = OP_STRINGIFY;
7903             if (SvIVX(PL_lex_stuff) == '\'')
7904                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7905             TERM(sublex_start());
7906
7907         case KEY_qr:
7908             s = scan_pat(s,OP_QR);
7909             TERM(sublex_start());
7910
7911         case KEY_qx:
7912             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7913             if (!s)
7914                 missingterm(NULL);
7915             readpipe_override();
7916             TERM(sublex_start());
7917
7918         case KEY_return:
7919             OLDLOP(OP_RETURN);
7920
7921         case KEY_require:
7922             s = SKIPSPACE1(s);
7923             if (isDIGIT(*s)) {
7924                 s = force_version(s, FALSE);
7925             }
7926             else if (*s != 'v' || !isDIGIT(s[1])
7927                     || (s = force_version(s, TRUE), *s == 'v'))
7928             {
7929                 *PL_tokenbuf = '\0';
7930                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7931                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7932                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7933                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
7934                 else if (*s == '<')
7935                     yyerror("<> should be quotes");
7936             }
7937             if (orig_keyword == KEY_require) {
7938                 orig_keyword = 0;
7939                 pl_yylval.ival = 1;
7940             }
7941             else
7942                 pl_yylval.ival = 0;
7943             PL_expect = XTERM;
7944             PL_bufptr = s;
7945             PL_last_uni = PL_oldbufptr;
7946             PL_last_lop_op = OP_REQUIRE;
7947             s = skipspace(s);
7948             return REPORT( (int)REQUIRE );
7949
7950         case KEY_reset:
7951             UNI(OP_RESET);
7952
7953         case KEY_redo:
7954             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7955             LOOPX(OP_REDO);
7956
7957         case KEY_rename:
7958             LOP(OP_RENAME,XTERM);
7959
7960         case KEY_rand:
7961             UNI(OP_RAND);
7962
7963         case KEY_rmdir:
7964             UNI(OP_RMDIR);
7965
7966         case KEY_rindex:
7967             LOP(OP_RINDEX,XTERM);
7968
7969         case KEY_read:
7970             LOP(OP_READ,XTERM);
7971
7972         case KEY_readdir:
7973             UNI(OP_READDIR);
7974
7975         case KEY_readline:
7976             UNIDOR(OP_READLINE);
7977
7978         case KEY_readpipe:
7979             UNIDOR(OP_BACKTICK);
7980
7981         case KEY_rewinddir:
7982             UNI(OP_REWINDDIR);
7983
7984         case KEY_recv:
7985             LOP(OP_RECV,XTERM);
7986
7987         case KEY_reverse:
7988             LOP(OP_REVERSE,XTERM);
7989
7990         case KEY_readlink:
7991             UNIDOR(OP_READLINK);
7992
7993         case KEY_ref:
7994             UNI(OP_REF);
7995
7996         case KEY_s:
7997             s = scan_subst(s);
7998             if (pl_yylval.opval)
7999                 TERM(sublex_start());
8000             else
8001                 TOKEN(1);       /* force error */
8002
8003         case KEY_say:
8004             checkcomma(s,PL_tokenbuf,"filehandle");
8005             LOP(OP_SAY,XREF);
8006
8007         case KEY_chomp:
8008             UNI(OP_CHOMP);
8009
8010         case KEY_scalar:
8011             UNI(OP_SCALAR);
8012
8013         case KEY_select:
8014             LOP(OP_SELECT,XTERM);
8015
8016         case KEY_seek:
8017             LOP(OP_SEEK,XTERM);
8018
8019         case KEY_semctl:
8020             LOP(OP_SEMCTL,XTERM);
8021
8022         case KEY_semget:
8023             LOP(OP_SEMGET,XTERM);
8024
8025         case KEY_semop:
8026             LOP(OP_SEMOP,XTERM);
8027
8028         case KEY_send:
8029             LOP(OP_SEND,XTERM);
8030
8031         case KEY_setpgrp:
8032             LOP(OP_SETPGRP,XTERM);
8033
8034         case KEY_setpriority:
8035             LOP(OP_SETPRIORITY,XTERM);
8036
8037         case KEY_sethostent:
8038             UNI(OP_SHOSTENT);
8039
8040         case KEY_setnetent:
8041             UNI(OP_SNETENT);
8042
8043         case KEY_setservent:
8044             UNI(OP_SSERVENT);
8045
8046         case KEY_setprotoent:
8047             UNI(OP_SPROTOENT);
8048
8049         case KEY_setpwent:
8050             FUN0(OP_SPWENT);
8051
8052         case KEY_setgrent:
8053             FUN0(OP_SGRENT);
8054
8055         case KEY_seekdir:
8056             LOP(OP_SEEKDIR,XTERM);
8057
8058         case KEY_setsockopt:
8059             LOP(OP_SSOCKOPT,XTERM);
8060
8061         case KEY_shift:
8062             UNIDOR(OP_SHIFT);
8063
8064         case KEY_shmctl:
8065             LOP(OP_SHMCTL,XTERM);
8066
8067         case KEY_shmget:
8068             LOP(OP_SHMGET,XTERM);
8069
8070         case KEY_shmread:
8071             LOP(OP_SHMREAD,XTERM);
8072
8073         case KEY_shmwrite:
8074             LOP(OP_SHMWRITE,XTERM);
8075
8076         case KEY_shutdown:
8077             LOP(OP_SHUTDOWN,XTERM);
8078
8079         case KEY_sin:
8080             UNI(OP_SIN);
8081
8082         case KEY_sleep:
8083             UNI(OP_SLEEP);
8084
8085         case KEY_socket:
8086             LOP(OP_SOCKET,XTERM);
8087
8088         case KEY_socketpair:
8089             LOP(OP_SOCKPAIR,XTERM);
8090
8091         case KEY_sort:
8092             checkcomma(s,PL_tokenbuf,"subroutine name");
8093             s = SKIPSPACE1(s);
8094             PL_expect = XTERM;
8095             s = force_word(s,WORD,TRUE,TRUE,FALSE);
8096             LOP(OP_SORT,XREF);
8097
8098         case KEY_split:
8099             LOP(OP_SPLIT,XTERM);
8100
8101         case KEY_sprintf:
8102             LOP(OP_SPRINTF,XTERM);
8103
8104         case KEY_splice:
8105             LOP(OP_SPLICE,XTERM);
8106
8107         case KEY_sqrt:
8108             UNI(OP_SQRT);
8109
8110         case KEY_srand:
8111             UNI(OP_SRAND);
8112
8113         case KEY_stat:
8114             UNI(OP_STAT);
8115
8116         case KEY_study:
8117             UNI(OP_STUDY);
8118
8119         case KEY_substr:
8120             LOP(OP_SUBSTR,XTERM);
8121
8122         case KEY_format:
8123         case KEY_sub:
8124           really_sub:
8125             {
8126                 char tmpbuf[sizeof PL_tokenbuf];
8127                 SSize_t tboffset = 0;
8128                 expectation attrful;
8129                 bool have_name, have_proto;
8130                 const int key = tmp;
8131
8132 #ifdef PERL_MAD
8133                 SV *tmpwhite = 0;
8134
8135                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8136                 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8137                 PL_thistoken = 0;
8138
8139                 d = s;
8140                 s = SKIPSPACE2(s,tmpwhite);
8141 #else
8142                 s = skipspace(s);
8143 #endif
8144
8145                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8146                     (*s == ':' && s[1] == ':'))
8147                 {
8148 #ifdef PERL_MAD
8149                     SV *nametoke = NULL;
8150 #endif
8151
8152                     PL_expect = XBLOCK;
8153                     attrful = XATTRBLOCK;
8154                     /* remember buffer pos'n for later force_word */
8155                     tboffset = s - PL_oldbufptr;
8156                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
8157 #ifdef PERL_MAD
8158                     if (PL_madskills)
8159                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8160 #endif
8161                     if (memchr(tmpbuf, ':', len))
8162                         sv_setpvn(PL_subname, tmpbuf, len);
8163                     else {
8164                         sv_setsv(PL_subname,PL_curstname);
8165                         sv_catpvs(PL_subname,"::");
8166                         sv_catpvn(PL_subname,tmpbuf,len);
8167                     }
8168                     if (SvUTF8(PL_linestr))
8169                         SvUTF8_on(PL_subname);
8170                     have_name = TRUE;
8171
8172 #ifdef PERL_MAD
8173
8174                     start_force(0);
8175                     CURMAD('X', nametoke);
8176                     CURMAD('_', tmpwhite);
8177                     (void) force_word(PL_oldbufptr + tboffset, WORD,
8178                                       FALSE, TRUE, TRUE);
8179
8180                     s = SKIPSPACE2(d,tmpwhite);
8181 #else
8182                     s = skipspace(d);
8183 #endif
8184                 }
8185                 else {
8186                     if (key == KEY_my)
8187                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
8188                     PL_expect = XTERMBLOCK;
8189                     attrful = XATTRTERM;
8190                     sv_setpvs(PL_subname,"?");
8191                     have_name = FALSE;
8192                 }
8193
8194                 if (key == KEY_format) {
8195                     if (*s == '=')
8196                         PL_lex_formbrack = PL_lex_brackets + 1;
8197 #ifdef PERL_MAD
8198                     PL_thistoken = subtoken;
8199                     s = d;
8200 #else
8201                     if (have_name)
8202                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8203                                           FALSE, TRUE, TRUE);
8204 #endif
8205                     OPERATOR(FORMAT);
8206                 }
8207
8208                 /* Look for a prototype */
8209                 if (*s == '(') {
8210                     char *p;
8211                     bool bad_proto = FALSE;
8212                     bool in_brackets = FALSE;
8213                     char greedy_proto = ' ';
8214                     bool proto_after_greedy_proto = FALSE;
8215                     bool must_be_last = FALSE;
8216                     bool underscore = FALSE;
8217                     bool seen_underscore = FALSE;
8218                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8219                     STRLEN tmplen;
8220
8221                     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8222                     if (!s)
8223                         Perl_croak(aTHX_ "Prototype not terminated");
8224                     /* strip spaces and check for bad characters */
8225                     d = SvPV(PL_lex_stuff, tmplen);
8226                     tmp = 0;
8227                     for (p = d; tmplen; tmplen--, ++p) {
8228                         if (!isSPACE(*p)) {
8229                             d[tmp++] = *p;
8230
8231                             if (warnillegalproto) {
8232                                 if (must_be_last)
8233                                     proto_after_greedy_proto = TRUE;
8234                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8235                                     bad_proto = TRUE;
8236                                 }
8237                                 else {
8238                                     if ( underscore ) {
8239                                         if ( !strchr(";@%", *p) )
8240                                             bad_proto = TRUE;
8241                                         underscore = FALSE;
8242                                     }
8243                                     if ( *p == '[' ) {
8244                                         in_brackets = TRUE;
8245                                     }
8246                                     else if ( *p == ']' ) {
8247                                         in_brackets = FALSE;
8248                                     }
8249                                     else if ( (*p == '@' || *p == '%') &&
8250                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8251                                          !in_brackets ) {
8252                                         must_be_last = TRUE;
8253                                         greedy_proto = *p;
8254                                     }
8255                                     else if ( *p == '_' ) {
8256                                         underscore = seen_underscore = TRUE;
8257                                     }
8258                                 }
8259                             }
8260                         }
8261                     }
8262                     d[tmp] = '\0';
8263                     if (proto_after_greedy_proto)
8264                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8265                                     "Prototype after '%c' for %"SVf" : %s",
8266                                     greedy_proto, SVfARG(PL_subname), d);
8267                     if (bad_proto) {
8268                         SV *dsv = newSVpvs_flags("", SVs_TEMP);
8269                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8270                                     "Illegal character %sin prototype for %"SVf" : %s",
8271                                     seen_underscore ? "after '_' " : "",
8272                                     SVfARG(PL_subname),
8273                                     SvUTF8(PL_lex_stuff)
8274                                         ? sv_uni_display(dsv,
8275                                             newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8276                                             tmp,
8277                                             UNI_DISPLAY_ISPRINT)
8278                                         : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8279                                             PERL_PV_ESCAPE_NONASCII));
8280                     }
8281                     SvCUR_set(PL_lex_stuff, tmp);
8282                     have_proto = TRUE;
8283
8284 #ifdef PERL_MAD
8285                     start_force(0);
8286                     CURMAD('q', PL_thisopen);
8287                     CURMAD('_', tmpwhite);
8288                     CURMAD('=', PL_thisstuff);
8289                     CURMAD('Q', PL_thisclose);
8290                     NEXTVAL_NEXTTOKE.opval =
8291                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8292                     PL_lex_stuff = NULL;
8293                     force_next(THING);
8294
8295                     s = SKIPSPACE2(s,tmpwhite);
8296 #else
8297                     s = skipspace(s);
8298 #endif
8299                 }
8300                 else
8301                     have_proto = FALSE;
8302
8303                 if (*s == ':' && s[1] != ':')
8304                     PL_expect = attrful;
8305                 else if (*s != '{' && key == KEY_sub) {
8306                     if (!have_name)
8307                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8308                     else if (*s != ';' && *s != '}')
8309                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8310                 }
8311
8312 #ifdef PERL_MAD
8313                 start_force(0);
8314                 if (tmpwhite) {
8315                     if (PL_madskills)
8316                         curmad('^', newSVpvs(""));
8317                     CURMAD('_', tmpwhite);
8318                 }
8319                 force_next(0);
8320
8321                 PL_thistoken = subtoken;
8322 #else
8323                 if (have_proto) {
8324                     NEXTVAL_NEXTTOKE.opval =
8325                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8326                     PL_lex_stuff = NULL;
8327                     force_next(THING);
8328                 }
8329 #endif
8330                 if (!have_name) {
8331                     if (PL_curstash)
8332                         sv_setpvs(PL_subname, "__ANON__");
8333                     else
8334                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8335                     TOKEN(ANONSUB);
8336                 }
8337 #ifndef PERL_MAD
8338                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8339                                   FALSE, TRUE, TRUE);
8340 #endif
8341                 if (key == KEY_my)
8342                     TOKEN(MYSUB);
8343                 TOKEN(SUB);
8344             }
8345
8346         case KEY_system:
8347             LOP(OP_SYSTEM,XREF);
8348
8349         case KEY_symlink:
8350             LOP(OP_SYMLINK,XTERM);
8351
8352         case KEY_syscall:
8353             LOP(OP_SYSCALL,XTERM);
8354
8355         case KEY_sysopen:
8356             LOP(OP_SYSOPEN,XTERM);
8357
8358         case KEY_sysseek:
8359             LOP(OP_SYSSEEK,XTERM);
8360
8361         case KEY_sysread:
8362             LOP(OP_SYSREAD,XTERM);
8363
8364         case KEY_syswrite:
8365             LOP(OP_SYSWRITE,XTERM);
8366
8367         case KEY_tr:
8368             s = scan_trans(s);
8369             TERM(sublex_start());
8370
8371         case KEY_tell:
8372             UNI(OP_TELL);
8373
8374         case KEY_telldir:
8375             UNI(OP_TELLDIR);
8376
8377         case KEY_tie:
8378             LOP(OP_TIE,XTERM);
8379
8380         case KEY_tied:
8381             UNI(OP_TIED);
8382
8383         case KEY_time:
8384             FUN0(OP_TIME);
8385
8386         case KEY_times:
8387             FUN0(OP_TMS);
8388
8389         case KEY_truncate:
8390             LOP(OP_TRUNCATE,XTERM);
8391
8392         case KEY_uc:
8393             UNI(OP_UC);
8394
8395         case KEY_ucfirst:
8396             UNI(OP_UCFIRST);
8397
8398         case KEY_untie:
8399             UNI(OP_UNTIE);
8400
8401         case KEY_until:
8402             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8403                 return REPORT(0);
8404             pl_yylval.ival = CopLINE(PL_curcop);
8405             OPERATOR(UNTIL);
8406
8407         case KEY_unless:
8408             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8409                 return REPORT(0);
8410             pl_yylval.ival = CopLINE(PL_curcop);
8411             OPERATOR(UNLESS);
8412
8413         case KEY_unlink:
8414             LOP(OP_UNLINK,XTERM);
8415
8416         case KEY_undef:
8417             UNIDOR(OP_UNDEF);
8418
8419         case KEY_unpack:
8420             LOP(OP_UNPACK,XTERM);
8421
8422         case KEY_utime:
8423             LOP(OP_UTIME,XTERM);
8424
8425         case KEY_umask:
8426             UNIDOR(OP_UMASK);
8427
8428         case KEY_unshift:
8429             LOP(OP_UNSHIFT,XTERM);
8430
8431         case KEY_use:
8432             s = tokenize_use(1, s);
8433             OPERATOR(USE);
8434
8435         case KEY_values:
8436             UNI(OP_VALUES);
8437
8438         case KEY_vec:
8439             LOP(OP_VEC,XTERM);
8440
8441         case KEY_when:
8442             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8443                 return REPORT(0);
8444             pl_yylval.ival = CopLINE(PL_curcop);
8445             OPERATOR(WHEN);
8446
8447         case KEY_while:
8448             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8449                 return REPORT(0);
8450             pl_yylval.ival = CopLINE(PL_curcop);
8451             OPERATOR(WHILE);
8452
8453         case KEY_warn:
8454             PL_hints |= HINT_BLOCK_SCOPE;
8455             LOP(OP_WARN,XTERM);
8456
8457         case KEY_wait:
8458             FUN0(OP_WAIT);
8459
8460         case KEY_waitpid:
8461             LOP(OP_WAITPID,XTERM);
8462
8463         case KEY_wantarray:
8464             FUN0(OP_WANTARRAY);
8465
8466         case KEY_write:
8467 #ifdef EBCDIC
8468         {
8469             char ctl_l[2];
8470             ctl_l[0] = toCTRL('L');
8471             ctl_l[1] = '\0';
8472             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8473         }
8474 #else
8475             /* Make sure $^L is defined */
8476             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8477 #endif
8478             UNI(OP_ENTERWRITE);
8479
8480         case KEY_x:
8481             if (PL_expect == XOPERATOR) {
8482                 if (*s == '=' && !PL_lex_allbrackets &&
8483                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8484                     return REPORT(0);
8485                 Mop(OP_REPEAT);
8486             }
8487             check_uni();
8488             goto just_a_word;
8489
8490         case KEY_xor:
8491             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8492                 return REPORT(0);
8493             pl_yylval.ival = OP_XOR;
8494             OPERATOR(OROP);
8495
8496         case KEY_y:
8497             s = scan_trans(s);
8498             TERM(sublex_start());
8499         }
8500     }}
8501 }
8502 #ifdef __SC__
8503 #pragma segment Main
8504 #endif
8505
8506 static int
8507 S_pending_ident(pTHX)
8508 {
8509     dVAR;
8510     PADOFFSET tmp = 0;
8511     /* pit holds the identifier we read and pending_ident is reset */
8512     char pit = PL_pending_ident;
8513     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8514     /* All routes through this function want to know if there is a colon.  */
8515     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8516     PL_pending_ident = 0;
8517
8518     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8519     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8520           "### Pending identifier '%s'\n", PL_tokenbuf); });
8521
8522     /* if we're in a my(), we can't allow dynamics here.
8523        $foo'bar has already been turned into $foo::bar, so
8524        just check for colons.
8525
8526        if it's a legal name, the OP is a PADANY.
8527     */
8528     if (PL_in_my) {
8529         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8530             if (has_colon)
8531                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8532                                   "variable %s in \"our\"",
8533                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8534             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8535         }
8536         else {
8537             if (has_colon)
8538                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8539                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8540                             UTF ? SVf_UTF8 : 0);
8541
8542             pl_yylval.opval = newOP(OP_PADANY, 0);
8543             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8544                                                         UTF ? SVf_UTF8 : 0);
8545             return PRIVATEREF;
8546         }
8547     }
8548
8549     /*
8550        build the ops for accesses to a my() variable.
8551     */
8552
8553     if (!has_colon) {
8554         if (!PL_in_my)
8555             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8556                                     UTF ? SVf_UTF8 : 0);
8557         if (tmp != NOT_IN_PAD) {
8558             /* might be an "our" variable" */
8559             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8560                 /* build ops for a bareword */
8561                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8562                 HEK * const stashname = HvNAME_HEK(stash);
8563                 SV *  const sym = newSVhek(stashname);
8564                 sv_catpvs(sym, "::");
8565                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8566                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8567                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8568                 gv_fetchsv(sym,
8569                     (PL_in_eval
8570                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8571                         : GV_ADDMULTI
8572                     ),
8573                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8574                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8575                      : SVt_PVHV));
8576                 return WORD;
8577             }
8578
8579             pl_yylval.opval = newOP(OP_PADANY, 0);
8580             pl_yylval.opval->op_targ = tmp;
8581             return PRIVATEREF;
8582         }
8583     }
8584
8585     /*
8586        Whine if they've said @foo in a doublequoted string,
8587        and @foo isn't a variable we can find in the symbol
8588        table.
8589     */
8590     if (ckWARN(WARN_AMBIGUOUS) &&
8591         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8592         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8593                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8594         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8595                 /* DO NOT warn for @- and @+ */
8596                 && !( PL_tokenbuf[2] == '\0' &&
8597                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8598            )
8599         {
8600             /* Downgraded from fatal to warning 20000522 mjd */
8601             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8602                         "Possible unintended interpolation of %"SVf" in string",
8603                         SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8604                                         SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8605         }
8606     }
8607
8608     /* build ops for a bareword */
8609     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8610                                                       tokenbuf_len - 1,
8611                                                       UTF ? SVf_UTF8 : 0 ));
8612     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8613     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8614                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8615                      | ( UTF ? SVf_UTF8 : 0 ),
8616                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8617                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8618                       : SVt_PVHV));
8619     return WORD;
8620 }
8621
8622 STATIC void
8623 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8624 {
8625     dVAR;
8626
8627     PERL_ARGS_ASSERT_CHECKCOMMA;
8628
8629     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8630         if (ckWARN(WARN_SYNTAX)) {
8631             int level = 1;
8632             const char *w;
8633             for (w = s+2; *w && level; w++) {
8634                 if (*w == '(')
8635                     ++level;
8636                 else if (*w == ')')
8637                     --level;
8638             }
8639             while (isSPACE(*w))
8640                 ++w;
8641             /* the list of chars below is for end of statements or
8642              * block / parens, boolean operators (&&, ||, //) and branch
8643              * constructs (or, and, if, until, unless, while, err, for).
8644              * Not a very solid hack... */
8645             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8646                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8647                             "%s (...) interpreted as function",name);
8648         }
8649     }
8650     while (s < PL_bufend && isSPACE(*s))
8651         s++;
8652     if (*s == '(')
8653         s++;
8654     while (s < PL_bufend && isSPACE(*s))
8655         s++;
8656     if (isIDFIRST_lazy_if(s,UTF)) {
8657         const char * const w = s;
8658         s += UTF ? UTF8SKIP(s) : 1;
8659         while (isALNUM_lazy_if(s,UTF))
8660             s += UTF ? UTF8SKIP(s) : 1;
8661         while (s < PL_bufend && isSPACE(*s))
8662             s++;
8663         if (*s == ',') {
8664             GV* gv;
8665             if (keyword(w, s - w, 0))
8666                 return;
8667
8668             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8669             if (gv && GvCVu(gv))
8670                 return;
8671             Perl_croak(aTHX_ "No comma allowed after %s", what);
8672         }
8673     }
8674 }
8675
8676 /* Either returns sv, or mortalizes sv and returns a new SV*.
8677    Best used as sv=new_constant(..., sv, ...).
8678    If s, pv are NULL, calls subroutine with one argument,
8679    and type is used with error messages only. */
8680
8681 STATIC SV *
8682 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8683                SV *sv, SV *pv, const char *type, STRLEN typelen)
8684 {
8685     dVAR; dSP;
8686     HV * table = GvHV(PL_hintgv);                /* ^H */
8687     SV *res;
8688     SV **cvp;
8689     SV *cv, *typesv;
8690     const char *why1 = "", *why2 = "", *why3 = "";
8691
8692     PERL_ARGS_ASSERT_NEW_CONSTANT;
8693
8694     /* charnames doesn't work well if there have been errors found */
8695     if (PL_error_count > 0 && strEQ(key,"charnames"))
8696         return &PL_sv_undef;
8697
8698     if (!table
8699         || ! (PL_hints & HINT_LOCALIZE_HH)
8700         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8701         || ! SvOK(*cvp))
8702     {
8703         SV *msg;
8704
8705         /* Here haven't found what we're looking for.  If it is charnames,
8706          * perhaps it needs to be loaded.  Try doing that before giving up */
8707         if (strEQ(key,"charnames")) {
8708             Perl_load_module(aTHX_
8709                             0,
8710                             newSVpvs("_charnames"),
8711                              /* version parameter; no need to specify it, as if
8712                               * we get too early a version, will fail anyway,
8713                               * not being able to find '_charnames' */
8714                             NULL,
8715                             newSVpvs(":full"),
8716                             newSVpvs(":short"),
8717                             NULL);
8718             SPAGAIN;
8719             table = GvHV(PL_hintgv);
8720             if (table
8721                 && (PL_hints & HINT_LOCALIZE_HH)
8722                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8723                 && SvOK(*cvp))
8724             {
8725                 goto now_ok;
8726             }
8727         }
8728         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8729             msg = Perl_newSVpvf(aTHX_
8730                             "Constant(%s) unknown", (type ? type: "undef"));
8731         }
8732         else {
8733         why1 = "$^H{";
8734         why2 = key;
8735         why3 = "} is not defined";
8736     report:
8737         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8738                             (type ? type: "undef"), why1, why2, why3);
8739         }
8740         yyerror(SvPVX_const(msg));
8741         SvREFCNT_dec(msg);
8742         return sv;
8743     }
8744 now_ok:
8745     sv_2mortal(sv);                     /* Parent created it permanently */
8746     cv = *cvp;
8747     if (!pv && s)
8748         pv = newSVpvn_flags(s, len, SVs_TEMP);
8749     if (type && pv)
8750         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8751     else
8752         typesv = &PL_sv_undef;
8753
8754     PUSHSTACKi(PERLSI_OVERLOAD);
8755     ENTER ;
8756     SAVETMPS;
8757
8758     PUSHMARK(SP) ;
8759     EXTEND(sp, 3);
8760     if (pv)
8761         PUSHs(pv);
8762     PUSHs(sv);
8763     if (pv)
8764         PUSHs(typesv);
8765     PUTBACK;
8766     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8767
8768     SPAGAIN ;
8769
8770     /* Check the eval first */
8771     if (!PL_in_eval && SvTRUE(ERRSV)) {
8772         sv_catpvs(ERRSV, "Propagated");
8773         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8774         (void)POPs;
8775         res = SvREFCNT_inc_simple(sv);
8776     }
8777     else {
8778         res = POPs;
8779         SvREFCNT_inc_simple_void(res);
8780     }
8781
8782     PUTBACK ;
8783     FREETMPS ;
8784     LEAVE ;
8785     POPSTACK;
8786
8787     if (!SvOK(res)) {
8788         why1 = "Call to &{$^H{";
8789         why2 = key;
8790         why3 = "}} did not return a defined value";
8791         sv = res;
8792         goto report;
8793     }
8794
8795     return res;
8796 }
8797
8798 /* Returns a NUL terminated string, with the length of the string written to
8799    *slp
8800    */
8801 STATIC char *
8802 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8803 {
8804     dVAR;
8805     register char *d = dest;
8806     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8807
8808     PERL_ARGS_ASSERT_SCAN_WORD;
8809
8810     for (;;) {
8811         if (d >= e)
8812             Perl_croak(aTHX_ ident_too_long);
8813         if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s)))   /* UTF handled below */
8814             *d++ = *s++;
8815         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8816             *d++ = ':';
8817             *d++ = ':';
8818             s++;
8819         }
8820         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8821             *d++ = *s++;
8822             *d++ = *s++;
8823         }
8824         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8825             char *t = s + UTF8SKIP(s);
8826             size_t len;
8827             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8828                 t += UTF8SKIP(t);
8829             len = t - s;
8830             if (d + len > e)
8831                 Perl_croak(aTHX_ ident_too_long);
8832             Copy(s, d, len, char);
8833             d += len;
8834             s = t;
8835         }
8836         else {
8837             *d = '\0';
8838             *slp = d - dest;
8839             return s;
8840         }
8841     }
8842 }
8843
8844 STATIC char *
8845 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8846 {
8847     dVAR;
8848     char *bracket = NULL;
8849     char funny = *s++;
8850     register char *d = dest;
8851     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8852
8853     PERL_ARGS_ASSERT_SCAN_IDENT;
8854
8855     if (isSPACE(*s))
8856         s = PEEKSPACE(s);
8857     if (isDIGIT(*s)) {
8858         while (isDIGIT(*s)) {
8859             if (d >= e)
8860                 Perl_croak(aTHX_ ident_too_long);
8861             *d++ = *s++;
8862         }
8863     }
8864     else {
8865         for (;;) {
8866             if (d >= e)
8867                 Perl_croak(aTHX_ ident_too_long);
8868             if (isALNUM(*s))    /* UTF handled below */
8869                 *d++ = *s++;
8870             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8871                 *d++ = ':';
8872                 *d++ = ':';
8873                 s++;
8874             }
8875             else if (*s == ':' && s[1] == ':') {
8876                 *d++ = *s++;
8877                 *d++ = *s++;
8878             }
8879             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8880                 char *t = s + UTF8SKIP(s);
8881                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8882                     t += UTF8SKIP(t);
8883                 if (d + (t - s) > e)
8884                     Perl_croak(aTHX_ ident_too_long);
8885                 Copy(s, d, t - s, char);
8886                 d += t - s;
8887                 s = t;
8888             }
8889             else
8890                 break;
8891         }
8892     }
8893     *d = '\0';
8894     d = dest;
8895     if (*d) {
8896         if (PL_lex_state != LEX_NORMAL)
8897             PL_lex_state = LEX_INTERPENDMAYBE;
8898         return s;
8899     }
8900     if (*s == '$' && s[1] &&
8901         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8902     {
8903         return s;
8904     }
8905     if (*s == '{') {
8906         bracket = s;
8907         s++;
8908     }
8909     if (s < send) {
8910         if (UTF) {
8911             const STRLEN skip = UTF8SKIP(s);
8912             STRLEN i;
8913             d[skip] = '\0';
8914             for ( i = 0; i < skip; i++ )
8915                 d[i] = *s++;
8916         }
8917         else {
8918             *d = *s++;
8919             d[1] = '\0';
8920         }
8921     }
8922     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8923         *d = toCTRL(*s);
8924         s++;
8925     }
8926     else if (ck_uni && !bracket)
8927         check_uni();
8928     if (bracket) {
8929         if (isSPACE(s[-1])) {
8930             while (s < send) {
8931                 const char ch = *s++;
8932                 if (!SPACE_OR_TAB(ch)) {
8933                     *d = ch;
8934                     break;
8935                 }
8936             }
8937         }
8938         if (isIDFIRST_lazy_if(d,UTF)) {
8939             d += UTF8SKIP(d);
8940             if (UTF) {
8941                 char *end = s;
8942                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8943                     end += UTF8SKIP(end);
8944                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8945                         end += UTF8SKIP(end);
8946                 }
8947                 Copy(s, d, end - s, char);
8948                 d += end - s;
8949                 s = end;
8950             }
8951             else {
8952                 while ((isALNUM(*s) || *s == ':') && d < e)
8953                     *d++ = *s++;
8954                 if (d >= e)
8955                     Perl_croak(aTHX_ ident_too_long);
8956             }
8957             *d = '\0';
8958             while (s < send && SPACE_OR_TAB(*s))
8959                 s++;
8960             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8961                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8962                     const char * const brack =
8963                         (const char *)
8964                         ((*s == '[') ? "[...]" : "{...}");
8965    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8966                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8967                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8968                         funny, dest, brack, funny, dest, brack);
8969                 }
8970                 bracket++;
8971                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8972                 PL_lex_allbrackets++;
8973                 return s;
8974             }
8975         }
8976         /* Handle extended ${^Foo} variables
8977          * 1999-02-27 mjd-perl-patch@plover.com */
8978         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8979                  && isALNUM(*s))
8980         {
8981             d++;
8982             while (isALNUM(*s) && d < e) {
8983                 *d++ = *s++;
8984             }
8985             if (d >= e)
8986                 Perl_croak(aTHX_ ident_too_long);
8987             *d = '\0';
8988         }
8989         if (*s == '}') {
8990             s++;
8991             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8992                 PL_lex_state = LEX_INTERPEND;
8993                 PL_expect = XREF;
8994             }
8995             if (PL_lex_state == LEX_NORMAL) {
8996                 if (ckWARN(WARN_AMBIGUOUS) &&
8997                     (keyword(dest, d - dest, 0)
8998                      || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
8999                 {
9000                     SV *tmp = newSVpvn_flags( dest, d - dest,
9001                                             SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9002                     if (funny == '#')
9003                         funny = '@';
9004                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9005                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9006                         funny, tmp, funny, tmp);
9007                 }
9008             }
9009         }
9010         else {
9011             s = bracket;                /* let the parser handle it */
9012             *dest = '\0';
9013         }
9014     }
9015     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9016         PL_lex_state = LEX_INTERPEND;
9017     return s;
9018 }
9019
9020 static bool
9021 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9022
9023     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9024      * the parse starting at 's', based on the subset that are valid in this
9025      * context input to this routine in 'valid_flags'. Advances s.  Returns
9026      * TRUE if the input should be treated as a valid flag, so the next char
9027      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9028      * first call on the current regex.  This routine will set it to any
9029      * charset modifier found.  The caller shouldn't change it.  This way,
9030      * another charset modifier encountered in the parse can be detected as an
9031      * error, as we have decided to allow only one */
9032
9033     const char c = **s;
9034     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9035
9036     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9037         if (isALNUM_lazy_if(*s, UTF)) {
9038             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9039                        UTF ? SVf_UTF8 : 0);
9040             (*s) += charlen;
9041             /* Pretend that it worked, so will continue processing before
9042              * dieing */
9043             return TRUE;
9044         }
9045         return FALSE;
9046     }
9047
9048     switch (c) {
9049
9050         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9051         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9052         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9053         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9054         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9055         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9056         case LOCALE_PAT_MOD:
9057             if (*charset) {
9058                 goto multiple_charsets;
9059             }
9060             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9061             *charset = c;
9062             break;
9063         case UNICODE_PAT_MOD:
9064             if (*charset) {
9065                 goto multiple_charsets;
9066             }
9067             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9068             *charset = c;
9069             break;
9070         case ASCII_RESTRICT_PAT_MOD:
9071             if (! *charset) {
9072                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9073             }
9074             else {
9075
9076                 /* Error if previous modifier wasn't an 'a', but if it was, see
9077                  * if, and accept, a second occurrence (only) */
9078                 if (*charset != 'a'
9079                     || get_regex_charset(*pmfl)
9080                         != REGEX_ASCII_RESTRICTED_CHARSET)
9081                 {
9082                         goto multiple_charsets;
9083                 }
9084                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9085             }
9086             *charset = c;
9087             break;
9088         case DEPENDS_PAT_MOD:
9089             if (*charset) {
9090                 goto multiple_charsets;
9091             }
9092             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9093             *charset = c;
9094             break;
9095     }
9096
9097     (*s)++;
9098     return TRUE;
9099
9100     multiple_charsets:
9101         if (*charset != c) {
9102             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9103         }
9104         else if (c == 'a') {
9105             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9106         }
9107         else {
9108             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9109         }
9110
9111         /* Pretend that it worked, so will continue processing before dieing */
9112         (*s)++;
9113         return TRUE;
9114 }
9115
9116 STATIC char *
9117 S_scan_pat(pTHX_ char *start, I32 type)
9118 {
9119     dVAR;
9120     PMOP *pm;
9121     char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9122     const char * const valid_flags =
9123         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9124     char charset = '\0';    /* character set modifier */
9125 #ifdef PERL_MAD
9126     char *modstart;
9127 #endif
9128
9129     PERL_ARGS_ASSERT_SCAN_PAT;
9130
9131     /* this was only needed for the initial scan_str; set it to false
9132      * so that any (?{}) code blocks etc are parsed normally */
9133     PL_reg_state.re_reparsing = FALSE;
9134     if (!s) {
9135         const char * const delimiter = skipspace(start);
9136         Perl_croak(aTHX_
9137                    (const char *)
9138                    (*delimiter == '?'
9139                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9140                     : "Search pattern not terminated" ));
9141     }
9142
9143     pm = (PMOP*)newPMOP(type, 0);
9144     if (PL_multi_open == '?') {
9145         /* This is the only point in the code that sets PMf_ONCE:  */
9146         pm->op_pmflags |= PMf_ONCE;
9147
9148         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9149            allows us to restrict the list needed by reset to just the ??
9150            matches.  */
9151         assert(type != OP_TRANS);
9152         if (PL_curstash) {
9153             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9154             U32 elements;
9155             if (!mg) {
9156                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9157                                  0);
9158             }
9159             elements = mg->mg_len / sizeof(PMOP**);
9160             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9161             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9162             mg->mg_len = elements * sizeof(PMOP**);
9163             PmopSTASH_set(pm,PL_curstash);
9164         }
9165     }
9166 #ifdef PERL_MAD
9167     modstart = s;
9168 #endif
9169
9170     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9171      * anon CV. False positives like qr/[(?{]/ are harmless */
9172
9173     if (type == OP_QR) {
9174         STRLEN len;
9175         char *e, *p = SvPV(PL_lex_stuff, len);
9176         e = p + len;
9177         for (; p < e; p++) {
9178             if (p[0] == '(' && p[1] == '?'
9179                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9180             {
9181                 pm->op_pmflags |= PMf_HAS_CV;
9182                 break;
9183             }
9184         }
9185         pm->op_pmflags |= PMf_IS_QR;
9186     }
9187
9188     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9189 #ifdef PERL_MAD
9190     if (PL_madskills && modstart != s) {
9191         SV* tmptoken = newSVpvn(modstart, s - modstart);
9192         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9193     }
9194 #endif
9195     /* issue a warning if /c is specified,but /g is not */
9196     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9197     {
9198         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9199                        "Use of /c modifier is meaningless without /g" );
9200     }
9201
9202     PL_lex_op = (OP*)pm;
9203     pl_yylval.ival = OP_MATCH;
9204     return s;
9205 }
9206
9207 STATIC char *
9208 S_scan_subst(pTHX_ char *start)
9209 {
9210     dVAR;
9211     char *s;
9212     register PMOP *pm;
9213     I32 first_start;
9214     I32 es = 0;
9215     char charset = '\0';    /* character set modifier */
9216 #ifdef PERL_MAD
9217     char *modstart;
9218 #endif
9219
9220     PERL_ARGS_ASSERT_SCAN_SUBST;
9221
9222     pl_yylval.ival = OP_NULL;
9223
9224     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9225
9226     if (!s)
9227         Perl_croak(aTHX_ "Substitution pattern not terminated");
9228
9229     if (s[-1] == PL_multi_open)
9230         s--;
9231 #ifdef PERL_MAD
9232     if (PL_madskills) {
9233         CURMAD('q', PL_thisopen);
9234         CURMAD('_', PL_thiswhite);
9235         CURMAD('E', PL_thisstuff);
9236         CURMAD('Q', PL_thisclose);
9237         PL_realtokenstart = s - SvPVX(PL_linestr);
9238     }
9239 #endif
9240
9241     first_start = PL_multi_start;
9242     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9243     if (!s) {
9244         if (PL_lex_stuff) {
9245             SvREFCNT_dec(PL_lex_stuff);
9246             PL_lex_stuff = NULL;
9247         }
9248         Perl_croak(aTHX_ "Substitution replacement not terminated");
9249     }
9250     PL_multi_start = first_start;       /* so whole substitution is taken together */
9251
9252     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9253
9254 #ifdef PERL_MAD
9255     if (PL_madskills) {
9256         CURMAD('z', PL_thisopen);
9257         CURMAD('R', PL_thisstuff);
9258         CURMAD('Z', PL_thisclose);
9259     }
9260     modstart = s;
9261 #endif
9262
9263     while (*s) {
9264         if (*s == EXEC_PAT_MOD) {
9265             s++;
9266             es++;
9267         }
9268         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9269         {
9270             break;
9271         }
9272     }
9273
9274 #ifdef PERL_MAD
9275     if (PL_madskills) {
9276         if (modstart != s)
9277             curmad('m', newSVpvn(modstart, s - modstart));
9278         append_madprops(PL_thismad, (OP*)pm, 0);
9279         PL_thismad = 0;
9280     }
9281 #endif
9282     if ((pm->op_pmflags & PMf_CONTINUE)) {
9283         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9284     }
9285
9286     if (es) {
9287         SV * const repl = newSVpvs("");
9288
9289         PL_sublex_info.super_bufptr = s;
9290         PL_sublex_info.super_bufend = PL_bufend;
9291         PL_multi_end = 0;
9292         pm->op_pmflags |= PMf_EVAL;
9293         while (es-- > 0) {
9294             if (es)
9295                 sv_catpvs(repl, "eval ");
9296             else
9297                 sv_catpvs(repl, "do ");
9298         }
9299         sv_catpvs(repl, "{");
9300         sv_catsv(repl, PL_lex_repl);
9301         if (strchr(SvPVX(PL_lex_repl), '#'))
9302             sv_catpvs(repl, "\n");
9303         sv_catpvs(repl, "}");
9304         SvEVALED_on(repl);
9305         SvREFCNT_dec(PL_lex_repl);
9306         PL_lex_repl = repl;
9307     }
9308
9309     PL_lex_op = (OP*)pm;
9310     pl_yylval.ival = OP_SUBST;
9311     return s;
9312 }
9313
9314 STATIC char *
9315 S_scan_trans(pTHX_ char *start)
9316 {
9317     dVAR;
9318     register char* s;
9319     OP *o;
9320     U8 squash;
9321     U8 del;
9322     U8 complement;
9323     bool nondestruct = 0;
9324 #ifdef PERL_MAD
9325     char *modstart;
9326 #endif
9327
9328     PERL_ARGS_ASSERT_SCAN_TRANS;
9329
9330     pl_yylval.ival = OP_NULL;
9331
9332     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9333     if (!s)
9334         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9335
9336     if (s[-1] == PL_multi_open)
9337         s--;
9338 #ifdef PERL_MAD
9339     if (PL_madskills) {
9340         CURMAD('q', PL_thisopen);
9341         CURMAD('_', PL_thiswhite);
9342         CURMAD('E', PL_thisstuff);
9343         CURMAD('Q', PL_thisclose);
9344         PL_realtokenstart = s - SvPVX(PL_linestr);
9345     }
9346 #endif
9347
9348     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9349     if (!s) {
9350         if (PL_lex_stuff) {
9351             SvREFCNT_dec(PL_lex_stuff);
9352             PL_lex_stuff = NULL;
9353         }
9354         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9355     }
9356     if (PL_madskills) {
9357         CURMAD('z', PL_thisopen);
9358         CURMAD('R', PL_thisstuff);
9359         CURMAD('Z', PL_thisclose);
9360     }
9361
9362     complement = del = squash = 0;
9363 #ifdef PERL_MAD
9364     modstart = s;
9365 #endif
9366     while (1) {
9367         switch (*s) {
9368         case 'c':
9369             complement = OPpTRANS_COMPLEMENT;
9370             break;
9371         case 'd':
9372             del = OPpTRANS_DELETE;
9373             break;
9374         case 's':
9375             squash = OPpTRANS_SQUASH;
9376             break;
9377         case 'r':
9378             nondestruct = 1;
9379             break;
9380         default:
9381             goto no_more;
9382         }
9383         s++;
9384     }
9385   no_more:
9386
9387     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9388     o->op_private &= ~OPpTRANS_ALL;
9389     o->op_private |= del|squash|complement|
9390       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9391       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9392
9393     PL_lex_op = o;
9394     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9395
9396 #ifdef PERL_MAD
9397     if (PL_madskills) {
9398         if (modstart != s)
9399             curmad('m', newSVpvn(modstart, s - modstart));
9400         append_madprops(PL_thismad, o, 0);
9401         PL_thismad = 0;
9402     }
9403 #endif
9404
9405     return s;
9406 }
9407
/* Scans a here-document.
 *
 * On entry s points at the two-character introducer (s is advanced by 2
 * before the delimiter is read; the deprecation text used below indicates
 * that this is "<<").  The terminator line is assembled in PL_tokenbuf: a
 * leading "\n" (only when 'outer' is false), the delimiter itself, and a
 * trailing "\n".  The delimiter may be quoted with `, ' or " (spaces and
 * tabs are allowed before the quote), be prefixed by a backslash (treated
 * like '), or be bare (treated like ").
 *
 * The body of the here-document is accumulated in a new SV, which is stored
 * in PL_lex_stuff.  pl_yylval.ival is set to OP_CONST for a ' delimiter,
 * OP_BACKTICK for a ` delimiter and OP_SCALAR otherwise.  PL_multi_start and
 * PL_multi_end are set from CopLINE(PL_curcop) before and after the body has
 * been read.
 *
 * Croaks when the delimiter does not fit into PL_tokenbuf, and calls
 * missingterm() when no terminator line can be found.
 *
 * Returns the updated position in the input buffer.
 */
9408 STATIC char *
9409 S_scan_heredoc(pTHX_ register char *s)
9410 {
9411     dVAR;
9412     SV *herewas;
9413     I32 op_type = OP_SCALAR;
9414     I32 len;
9415     SV *tmpstr;
9416     char term;
9417     const char *found_newline;
9418     register char *d;
9419     register char *e;
9420     char *peek;
         /* true when there is a file handle or a source filter to read more
          * lines from, and PL_lex_inwhat is not OP_SCALAR */
9421     const int outer = (PL_rsfp || PL_parser->filtered)
9422                    && !(PL_lex_inwhat == OP_SCALAR);
9423 #ifdef PERL_MAD
9424     I32 stuffstart = s - SvPVX(PL_linestr);
9425     char *tstart;
9426
9427     PL_realtokenstart = -1;
9428 #endif
9429
9430     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9431
         /* Build the terminator in PL_tokenbuf.  When the body is going to
          * be searched for inside an existing buffer (!outer) the terminator
          * gets a leading newline -- presumably so that it can only match at
          * the start of a line. */
9432     s += 2;
9433     d = PL_tokenbuf;
9434     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9435     if (!outer)
9436         *d++ = '\n';
9437     peek = s;
9438     while (SPACE_OR_TAB(*peek))
9439         peek++;
9440     if (*peek == '`' || *peek == '\'' || *peek =='"') {
             /* quoted delimiter: copy everything up to the closing quote */
9441         s = peek;
9442         term = *s++;
9443         s = delimcpy(d, e, s, PL_bufend, term, &len);
9444         d += len;
9445         if (s < PL_bufend)
9446             s++;
9447     }
9448     else {
             /* bare or backslashed delimiter: a run of word characters;
              * anything that does not fit is dropped here and reported just
              * below */
9449         if (*s == '\\')
9450             s++, term = '\'';
9451         else
9452             term = '"';
9453         if (!isALNUM_lazy_if(s,UTF))
9454             deprecate("bare << to mean <<\"\"");
9455         for (; isALNUM_lazy_if(s,UTF); s++) {
9456             if (d < e)
9457                 *d++ = *s;
9458         }
9459     }
9460     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9461         Perl_croak(aTHX_ "Delimiter for here document is too long");
9462     *d++ = '\n';
9463     *d = '\0';
9464     len = d - PL_tokenbuf;
9465
9466 #ifdef PERL_MAD
9467     if (PL_madskills) {
9468         tstart = PL_tokenbuf + !outer;
9469         PL_thisclose = newSVpvn(tstart, len - !outer);
9470         tstart = SvPVX(PL_linestr) + stuffstart;
9471         PL_thisopen = newSVpvn(tstart, s - tstart);
9472         stuffstart = s - SvPVX(PL_linestr);
9473     }
9474 #endif
9475 #ifndef PERL_STRICT_CR
         /* Normalise line endings in place, from the first CR onwards: a CR
          * (with or without a following LF) becomes LF, and an LF followed
          * by a CR becomes a single LF.  The buffer end and the length of
          * PL_linestr are adjusted to match. */
9476     d = strchr(s, '\r');
9477     if (d) {
9478         char * const olds = s;
9479         s = d;
9480         while (s < PL_bufend) {
9481             if (*s == '\r') {
9482                 *d++ = '\n';
9483                 if (*++s == '\n')
9484                     s++;
9485             }
9486             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9487                 *d++ = *s++;
9488                 s++;
9489             }
9490             else
9491                 *d++ = *s++;
9492         }
9493         *d = '\0';
9494         PL_bufend = d;
9495         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9496         s = olds;
9497     }
9498 #endif
9499 #ifdef PERL_MAD
9500     found_newline = 0;
9501 #endif
         /* herewas keeps the text following the delimiter: the whole rest of
          * the buffer when 'outer' is set or no newline is found, otherwise
          * just the rest of the current line, starting one character before
          * s */
9502     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9503         herewas = newSVpvn(s,PL_bufend-s);
9504     }
9505     else {
9506 #ifdef PERL_MAD
9507         herewas = newSVpvn(s-1,found_newline-s+1);
9508 #else
9509         s--;
9510         herewas = newSVpvn(s,found_newline-s);
9511 #endif
9512     }
9513 #ifdef PERL_MAD
9514     if (PL_madskills) {
9515         tstart = SvPVX(PL_linestr) + stuffstart;
9516         if (PL_thisstuff)
9517             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9518         else
9519             PL_thisstuff = newSVpvn(tstart, s - tstart);
9520     }
9521 #endif
9522     s += SvCUR(herewas);
9523
9524 #ifdef PERL_MAD
9525     stuffstart = s - SvPVX(PL_linestr);
9526
9527     if (found_newline)
9528         s--;
9529 #endif
9530
         /* tmpstr receives the body.  Its IV slot is set to -1 for a '
          * delimiter and to '\\' for a ` delimiter; it is not set here for
          * a " delimiter. */
9531     tmpstr = newSV_type(SVt_PVIV);
9532     SvGROW(tmpstr, 80);
9533     if (term == '\'') {
9534         op_type = OP_CONST;
9535         SvIV_set(tmpstr, -1);
9536     }
9537     else if (term == '`') {
9538         op_type = OP_BACKTICK;
9539         SvIV_set(tmpstr, '\\');
9540     }
9541
9542     CLINE;
9543     PL_multi_start = CopLINE(PL_curcop);
9544     PL_multi_open = PL_multi_close = '<';
         /* from here on 'term' is the first byte of the terminator, used as
          * a cheap test before the full memNE/memEQ comparison */
9545     term = *PL_tokenbuf;
9546     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
9547      && !PL_parser->filtered) {
             /* Inside a substitution in an eval with no file or filter: the
              * body is looked for in the buffer recorded in PL_sublex_info,
              * starting at its first newline.  The body is copied to tmpstr
              * and that buffer is rewritten with the body and terminator
              * taken out. */
9548         char * const bufptr = PL_sublex_info.super_bufptr;
9549         char * const bufend = PL_sublex_info.super_bufend;
9550         char * const olds = s - SvCUR(herewas);
9551         s = strchr(bufptr, '\n');
9552         if (!s)
9553             s = bufend;
9554         d = s;
9555         while (s < bufend &&
9556           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9557             if (*s++ == '\n')
9558                 CopLINE_inc(PL_curcop);
9559         }
9560         if (s >= bufend) {
9561             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9562             missingterm(PL_tokenbuf);
9563         }
9564         sv_setpvn(herewas,bufptr,d-bufptr+1);
9565         sv_setpvn(tmpstr,d+1,s-d);
9566         s += len - 1;
9567         sv_catpvn(herewas,s,bufend-s);
9568         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9569
9570         s = olds;
9571         goto retval;
9572     }
9573     else if (!outer) {
             /* The body is looked for in the current buffer.  It is copied
              * to tmpstr; what follows the terminator is appended to herewas,
              * which then replaces the contents of PL_linestr, and the
              * buffer pointers are reset to its start. */
9574         d = s;
9575         while (s < PL_bufend &&
9576           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9577             if (*s++ == '\n')
9578                 CopLINE_inc(PL_curcop);
9579         }
9580         if (s >= PL_bufend) {
9581             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9582             missingterm(PL_tokenbuf);
9583         }
9584         sv_setpvn(tmpstr,d+1,s-d);
9585 #ifdef PERL_MAD
9586         if (PL_madskills) {
9587             if (PL_thisstuff)
9588                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9589             else
9590                 PL_thisstuff = newSVpvn(d + 1, s - d);
9591             stuffstart = s - SvPVX(PL_linestr);
9592         }
9593 #endif
9594         s += len - 1;
9595         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9596
9597         sv_catpvn(herewas,s,PL_bufend-s);
9598         sv_setsv(PL_linestr,herewas);
9599         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9600         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9601         PL_last_lop = PL_last_uni = NULL;
9602     }
9603     else
9604         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
         /* Fetch further lines with lex_next_chunk() until one is the
          * terminator; every other line is appended to tmpstr.  Running out
          * of input, or getting here when 'outer' is false, is reported via
          * missingterm(). */
9605     while (s >= PL_bufend) {    /* multiple line string? */
9606 #ifdef PERL_MAD
9607         if (PL_madskills) {
9608             tstart = SvPVX(PL_linestr) + stuffstart;
9609             if (PL_thisstuff)
9610                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9611             else
9612                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9613         }
9614 #endif
9615         PL_bufptr = s;
9616         CopLINE_inc(PL_curcop);
9617         if (!outer || !lex_next_chunk(0)) {
9618             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9619             missingterm(PL_tokenbuf);
9620         }
9621         CopLINE_dec(PL_curcop);
9622         s = PL_bufptr;
9623 #ifdef PERL_MAD
9624         stuffstart = s - SvPVX(PL_linestr);
9625 #endif
9626         CopLINE_inc(PL_curcop);
9627         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9628         PL_last_lop = PL_last_uni = NULL;
9629 #ifndef PERL_STRICT_CR
             /* turn a trailing CR LF, LF CR or lone CR into a single LF */
9630         if (PL_bufend - PL_linestart >= 2) {
9631             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9632                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9633             {
9634                 PL_bufend[-2] = '\n';
9635                 PL_bufend--;
9636                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9637             }
9638             else if (PL_bufend[-1] == '\r')
9639                 PL_bufend[-1] = '\n';
9640         }
9641         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9642             PL_bufend[-1] = '\n';
9643 #endif
9644         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
                 /* Terminator line: overwrite its last character with a
                  * space and append the saved text (herewas) to PL_linestr,
                  * leaving s on that space. */
9645             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9646             *(SvPVX(PL_linestr) + off ) = ' ';
9647             lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9648             sv_catsv(PL_linestr,herewas);
9649             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9650             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9651         }
9652         else {
9653             s = PL_bufend;
9654             sv_catsv(tmpstr,PL_linestr);
9655         }
9656     }
9657     s++;
9658 retval:
         /* Common exit: record the end line, give back unused space in
          * tmpstr when more than a few bytes are spare, release herewas, and
          * (unless IN_BYTES) either flag the body as UTF-8 or recode it via
          * PL_encoding. */
9659     PL_multi_end = CopLINE(PL_curcop);
9660     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9661         SvPV_shrink_to_cur(tmpstr);
9662     }
9663     SvREFCNT_dec(herewas);
9664     if (!IN_BYTES) {
9665         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9666             SvUTF8_on(tmpstr);
9667         else if (PL_encoding)
9668             sv_recode_to_utf8(tmpstr, PL_encoding);
9669     }
9670     PL_lex_stuff = tmpstr;
9671     pl_yylval.ival = op_type;
9672     return s;
9673 }
9674
9675 /* scan_inputsymbol
9676    takes: current position in input buffer
9677    returns: new position in input buffer
9678    side-effects: pl_yylval and lex_op are set.
9679
9680    This code handles:
9681
9682    <>           read from ARGV
9683    <FH>         read from filehandle
9684    <pkg::FH>    read from package qualified filehandle
9685    <pkg'FH>     read from package qualified filehandle
9686    <$fh>        read from filehandle in $fh
9687    <*.h>        filename glob
9688
        Only the current line is searched for the closing '>' (up to the
        next newline, or PL_bufend if there is none).  The text between
        the delimiters is copied into PL_tokenbuf.

        Croaks with "Excessively long <> operator" if that text does not
        fit in PL_tokenbuf, "Unterminated <> operator" if the construct
        does not end on the current line, and "Glob not terminated" if
        scan_str() returns NULL for a glob.

        For a glob, pl_yylval.ival is set to OP_GLOB and the string itself
        is collected by scan_str().  For a filehandle read, the op tree is
        built here and stored in PL_lex_op, and pl_yylval.ival is set to
        OP_NULL.

9689 */
9690
9691 STATIC char *
9692 S_scan_inputsymbol(pTHX_ char *start)
9693 {
9694     dVAR;
9695     register char *s = start;           /* current position in buffer */
9696     char *end;                          /* limit of the search for '>' */
9697     I32 len;                            /* length of the text copied by delimcpy() */
9698     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9699     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9700
9701     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9702
         /* limit the search for the closing '>' to the current line */
9703     end = strchr(s, '\n');
9704     if (!end)
9705         end = PL_bufend;
9706     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9707
9708     /* die if we didn't have space for the contents of the <>,
9709        or if it didn't end, or if we see a newline
9710     */
9711
9712     if (len >= (I32)sizeof PL_tokenbuf)
9713         Perl_croak(aTHX_ "Excessively long <> operator");
9714     if (s >= end)
9715         Perl_croak(aTHX_ "Unterminated <> operator");
9716
9717     s++;                                /* step past the closing '>' */
9718
9719     /* check for <$fh>
9720        Remember, only scalar variables are interpreted as filehandles by
9721        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9722        treated as a glob() call.
9723        This code makes use of the fact that except for the $ at the front,
9724        a scalar variable and a filehandle look the same.
9725     */
9726     if (*d == '$' && d[1]) d++;         /* a lone "$" is not skipped */
9727
9728     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9729     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9730         d += UTF ? UTF8SKIP(d) : 1;
9731
9732     /* If we've tried to read what we allow filehandles to look like, and
9733        there's still text left, then it must be a glob() and not a getline.
9734        Use scan_str to pull out the stuff between the <> and treat it
9735        as nothing more than a string.
9736     */
9737
9738     if (d - PL_tokenbuf != len) {
9739         pl_yylval.ival = OP_GLOB;
             /* rescan from the original start position, not from s */
9740         s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9741         if (!s)
9742            Perl_croak(aTHX_ "Glob not terminated");
9743         return s;
9744     }
9745     else {
9746         bool readline_overriden = FALSE;
9747         GV *gv_readline;
9748         GV **gvp;
9749         /* we're in a filehandle read situation */
9750         d = PL_tokenbuf;
9751
9752         /* turn <> into <ARGV> (5 bytes: the name plus its trailing NUL) */
9753         if (!len)
9754             Copy("ARGV",d,5,char);
9755
9756         /* Check whether readline() is overridden: either the "readline"
                glob found by gv_fetchpvs(), or the one in PL_globalstash,
                must have a CV that is flagged as imported */
9757         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9758         if ((gv_readline
9759                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9760                 ||
9761                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9762                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9763                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9764             readline_overriden = TRUE;
9765
9766         /* if <$fh>, create the ops to turn the variable into a
9767            filehandle
9768         */
9769         if (*d == '$') {
9770             /* try to find it in the pad for this block, otherwise
9771                add symbol table ops
9772             */
9773             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9774             if (tmp != NOT_IN_PAD) {
9775                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
                         /* "our" variable: build the name "Stash::name"
                            (without the '$') in a mortal SV and look it
                            up as a package variable at intro_sym */
9776                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9777                     HEK * const stashname = HvNAME_HEK(stash);
9778                     SV * const sym = sv_2mortal(newSVhek(stashname));
9779                     sv_catpvs(sym, "::");
9780                     sv_catpv(sym, d+1);
9781                     d = SvPVX(sym);
9782                     goto intro_sym;
9783                 }
9784                 else {
                         /* pad variable: read through an OP_PADSV, or pass
                            it to the overriding readline sub via
                            OP_ENTERSUB */
9785                     OP * const o = newOP(OP_PADSV, 0);
9786                     o->op_targ = tmp;
9787                     PL_lex_op = readline_overriden
9788                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9789                                 op_append_elem(OP_LIST, o,
9790                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9791                         : (OP*)newUNOP(OP_READLINE, 0, o);
9792                 }
9793             }
9794             else {
9795                 GV *gv;
9796                 ++d;                /* skip the '$' */
                     /* also entered by goto from the "our" case above, with
                        d already pointing at the package-qualified name */
9797 intro_sym:
9798                 gv = gv_fetchpv(d,
9799                                 (PL_in_eval
9800                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9801                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9802                                 SVt_PV);
9803                 PL_lex_op = readline_overriden
9804                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9805                             op_append_elem(OP_LIST,
9806                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9807                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9808                     : (OP*)newUNOP(OP_READLINE, 0,
9809                             newUNOP(OP_RV2SV, 0,
9810                                 newGVOP(OP_GV, 0, gv)));
9811             }
                 /* the OPf_SPECIAL flag is only set on a real OP_READLINE,
                    never on the OP_ENTERSUB built for an override */
9812             if (!readline_overriden)
9813                 PL_lex_op->op_flags |= OPf_SPECIAL;
9814             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9815             pl_yylval.ival = OP_NULL;
9816         }
9817
9818         /* If it's none of the above, it must be a literal filehandle
9819            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9820         else {
9821             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9822             PL_lex_op = readline_overriden
9823                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9824                         op_append_elem(OP_LIST,
9825                             newGVOP(OP_GV, 0, gv),
9826                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9827                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9828             pl_yylval.ival = OP_NULL;
9829         }
9830     }
9831
9832     return s;
9833 }
9834
9835
9836 /* scan_str
9837    takes: start position in buffer
9838           keep_quoted preserve \ on the embedded delimiter(s)
9839           keep_delims preserve the delimiters around the string
9840           re_reparse  compiling a run-time /(?{})/:
9841                         collapse // to /,  and skip encoding src
9842    returns: position to continue reading from buffer, or NULL if the
                 input ran out before the closing delimiter was found
9843    side-effects: multi_start, multi_open, multi_close, multi_end,
9844         lex_repl or lex_stuff, and updates the read buffer.
9845
9846    This subroutine pulls a string out of the input.  It is called for:
9847         q               single quotes           q(literal text)
9848         '               single quotes           'literal text'
9849         qq              double quotes           qq(interpolate $here please)
9850         "               double quotes           "interpolate $here please"
9851         qx              backticks               qx(/bin/ls -l)
9852         `               backticks               `/bin/ls -l`
9853         qw              quote words             @EXPORT_OK = qw( func() $spam )
9854         m//             regexp match            m/this/
9855         s///            regexp substitute       s/this/that/
9856         tr///           string transliterate    tr/this/that/
9857         y///            string transliterate    y/this/that/
9858         ($*@)           sub prototypes          sub foo ($)
9859         (stuff)         sub attr parameters     sub foo : attr(stuff)
9860         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9861
9862    In most of these cases (all but <>, patterns and transliterate)
9863    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9864    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9865    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9866    calls scan_str().
9867
9868    It skips whitespace before the string starts, and treats the first
9869    character as the delimiter.  If the delimiter is one of ([{< then
9870    the corresponding "close" character )]}> is used as the closing
9871    delimiter.  It allows quoting of delimiters, and if the string has
9872    balanced delimiters ([{<>}]) it allows nesting.
9873
9874    On success, the SV with the resulting string is put into lex_stuff or,
9875    if that is already non-NULL, into lex_repl. The second case occurs only
9876    when parsing the RHS of the special constructs s/// and tr/// (y///).
9877    For convenience, the terminating delimiter character is stuffed into
9878    SvIVX of the SV.

        On failure the partially built SV is freed and the current line
        number is reset to the line on which the string started.
9879 */
9880
9881 STATIC char *
9882 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
9883 {
9884     dVAR;
9885     SV *sv;                             /* scalar value: string */
9886     const char *tmps;                   /* temp string, used for delimiter matching */
9887     register char *s = start;           /* current position in the buffer */
9888     register char term;                 /* terminating character */
9889     register char *to;                  /* current position in the sv's data */
9890     I32 brackets = 1;                   /* bracket nesting level */
9891     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9892     I32 termcode;                       /* terminating char. code */
9893     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9894     STRLEN termlen;                     /* length of terminating string */
9895     int last_off = 0;                   /* last position for nesting bracket */
9896 #ifdef PERL_MAD
9897     int stuffstart;                     /* offset into PL_linestr of text not yet saved */
9898     char *tstart;
9899 #endif
9900
9901     PERL_ARGS_ASSERT_SCAN_STR;
9902
9903     /* skip space before the delimiter */
9904     if (isSPACE(*s)) {
9905         s = PEEKSPACE(s);
9906     }
9907
9908 #ifdef PERL_MAD
9909     if (PL_realtokenstart >= 0) {
9910         stuffstart = PL_realtokenstart;
9911         PL_realtokenstart = -1;
9912     }
9913     else
9914         stuffstart = start - SvPVX(PL_linestr);
9915 #endif
9916     /* mark where we are, in case we need to report errors */
9917     CLINE;
9918
9919     /* after skipping whitespace, the next character is the terminator */
9920     term = *s;
9921     if (!UTF) {
9922         termcode = termstr[0] = term;
9923         termlen = 1;
9924     }
9925     else {
             /* under UTF the delimiter may be a multi-byte character: keep
                its code point, its byte length and its bytes */
9926         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9927         Copy(s, termstr, termlen, U8);
9928         if (!UTF8_IS_INVARIANT(term))
9929             has_utf8 = TRUE;
9930     }
9931
9932     /* mark where we are */
9933     PL_multi_start = CopLINE(PL_curcop);
9934     PL_multi_open = term;
9935
9936     /* find corresponding closing delimiter: in the lookup string each
            opener sits exactly 5 characters before its closer, and a
            closer (or a space) found there maps onto itself */
9937     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9938         termcode = termstr[0] = term = tmps[5];
9939
9940     PL_multi_close = term;
9941
9942     /* create a new SV to hold the contents, with its buffer pre-grown
9943        to 80 bytes.  The closing delimiter's code is kept in its IV slot. */
9944     sv = newSV_type(SVt_PVIV);
9945     SvGROW(sv, 80);
9946     SvIV_set(sv, termcode);
9947     (void)SvPOK_only(sv);               /* validate pointer */
9948
9949     /* move past delimiter and try to read a complete string */
9950     if (keep_delims)
9951         sv_catpvn(sv, s, termlen);
9952     s += termlen;
9953 #ifdef PERL_MAD
9954     tstart = SvPVX(PL_linestr) + stuffstart;
9955     if (!PL_thisopen && !keep_delims) {
9956         PL_thisopen = newSVpvn(tstart, s - tstart);
9957         stuffstart = s - SvPVX(PL_linestr);
9958     }
9959 #endif
         /* each pass consumes what is left of the current buffer; the loop
            ends by break once the closing delimiter has been found, or by
            return NULL when no more input can be read */
9960     for (;;) {
             /* source is in a non-UTF-8 encoding: let sv_cat_decode() do the
                copying.  NOTE(review): it is assumed to append decoded text
                to sv up to and including the terminator and to advance
                offset past what it consumed -- confirm against its
                documentation. */
9961         if (PL_encoding && !UTF && !re_reparse) {
9962             bool cont = TRUE;
9963
9964             while (cont) {
9965                 int offset = s - SvPVX_const(PL_linestr);
9966                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9967                                            &offset, (char*)termstr, termlen);
9968                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9969                 char * const svlast = SvEND(sv) - 1;
9970
                     /* advance s to the new offset, counting the newlines
                        in the input that was consumed */
9971                 for (; s < ns; s++) {
9972                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9973                         CopLINE_inc(PL_curcop);
9974                 }
9975                 if (!found)
9976                     goto read_more_line;
9977                 else {
9978                     /* handle quoted delimiters: count the backslashes
                            in front of the last character of sv; an odd
                            count means the delimiter was escaped */
9979                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9980                         const char *t;
9981                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9982                             t--;
9983                         if ((svlast-1 - t) % 2) {
9984                             if (!keep_quoted) {
                                     /* overwrite the backslash with the
                                        delimiter and drop one character */
9985                                 *(svlast-1) = term;
9986                                 *svlast = '\0';
9987                                 SvCUR_set(sv, SvCUR(sv) - 1);
9988                             }
9989                             continue;
9990                         }
9991                     }
9992                     if (PL_multi_open == PL_multi_close) {
9993                         cont = FALSE;
9994                     }
9995                     else {
                             /* bracketing delimiters: rescan what was added
                                since last_off, counting unescaped openers
                                and compacting away dropped backslashes */
9996                         const char *t;
9997                         char *w;
9998                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9999                             /* At here, all closes are "was quoted" one,
10000                                so we don't check PL_multi_close. */
10001                             if (*t == '\\') {
10002                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10003                                     t++;
10004                                 else
10005                                     *w++ = *t++;
10006                             }
10007                             else if (*t == PL_multi_open)
10008                                 brackets++;
10009
10010                             *w = *t;
10011                         }
                              /* characters were dropped: re-append the
                                 closer and fix up the length */
10012                         if (w < t) {
10013                             *w++ = term;
10014                             *w = '\0';
10015                             SvCUR_set(sv, w - SvPVX_const(sv));
10016                         }
10017                         last_off = w - SvPVX(sv);
10018                         if (--brackets <= 0)
10019                             cont = FALSE;
10020                     }
10021                 }
10022             }
                  /* drop the final character of sv (presumably the closing
                     delimiter appended above) unless delimiters are kept */
10023             if (!keep_delims) {
10024                 SvCUR_set(sv, SvCUR(sv) - 1);
10025                 *SvEND(sv) = '\0';
10026             }
10027             break;
10028         }
10029
10030         /* extend sv if need be: room for the rest of the buffer plus a
                 trailing NUL */
10031         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10032         /* set 'to' to the next character in the sv's string */
10033         to = SvPVX(sv)+SvCUR(sv);
10034
10035         /* if the open delimiter is the same as the close delimiter there
                 is no nesting to keep track of */
10036         if (PL_multi_open == PL_multi_close) {
10037             for (; s < PL_bufend; s++,to++) {
10038                 /* embedded newlines increment the current line number */
10039                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10040                     CopLINE_inc(PL_curcop);
10041                 /* handle quoted delimiters (not when the delimiter is
                         itself a backslash) */
10042                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
                          /* drop the backslash in front of the delimiter, and
                             under re_reparse also in front of a backslash */
10043                     if (!keep_quoted
10044                         && (s[1] == term
10045                             || (re_reparse && s[1] == '\\'))
10046                     )
10047                         s++;
10048                     /* any other quotes are simply copied straight through */
10049                     else
10050                         *to++ = *s++;
10051                 }
10052                 /* terminate when run out of buffer (the for() condition), or
10053                    have found the terminator */
10054                 else if (*s == term) {
10055                     if (termlen == 1)
10056                         break;
                          /* multi-byte terminator: all of its bytes must match */
10057                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10058                         break;
10059                 }
10060                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10061                     has_utf8 = TRUE;
10062                 *to = *s;
10063             }
10064         }
10065
10066         /* if the terminator isn't the same as the start character (e.g.,
10067            matched brackets), we have to allow more in the quoting, and
10068            be prepared for nested brackets.
10069         */
10070         else {
10071             /* read until we run out of string, or we find the terminator */
10072             for (; s < PL_bufend; s++,to++) {
10073                 /* embedded newlines increment the line count */
10074                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10075                     CopLINE_inc(PL_curcop);
10076                 /* backslashes can escape the open or closing characters */
10077                 if (*s == '\\' && s+1 < PL_bufend) {
10078                     if (!keep_quoted &&
10079                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10080                         s++;
10081                     else
10082                         *to++ = *s++;
10083                 }
10084                 /* allow nested opens and closes: only the close that
                         brings the nesting level down to zero ends the string */
10085                 else if (*s == PL_multi_close && --brackets <= 0)
10086                     break;
10087                 else if (*s == PL_multi_open)
10088                     brackets++;
10089                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10090                     has_utf8 = TRUE;
10091                 *to = *s;
10092             }
10093         }
10094         /* terminate the copied string and update the sv's end-of-string */
10095         *to = '\0';
10096         SvCUR_set(sv, to - SvPVX_const(sv));
10097
10098         /*
10099          * this next chunk reads more into the buffer if we're not done yet
10100          */
10101
10102         if (s < PL_bufend)
10103             break;              /* a copy loop stopped before the end of the
                                         buffer, so the terminator was found */
10104
10105 #ifndef PERL_STRICT_CR
              /* normalise the end of what has been copied so far: a trailing
                 "\r\n" or "\n\r" collapses to "\n", and a lone trailing "\r"
                 becomes "\n" */
10106         if (to - SvPVX_const(sv) >= 2) {
10107             if ((to[-2] == '\r' && to[-1] == '\n') ||
10108                 (to[-2] == '\n' && to[-1] == '\r'))
10109             {
10110                 to[-2] = '\n';
10111                 to--;
10112                 SvCUR_set(sv, to - SvPVX_const(sv));
10113             }
10114             else if (to[-1] == '\r')
10115                 to[-1] = '\n';
10116         }
10117         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10118             to[-1] = '\n';
10119 #endif
10120
10121      read_more_line:
10122         /* if we're out of file, or a read fails, bail and reset the current
10123            line marker so we can report where the unterminated string began
10124         */
10125 #ifdef PERL_MAD
10126         if (PL_madskills) {
                  /* save the rest of this buffer before it is replaced */
10127             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10128             if (PL_thisstuff)
10129                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10130             else
10131                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10132         }
10133 #endif
10134         CopLINE_inc(PL_curcop);
10135         PL_bufptr = PL_bufend;
10136         if (!lex_next_chunk(0)) {
10137             sv_free(sv);
10138             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10139             return NULL;
10140         }
10141         s = PL_bufptr;
10142 #ifdef PERL_MAD
10143         stuffstart = 0;
10144 #endif
10145     }
10146
10147     /* at this point, we have successfully read the delimited string */
10148
          /* on the plain (non-decoding) path s still points at the closing
             delimiter, so step over it here */
10149     if (!PL_encoding || UTF || re_reparse) {
10150 #ifdef PERL_MAD
10151         if (PL_madskills) {
10152             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10153             const int len = s - tstart;
10154             if (PL_thisstuff)
10155                 sv_catpvn(PL_thisstuff, tstart, len);
10156             else
10157                 PL_thisstuff = newSVpvn(tstart, len);
10158             if (!PL_thisclose && !keep_delims)
10159                 PL_thisclose = newSVpvn(s,termlen);
10160         }
10161 #endif
10162
10163         if (keep_delims)
10164             sv_catpvn(sv, s, termlen);
10165         s += termlen;
10166     }
10167 #ifdef PERL_MAD
10168     else {
              /* decoding path: here the closing delimiter is taken to be the
                 termlen bytes just before s */
10169         if (PL_madskills) {
10170             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10171             const int len = s - tstart - termlen;
10172             if (PL_thisstuff)
10173                 sv_catpvn(PL_thisstuff, tstart, len);
10174             else
10175                 PL_thisstuff = newSVpvn(tstart, len);
10176             if (!PL_thisclose && !keep_delims)
10177                 PL_thisclose = newSVpvn(s - termlen,termlen);
10178         }
10179     }
10180 #endif
          /* flag the result as UTF-8 if non-invariant bytes were seen under
             UTF, or if the text went through the PL_encoding path */
10181     if (has_utf8 || (PL_encoding && !re_reparse))
10182         SvUTF8_on(sv);
10183
10184     PL_multi_end = CopLINE(PL_curcop);
10185
10186     /* if we allocated too much space (more than 5 spare bytes), give
             some back by shrinking the buffer to the string plus its NUL */
10187     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10188         SvLEN_set(sv, SvCUR(sv) + 1);
10189         SvPV_renew(sv, SvLEN(sv));
10190     }
10191
10192     /* decide whether this is the first or second quoted string we've read
10193        for this op
10194     */
10195
10196     if (PL_lex_stuff)
10197         PL_lex_repl = sv;
10198     else
10199         PL_lex_stuff = sv;
10200     return s;
10201 }
10202
10203 /*
10204   scan_num
10205   takes: pointer to position in buffer
10206   returns: pointer to new position in buffer
10207   side-effects: builds ops for the constant in pl_yylval.op
10208
10209   Read a number in any of the formats that Perl accepts:
10210
10211   \d(_?\d)*(\.(\d(_?\d)*)?)?([Ee][\+\-]?(\d(_?\d)*))?   12 12.34 12.
10212   \.\d(_?\d)*([Ee][\+\-]?(\d(_?\d)*))?                  .34
10213   0b[01](_?[01])*
10214   0[0-7](_?[0-7])*
10215   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10216
10217   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10218   thing it reads.
10219
10220   If it reads a number without a decimal point or an exponent, it will
10221   try converting the number to an integer and see if it can do so
10222   without loss of precision.
10223 */
10224
10225 char *
10226 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10227 {
10228     dVAR;
10229     register const char *s = start;     /* current position in buffer */
10230     register char *d;                   /* destination in temp buffer */
10231     register char *e;                   /* end of temp buffer */
10232     NV nv;                              /* number read, as a double */
10233     SV *sv = NULL;                      /* place to put the converted number */
10234     bool floatit;                       /* boolean: int or float? */
10235     const char *lastub = NULL;          /* position of last underbar */
10236     static char const number_too_long[] = "Number too long";
10237
10238     PERL_ARGS_ASSERT_SCAN_NUM;
10239
10240     /* We use the first character to decide what type of number this is */
10241
10242     switch (*s) {
10243     default:
10244         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10245
10246     /* if it starts with a 0, it could be an octal number, a decimal in
10247        0.13 disguise, or a hexadecimal number, or a binary number. */
10248     case '0':
10249         {
10250           /* variables:
10251              u          holds the "number so far"
10252              shift      the power of 2 of the base
10253                         (hex == 4, octal == 3, binary == 1)
10254              overflowed was the number more than we can hold?
10255
10256              Shift is used when we add a digit.  It also serves as an "are
10257              we in octal/hex/binary?" indicator to disallow hex characters
10258              when in octal mode.
10259            */
10260             NV n = 0.0;
10261             UV u = 0;
10262             I32 shift;
10263             bool overflowed = FALSE;
10264             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10265             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10266             static const char* const bases[5] =
10267               { "", "binary", "", "octal", "hexadecimal" };
10268             static const char* const Bases[5] =
10269               { "", "Binary", "", "Octal", "Hexadecimal" };
10270             static const char* const maxima[5] =
10271               { "",
10272                 "0b11111111111111111111111111111111",
10273                 "",
10274                 "037777777777",
10275                 "0xffffffff" };
10276             const char *base, *Base, *max;
10277
10278             /* check for hex */
10279             if (s[1] == 'x' || s[1] == 'X') {
10280                 shift = 4;
10281                 s += 2;
10282                 just_zero = FALSE;
10283             } else if (s[1] == 'b' || s[1] == 'B') {
10284                 shift = 1;
10285                 s += 2;
10286                 just_zero = FALSE;
10287             }
10288             /* check for a decimal in disguise */
10289             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10290                 goto decimal;
10291             /* so it must be octal */
10292             else {
10293                 shift = 3;
10294                 s++;
10295             }
10296
10297             if (*s == '_') {
10298                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10299                                "Misplaced _ in number");
10300                lastub = s++;
10301             }
10302
10303             base = bases[shift];
10304             Base = Bases[shift];
10305             max  = maxima[shift];
10306
10307             /* read the rest of the number */
10308             for (;;) {
10309                 /* x is used in the overflow test,
10310                    b is the digit we're adding on. */
10311                 UV x, b;
10312
10313                 switch (*s) {
10314
10315                 /* if we don't mention it, we're done */
10316                 default:
10317                     goto out;
10318
10319                 /* _ are ignored -- but warned about if consecutive */
10320                 case '_':
10321                     if (lastub && s == lastub + 1)
10322                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10323                                        "Misplaced _ in number");
10324                     lastub = s++;
10325                     break;
10326
10327                 /* 8 and 9 are not octal */
10328                 case '8': case '9':
10329                     if (shift == 3)
10330                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10331                     /* FALL THROUGH */
10332
10333                 /* octal digits */
10334                 case '2': case '3': case '4':
10335                 case '5': case '6': case '7':
10336                     if (shift == 1)
10337                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10338                     /* FALL THROUGH */
10339
10340                 case '0': case '1':
10341                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10342                     goto digit;
10343
10344                 /* hex digits */
10345                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10346                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10347                     /* make sure they said 0x */
10348                     if (shift != 4)
10349                         goto out;
10350                     b = (*s++ & 7) + 9;
10351
10352                     /* Prepare to put the digit we have onto the end
10353                        of the number so far.  We check for overflows.
10354                     */
10355
10356                   digit:
10357                     just_zero = FALSE;
10358                     if (!overflowed) {
10359                         x = u << shift; /* make room for the digit */
10360
10361                         if ((x >> shift) != u
10362                             && !(PL_hints & HINT_NEW_BINARY)) {
10363                             overflowed = TRUE;
10364                             n = (NV) u;
10365                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10366                                              "Integer overflow in %s number",
10367                                              base);
10368                         } else
10369                             u = x | b;          /* add the digit to the end */
10370                     }
10371                     if (overflowed) {
10372                         n *= nvshift[shift];
10373                         /* If an NV has not enough bits in its
10374                          * mantissa to represent an UV this summing of
10375                          * small low-order numbers is a waste of time
10376                          * (because the NV cannot preserve the
10377                          * low-order bits anyway): we could just
10378                          * remember when did we overflow and in the
10379                          * end just multiply n by the right
10380                          * amount. */
10381                         n += (NV) b;
10382                     }
10383                     break;
10384                 }
10385             }
10386
10387           /* if we get here, we had success: make a scalar value from
10388              the number.
10389           */
10390           out:
10391
10392             /* final misplaced underbar check */
10393             if (s[-1] == '_') {
10394                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10395             }
10396
10397             if (overflowed) {
10398                 if (n > 4294967295.0)
10399                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10400                                    "%s number > %s non-portable",
10401                                    Base, max);
10402                 sv = newSVnv(n);
10403             }
10404             else {
10405 #if UVSIZE > 4
10406                 if (u > 0xffffffff)
10407                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10408                                    "%s number > %s non-portable",
10409                                    Base, max);
10410 #endif
10411                 sv = newSVuv(u);
10412             }
10413             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10414                 sv = new_constant(start, s - start, "integer",
10415                                   sv, NULL, NULL, 0);
10416             else if (PL_hints & HINT_NEW_BINARY)
10417                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10418         }
10419         break;
10420
10421     /*
10422       handle decimal numbers.
10423       we're also sent here when we read a 0 as the first digit
10424     */
10425     case '1': case '2': case '3': case '4': case '5':
10426     case '6': case '7': case '8': case '9': case '.':
10427       decimal:
10428         d = PL_tokenbuf;
10429         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10430         floatit = FALSE;
10431
10432         /* read next group of digits and _ and copy into d */
10433         while (isDIGIT(*s) || *s == '_') {
10434             /* skip underscores, checking for misplaced ones
10435                if -w is on
10436             */
10437             if (*s == '_') {
10438                 if (lastub && s == lastub + 1)
10439                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10440                                    "Misplaced _ in number");
10441                 lastub = s++;
10442             }
10443             else {
10444                 /* check for end of fixed-length buffer */
10445                 if (d >= e)
10446                     Perl_croak(aTHX_ number_too_long);
10447                 /* if we're ok, copy the character */
10448                 *d++ = *s++;
10449             }
10450         }
10451
10452         /* final misplaced underbar check */
10453         if (lastub && s == lastub + 1) {
10454             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10455         }
10456
10457         /* read a decimal portion if there is one.  avoid
10458            3..5 being interpreted as the number 3. followed
10459            by .5
10460         */
10461         if (*s == '.' && s[1] != '.') {
10462             floatit = TRUE;
10463             *d++ = *s++;
10464
10465             if (*s == '_') {
10466                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10467                                "Misplaced _ in number");
10468                 lastub = s;
10469             }
10470
10471             /* copy, ignoring underbars, until we run out of digits.
10472             */
10473             for (; isDIGIT(*s) || *s == '_'; s++) {
10474                 /* fixed length buffer check */
10475                 if (d >= e)
10476                     Perl_croak(aTHX_ number_too_long);
10477                 if (*s == '_') {
10478                    if (lastub && s == lastub + 1)
10479                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10480                                       "Misplaced _ in number");
10481                    lastub = s;
10482                 }
10483                 else
10484                     *d++ = *s;
10485             }
10486             /* fractional part ending in underbar? */
10487             if (s[-1] == '_') {
10488                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10489                                "Misplaced _ in number");
10490             }
10491             if (*s == '.' && isDIGIT(s[1])) {
10492                 /* oops, it's really a v-string, but without the "v" */
10493                 s = start;
10494                 goto vstring;
10495             }
10496         }
10497
10498         /* read exponent part, if present */
10499         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10500             floatit = TRUE;
10501             s++;
10502
10503             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10504             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10505
10506             /* stray preinitial _ */
10507             if (*s == '_') {
10508                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10509                                "Misplaced _ in number");
10510                 lastub = s++;
10511             }
10512
10513             /* allow positive or negative exponent */
10514             if (*s == '+' || *s == '-')
10515                 *d++ = *s++;
10516
10517             /* stray initial _ */
10518             if (*s == '_') {
10519                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10520                                "Misplaced _ in number");
10521                 lastub = s++;
10522             }
10523
10524             /* read digits of exponent */
10525             while (isDIGIT(*s) || *s == '_') {
10526                 if (isDIGIT(*s)) {
10527                     if (d >= e)
10528                         Perl_croak(aTHX_ number_too_long);
10529                     *d++ = *s++;
10530                 }
10531                 else {
10532                    if (((lastub && s == lastub + 1) ||
10533                         (!isDIGIT(s[1]) && s[1] != '_')))
10534                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10535                                       "Misplaced _ in number");
10536                    lastub = s++;
10537                 }
10538             }
10539         }
10540
10541
10542         /*
10543            We try to do an integer conversion first if no characters
10544            indicating "float" have been found.
10545          */
10546
10547         if (!floatit) {
10548             UV uv;
10549             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10550
10551             if (flags == IS_NUMBER_IN_UV) {
10552               if (uv <= IV_MAX)
10553                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10554               else
10555                 sv = newSVuv(uv);
10556             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10557               if (uv <= (UV) IV_MIN)
10558                 sv = newSViv(-(IV)uv);
10559               else
10560                 floatit = TRUE;
10561             } else
10562               floatit = TRUE;
10563         }
10564         if (floatit) {
10565             /* terminate the string */
10566             *d = '\0';
10567             nv = Atof(PL_tokenbuf);
10568             sv = newSVnv(nv);
10569         }
10570
10571         if ( floatit
10572              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10573             const char *const key = floatit ? "float" : "integer";
10574             const STRLEN keylen = floatit ? 5 : 7;
10575             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10576                                 key, keylen, sv, NULL, NULL, 0);
10577         }
10578         break;
10579
10580     /* if it starts with a v, it could be a v-string */
10581     case 'v':
10582 vstring:
10583                 sv = newSV(5); /* preallocate storage space */
10584                 s = scan_vstring(s, PL_bufend, sv);
10585         break;
10586     }
10587
10588     /* make the op for the constant and return */
10589
10590     if (sv)
10591         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10592     else
10593         lvalp->opval = NULL;
10594
10595     return (char *)s;
10596 }
10597
/*
 * S_scan_formline
 *
 * Gathers picture/argument lines of a format body, starting at s, into a
 * fresh string SV ("stuff") and forces the tokens that turn the gathered
 * text into a formline call.
 *
 * Gathering stops when one of the following happens:
 *   - the current line is a "." followed only by spaces/tabs (and, unless
 *     PERL_STRICT_CR is defined, carriage returns) up to a newline or
 *     PL_bufend; eofmt is set in that case;
 *   - a gathered line contained '@' or '^' (needargs becomes TRUE);
 *   - a line containing "~~" is met after some text was already gathered;
 *   - there is nothing more to read.
 * Lines that start with '#' are not appended to the gathered text.
 *
 * If text was gathered, PL_expect and PL_lex_state are updated and the
 * tokens are forced through force_next(): ',' (only when needargs),
 * THING (an OP_CONST holding the text) and LSTOP (OP_FORMLINE).
 * Otherwise the SV is released, PL_lex_formbrack is cleared if the
 * terminating "." line was seen, and PL_bufptr is set to s.
 *
 * Returns the position reached in the input buffer.
 */
10598 STATIC char *
10599 S_scan_formline(pTHX_ register char *s)
10600 {
10601     dVAR;
10602     register char *eol;
10603     register char *t;
          /* accumulates the text of the lines gathered so far */
10604     SV * const stuff = newSVpvs("");
          /* set once a gathered line contains '@' or '^' */
10605     bool needargs = FALSE;
          /* set when the "." terminator line has been recognised */
10606     bool eofmt = FALSE;
10607 #ifdef PERL_MAD
10608     char *tokenstart = s;
10609     SV* savewhite = NULL;
10610
10611     if (PL_madskills) {
10612         savewhite = PL_thiswhite;
10613         PL_thiswhite = 0;
10614     }
10615 #endif
10616
10617     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10618
10619     while (!needargs) {
              /* Is this line the lone "." that ends the format? */
10620         if (*s == '.') {
10621             t = s+1;
10622 #ifdef PERL_STRICT_CR
10623             while (SPACE_OR_TAB(*t))
10624                 t++;
10625 #else
10626             while (SPACE_OR_TAB(*t) || *t == '\r')
10627                 t++;
10628 #endif
10629             if (*t == '\n' || t == PL_bufend) {
10630                 eofmt = TRUE;
10631                 break;
10632             }
10633         }
              /* Locate the end of the current line: one past the next
               * newline (or PL_bufend if there is none) in the first
               * branch, otherwise the end of PL_linestr, which is also
               * stored back into PL_bufend. */
10634         if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
10635             eol = (char *) memchr(s,'\n',PL_bufend-s);
10636             if (!eol++)
10637                 eol = PL_bufend;
10638         }
10639         else
10640             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
              /* Lines starting with '#' are skipped, not gathered. */
10641         if (*s != '#') {
10642             for (t = s; t < eol; t++) {
10643                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10644                     needargs = FALSE;
10645                     goto enough;        /* ~~ must be first line in formline */
10646                 }
10647                 if (*t == '@' || *t == '^')
10648                     needargs = TRUE;
10649             }
10650             if (eol > s) {
10651                 sv_catpvn(stuff, s, eol-s);
10652 #ifndef PERL_STRICT_CR
                      /* The appended line ended in "\r\n": rewrite the tail
                       * of stuff so that it ends in a single "\n". */
10653                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10654                     char *end = SvPVX(stuff) + SvCUR(stuff);
10655                     end[-2] = '\n';
10656                     end[-1] = '\0';
10657                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10658                 }
10659 #endif
10660             }
10661             else
10662               break;
10663         }
10664         s = (char*)eol;
              /* Reading from a file or through a filter: pull in the next
               * chunk of input.  The line counter is incremented only for
               * the duration of the lex_next_chunk() call. */
10665         if (PL_rsfp || PL_parser->filtered) {
10666             bool got_some;
10667 #ifdef PERL_MAD
10668             if (PL_madskills) {
10669                 if (PL_thistoken)
10670                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10671                 else
10672                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10673             }
10674 #endif
10675             PL_bufptr = PL_bufend;
10676             CopLINE_inc(PL_curcop);
10677             got_some = lex_next_chunk(0);
10678             CopLINE_dec(PL_curcop);
10679             s = PL_bufptr;
10680 #ifdef PERL_MAD
10681             tokenstart = PL_bufptr;
10682 #endif
10683             if (!got_some)
10684                 break;
10685         }
10686         incline(s);
10687     }
10688   enough:
10689     if (SvCUR(stuff)) {
10690         PL_expect = XTERM;
10691         if (needargs) {
10692             PL_lex_state = LEX_NORMAL;
10693             start_force(PL_curforce);
10694             NEXTVAL_NEXTTOKE.ival = 0;
10695             force_next(',');
10696         }
10697         else
10698             PL_lex_state = LEX_FORMLINE;
              /* Outside "use bytes": mark the text as UTF-8 when the source
               * is UTF-8 and the text validates, else recode it if
               * PL_encoding is set. */
10699         if (!IN_BYTES) {
10700             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10701                 SvUTF8_on(stuff);
10702             else if (PL_encoding)
10703                 sv_recode_to_utf8(stuff, PL_encoding);
10704         }
10705         start_force(PL_curforce);
10706         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10707         force_next(THING);
10708         start_force(PL_curforce);
10709         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10710         force_next(LSTOP);
10711     }
10712     else {
              /* Nothing gathered: drop the empty SV. */
10713         SvREFCNT_dec(stuff);
10714         if (eofmt)
10715             PL_lex_formbrack = 0;
10716         PL_bufptr = s;
10717     }
10718 #ifdef PERL_MAD
10719     if (PL_madskills) {
10720         if (PL_thistoken)
10721             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10722         else
10723             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10724         PL_thiswhite = savewhite;
10725     }
10726 #endif
10727     return s;
10728 }
10729
10730 I32
10731 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10732 {
10733     dVAR;
10734     const I32 oldsavestack_ix = PL_savestack_ix;
10735     CV* const outsidecv = PL_compcv;
10736
10737     if (PL_compcv) {
10738         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10739     }
10740     SAVEI32(PL_subline);
10741     save_item(PL_subname);
10742     SAVESPTR(PL_compcv);
10743
10744     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10745     CvFLAGS(PL_compcv) |= flags;
10746
10747     PL_subline = CopLINE(PL_curcop);
10748     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10749     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10750     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10751
10752     return oldsavestack_ix;
10753 }
10754
10755 #ifdef __SC__
10756 #pragma segment Perl_yylex
10757 #endif
10758 static int
10759 S_yywarn(pTHX_ const char *const s, U32 flags)
10760 {
10761     dVAR;
10762
10763     PERL_ARGS_ASSERT_YYWARN;
10764
10765     PL_in_eval |= EVAL_WARNONLY;
10766     yyerror_pv(s, flags);
10767     PL_in_eval &= ~EVAL_WARNONLY;
10768     return 0;
10769 }
10770
10771 int
10772 Perl_yyerror(pTHX_ const char *const s)
10773 {
10774     PERL_ARGS_ASSERT_YYERROR;
10775     return yyerror_pvn(s, strlen(s), 0);
10776 }
10777
10778 int
10779 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10780 {
10781     PERL_ARGS_ASSERT_YYERROR_PV;
10782     return yyerror_pvn(s, strlen(s), flags);
10783 }
10784
/*
 * Perl_yyerror_pvn
 *
 * Builds and reports a parse error.  The message starts with the first
 * len bytes of s (treated as UTF-8 when flags contains SVf_UTF8), followed
 * by " at FILE line N, " and then either near "<text>", quoting source
 * text taken from the lexer buffer, or a phrase describing where the
 * parser was (at EOF, next token/char, end of line, within
 * pattern/string).  A hint about a runaway multi-line string may be added.
 *
 * When EVAL_WARNONLY is set in PL_in_eval the message is issued as a
 * syntax warning; otherwise it is handed to qerror().  Croaks once
 * PL_error_count has reached 10.  PL_in_my and PL_in_my_stash are reset
 * before returning.
 *
 * Note that the whitespace-skipping loops below advance the globals
 * PL_oldoldbufptr / PL_oldbufptr themselves (except on NETWARE).
 *
 * Always returns 0.
 */
10785 int
10786 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10787 {
10788     dVAR;
          /* start and length of the source text quoted after "near", if any */
10789     const char *context = NULL;
10790     int contlen = -1;
10791     SV *msg;
          /* position phrase used when no source text can be quoted */
10792     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10793     int yychar  = PL_parser->yychar;
10794     U32 is_utf8 = flags & SVf_UTF8;
10795
10796     PERL_ARGS_ASSERT_YYERROR_PVN;
10797
          /* Decide how to describe the error position: either a snippet of
           * the buffer (context/contlen) or a phrase in where_sv. */
10798     if (!yychar || (yychar == ';' && !PL_rsfp))
10799         sv_catpvs(where_sv, "at EOF");
10800     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10801       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10802       PL_oldbufptr != PL_bufptr) {
10803         /*
10804                 Only for NetWare:
10805                 The code below is removed for NetWare because it abends/crashes on NetWare
10806                 when the script has error such as not having the closing quotes like:
10807                     if ($var eq "value)
10808                 Checking of white spaces is anyway done in NetWare code.
10809         */
10810 #ifndef NETWARE
10811         while (isSPACE(*PL_oldoldbufptr))
10812             PL_oldoldbufptr++;
10813 #endif
10814         context = PL_oldoldbufptr;
10815         contlen = PL_bufptr - PL_oldoldbufptr;
10816     }
10817     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10818       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10819         /*
10820                 Only for NetWare:
10821                 The code below is removed for NetWare because it abends/crashes on NetWare
10822                 when the script has error such as not having the closing quotes like:
10823                     if ($var eq "value)
10824                 Checking of white spaces is anyway done in NetWare code.
10825         */
10826 #ifndef NETWARE
10827         while (isSPACE(*PL_oldbufptr))
10828             PL_oldbufptr++;
10829 #endif
10830         context = PL_oldbufptr;
10831         contlen = PL_bufptr - PL_oldbufptr;
10832     }
10833     else if (yychar > 255)
10834         sv_catpvs(where_sv, "next token ???");
10835     else if (yychar == -2) { /* YYEMPTY */
10836         if (PL_lex_state == LEX_NORMAL ||
10837            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10838             sv_catpvs(where_sv, "at end of line");
10839         else if (PL_lex_inpat)
10840             sv_catpvs(where_sv, "within pattern");
10841         else
10842             sv_catpvs(where_sv, "within string");
10843     }
10844     else {
              /* Show the lookahead character: ^X form below 32, the
               * character itself if printable, otherwise a 3-digit octal
               * escape. */
10845         sv_catpvs(where_sv, "next char ");
10846         if (yychar < 32)
10847             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10848         else if (isPRINT_LC(yychar)) {
10849             const char string = yychar;
10850             sv_catpvn(where_sv, &string, 1);
10851         }
10852         else
10853             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10854     }
          /* Assemble the message: caller's text, file and line, position. */
10855     msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
10856     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10857         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10858     if (context)
10859         Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
10860                             SVfARG(newSVpvn_flags(context, contlen,
10861                                         SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
10862     else
10863         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
          /* Add the runaway-string hint when the current line is the line
           * recorded in PL_multi_end or the one right after it; the hint
           * is given once, since PL_multi_end is then cleared. */
10864     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10865         Perl_sv_catpvf(aTHX_ msg,
10866         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10867                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10868         PL_multi_end = 0;
10869     }
10870     if (PL_in_eval & EVAL_WARNONLY) {
10871         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10872     }
10873     else
10874         qerror(msg);
          /* Stop compiling once ten errors have accumulated. */
10875     if (PL_error_count >= 10) {
10876         if (PL_in_eval && SvCUR(ERRSV))
10877             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10878                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10879         else
10880             Perl_croak(aTHX_ "%s has too many errors.\n",
10881             OutCopFILE(PL_curcop));
10882     }
10883     PL_in_my = 0;
10884     PL_in_my_stash = NULL;
10885     return 0;
10886 }
10887 #ifdef __SC__
10888 #pragma segment Main
10889 #endif
10890
10891 STATIC char*
10892 S_swallow_bom(pTHX_ U8 *s)
10893 {
10894     dVAR;
10895     const STRLEN slen = SvCUR(PL_linestr);
10896
10897     PERL_ARGS_ASSERT_SWALLOW_BOM;
10898
10899     switch (s[0]) {
10900     case 0xFF:
10901         if (s[1] == 0xFE) {
10902             /* UTF-16 little-endian? (or UTF-32LE?) */
10903             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10904                 /* diag_listed_as: Unsupported script encoding %s */
10905                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10906 #ifndef PERL_NO_UTF16_FILTER
10907             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10908             s += 2;
10909             if (PL_bufend > (char*)s) {
10910                 s = add_utf16_textfilter(s, TRUE);
10911             }
10912 #else
10913             /* diag_listed_as: Unsupported script encoding %s */
10914             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10915 #endif
10916         }
10917         break;
10918     case 0xFE:
10919         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10920 #ifndef PERL_NO_UTF16_FILTER
10921             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10922             s += 2;
10923             if (PL_bufend > (char *)s) {
10924                 s = add_utf16_textfilter(s, FALSE);
10925             }
10926 #else
10927             /* diag_listed_as: Unsupported script encoding %s */
10928             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10929 #endif
10930         }
10931         break;
10932     case 0xEF:
10933         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10934             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10935             s += 3;                      /* UTF-8 */
10936         }
10937         break;
10938     case 0:
10939         if (slen > 3) {
10940              if (s[1] == 0) {
10941                   if (s[2] == 0xFE && s[3] == 0xFF) {
10942                        /* UTF-32 big-endian */
10943                        /* diag_listed_as: Unsupported script encoding %s */
10944                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10945                   }
10946              }
10947              else if (s[2] == 0 && s[3] != 0) {
10948                   /* Leading bytes
10949                    * 00 xx 00 xx
10950                    * are a good indicator of UTF-16BE. */
10951 #ifndef PERL_NO_UTF16_FILTER
10952                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10953                   s = add_utf16_textfilter(s, FALSE);
10954 #else
10955                   /* diag_listed_as: Unsupported script encoding %s */
10956                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10957 #endif
10958              }
10959         }
10960 #ifdef EBCDIC
10961     case 0xDD:
10962         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10963             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10964             s += 4;                      /* UTF-8 */
10965         }
10966         break;
10967 #endif
10968
10969     default:
10970          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10971                   /* Leading bytes
10972                    * xx 00 xx 00
10973                    * are a good indicator of UTF-16LE. */
10974 #ifndef PERL_NO_UTF16_FILTER
10975               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10976               s = add_utf16_textfilter(s, TRUE);
10977 #else
10978               /* diag_listed_as: Unsupported script encoding %s */
10979               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10980 #endif
10981          }
10982     }
10983     return (char*)s;
10984 }
10985
10986
10987 #ifndef PERL_NO_UTF16_FILTER
/*
 * S_utf16_textfilter
 *
 * Source filter callback that turns UTF-16 input into UTF-8, handing over
 * at most one line per call by appending it to sv.
 *
 * State is kept in the filter's data SV (FILTER_DATA(idx)):
 *   - the SV's own string buffer holds UTF-8 text converted but not yet
 *     handed over;
 *   - IoTOP_GV holds an SV with raw UTF-16 bytes not yet converted;
 *   - IoPAGE holds the last read status (0 at EOF, negative after error);
 *   - IoLINES selects utf16_to_utf8_reversed() over utf16_to_utf8().
 *
 * idx     index of this filter; input is read from filter idx + 1
 * sv      receives the next line of UTF-8
 * maxlen  must be 0 (block mode is not supported; croaks otherwise)
 *
 * Returns a negative status if FILTER_READ reported an error; otherwise 1
 * if any bytes were appended to sv and 0 if none were.  Croaks if called
 * again after an error status has been recorded.
 */
10988 static I32
10989 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10990 {
10991     dVAR;
10992     SV *const filter = FILTER_DATA(idx);
10993     /* We re-use this each time round, throwing the contents away before we
10994        return.  */
10995     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10996     SV *const utf8_buffer = filter;
10997     IV status = IoPAGE(filter);
10998     const bool reverse = cBOOL(IoLINES(filter));
10999     I32 retval;
11000
11001     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11002
11003     /* As we're automatically added, at the lowest level, and hence only called
11004        from this file, we can be sure that we're not called in block mode. Hence
11005        don't bother writing code to deal with block mode.  */
11006     if (maxlen) {
11007         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11008     }
11009     if (status < 0) {
11010         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11011     }
11012     DEBUG_P(PerlIO_printf(Perl_debug_log,
11013                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11014                           FPTR2DPTR(void *, S_utf16_textfilter),
11015                           reverse ? 'l' : 'b', idx, maxlen, status,
11016                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11017
          /* Each pass: hand over a line if the UTF-8 buffer holds one (or
           * whatever is left at EOF); otherwise read more UTF-16, convert
           * it, and try again. */
11018     while (1) {
11019         STRLEN chars;
11020         STRLEN have;
11021         I32 newlen;
11022         U8 *end;
11023         /* First, look in our buffer of existing UTF-8 data:  */
11024         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11025
11026         if (nl) {
                  /* step past the newline so that it is handed over too */
11027             ++nl;
11028         } else if (status == 0) {
11029             /* EOF */
11030             IoPAGE(filter) = 0;
11031             nl = SvEND(utf8_buffer);
11032         }
11033         if (nl) {
11034             STRLEN got = nl - SvPVX(utf8_buffer);
11035             /* Did we have anything to append?  */
11036             retval = got != 0;
11037             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11038             /* Everything else in this code works just fine if SVp_POK isn't
11039                set.  This, however, needs it, and we need it to work, else
11040                we loop infinitely because the buffer is never consumed.  */
11041             sv_chop(utf8_buffer, nl);
11042             break;
11043         }
11044
11045         /* OK, not a complete line there, so need to read some more UTF-16.
11046            Read an extra octet if the buffer currently has an odd number. */
11047         while (1) {
11048             if (status <= 0)
11049                 break;
11050             if (SvCUR(utf16_buffer) >= 2) {
11051                 /* Location of the high octet of the last complete code point.
11052                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11053                    *coupled* with all the benefits of partial reads and
11054                    endianness.  */
11055                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11056                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11057
                      /* Not in 0xD8..0xDB, i.e. not the first half of a
                       * surrogate pair: enough has been read. */
11058                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11059                     break;
11060                 }
11061
11062                 /* We have the first half of a surrogate. Read more.  */
11063                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11064             }
11065
11066             status = FILTER_READ(idx + 1, utf16_buffer,
11067                                  160 + (SvCUR(utf16_buffer) & 1));
11068             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11069             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11070             if (status < 0) {
11071                 /* Error */
11072                 IoPAGE(filter) = status;
11073                 return status;
11074             }
11075         }
11076
              /* Convert every complete 16-bit unit currently buffered.  Room
               * is reserved for 3 output bytes per unit plus the
               * terminating NUL written below. */
11077         chars = SvCUR(utf16_buffer) >> 1;
11078         have = SvCUR(utf8_buffer);
11079         SvGROW(utf8_buffer, have + chars * 3 + 1);
11080
11081         if (reverse) {
11082             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11083                                          (U8*)SvPVX_const(utf8_buffer) + have,
11084                                          chars * 2, &newlen);
11085         } else {
11086             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11087                                 (U8*)SvPVX_const(utf8_buffer) + have,
11088                                 chars * 2, &newlen);
11089         }
11090         SvCUR_set(utf8_buffer, have + newlen);
11091         *end = '\0';
11092
11093         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11094            it's private to us, and utf16_to_utf8{,reversed} take a
11095            (pointer,length) pair, rather than a NUL-terminated string.  */
              /* An odd byte count leaves one unconverted byte: move it to
               * the front of the buffer so the next read completes it. */
11096         if(SvCUR(utf16_buffer) & 1) {
11097             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11098             SvCUR_set(utf16_buffer, 1);
11099         } else {
11100             SvCUR_set(utf16_buffer, 0);
11101         }
11102     }
11103     DEBUG_P(PerlIO_printf(Perl_debug_log,
11104                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11105                           status,
11106                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11107     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11108     return retval;
11109 }
11110
11111 static U8 *
11112 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11113 {
11114     SV *filter = filter_add(S_utf16_textfilter, NULL);
11115
11116     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11117
11118     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11119     sv_setpvs(filter, "");
11120     IoLINES(filter) = reversed;
11121     IoPAGE(filter) = 1; /* Not EOF */
11122
11123     /* Sadly, we have to return a valid pointer, come what may, so we have to
11124        ignore any error return from this.  */
11125     SvCUR_set(PL_linestr, 0);
11126     if (FILTER_READ(0, PL_linestr, 0)) {
11127         SvUTF8_on(PL_linestr);
11128     } else {
11129         SvUTF8_on(PL_linestr);
11130     }
11131     PL_bufend = SvEND(PL_linestr);
11132     return (U8*)SvPVX(PL_linestr);
11133 }
11134 #endif
11135
11136 /*
11137 Returns a pointer to the next character after the parsed
11138 vstring, as well as updating the passed in sv.
11139
11140 Function must be called like
11141
11142         sv = newSV(5);
11143         s = scan_vstring(s,e,sv);
11144
11145 where s and e are the start and end of the string.
11146 The sv should already be large enough to store the vstring
11147 passed in, for performance reasons.
11148
11149 */
11150
/*
 * Perl_scan_vstring
 *
 * s   start of the candidate v-string (an optional leading 'v' is allowed)
 * e   end of the input
 * sv  receives the result
 *
 * If the first run of digits/underscores is not followed by '.', and the
 * next non-space text is "=>", sv is set to the literal text scanned so
 * far and no v-string is built.  If that run is followed by a letter, sv
 * is left untouched and s is returned.  Otherwise each dot-separated
 * component is converted to a number (underscores ignored) and appended
 * to sv as one character, and the original text is attached to sv as
 * PERL_MAGIC_vstring magic.
 *
 * Returns the position just after the text that was consumed.
 */
11151 char *
11152 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11153 {
11154     dVAR;
11155     const char *pos = s;
11156     const char *start = s;
11157
11158     PERL_ARGS_ASSERT_SCAN_VSTRING;
11159
          /* Skip over the first component to see what follows it. */
11160     if (*pos == 'v') pos++;  /* get past 'v' */
11161     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11162         pos++;
11163     if ( *pos != '.') {
11164         /* this may not be a v-string if followed by => */
11165         const char *next = pos;
11166         while (next < e && isSPACE(*next))
11167             ++next;
11168         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11169             /* return string not v-string */
11170             sv_setpvn(sv,(char *)s,pos-s);
11171             return (char *)pos;
11172         }
11173     }
11174
11175     if (!isALPHA(*pos)) {
11176         U8 tmpbuf[UTF8_MAXBYTES+1];
11177
11178         if (*s == 'v')
11179             s++;  /* get past 'v' */
11180
11181         sv_setpvs(sv, "");
11182
              /* One pass per component: on entry the component's digits lie
               * between s and pos. */
11183         for (;;) {
11184             /* this is atoi() that tolerates underscores */
11185             U8 *tmpend;
11186             UV rev = 0;
11187             const char *end = pos;
11188             UV mult = 1;
                  /* Walk the digits right to left; a sum that got smaller
                   * means the UV wrapped around. */
11189             while (--end >= s) {
11190                 if (*end != '_') {
11191                     const UV orev = rev;
11192                     rev += (*end - '0') * mult;
11193                     mult *= 10;
11194                     if (orev > rev)
11195                         /* diag_listed_as: Integer overflow in %s number */
11196                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11197                                          "Integer overflow in decimal number");
11198                 }
11199             }
11200 #ifdef EBCDIC
11201             if (rev > 0x7FFFFFFF)
11202                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11203 #endif
11204             /* Append native character for the rev point */
11205             tmpend = uvchr_to_utf8(tmpbuf, rev);
11206             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11207             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11208                  SvUTF8_on(sv);
                  /* Continue only if a '.' followed by a digit comes next;
                   * otherwise the v-string ends at pos. */
11209             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11210                  s = ++pos;
11211             else {
11212                  s = pos;
11213                  break;
11214             }
11215             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11216                  pos++;
11217         }
11218         SvPOK_on(sv);
              /* Attach the source text (start .. pos) as vstring magic. */
11219         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11220         SvRMAGICAL_on(sv);
11221     }
11222     return (char *)s;
11223 }
11224
11225 int
11226 Perl_keyword_plugin_standard(pTHX_
11227         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11228 {
11229     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11230     PERL_UNUSED_CONTEXT;
11231     PERL_UNUSED_ARG(keyword_ptr);
11232     PERL_UNUSED_ARG(keyword_len);
11233     PERL_UNUSED_ARG(op_ptr);
11234     return KEYWORD_PLUGIN_DECLINE;
11235 }
11236
11237 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11238 static void
11239 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11240 {
11241     SAVEI32(PL_lex_brackets);
11242     if (PL_lex_brackets > 100)
11243         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11244     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11245     SAVEI32(PL_lex_allbrackets);
11246     PL_lex_allbrackets = 0;
11247     SAVEI8(PL_lex_fakeeof);
11248     PL_lex_fakeeof = (U8)fakeeof;
11249     if(yyparse(gramtype) && !PL_parser->error_count)
11250         qerror(Perl_mess(aTHX_ "Parse error"));
11251 }
11252
11253 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11254 static OP *
11255 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11256 {
11257     OP *o;
11258     ENTER;
11259     SAVEVPTR(PL_eval_root);
11260     PL_eval_root = NULL;
11261     parse_recdescent(gramtype, fakeeof);
11262     o = PL_eval_root;
11263     LEAVE;
11264     return o;
11265 }
11266
11267 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11268 static OP *
11269 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11270 {
11271     OP *exprop;
11272     if (flags & ~PARSE_OPTIONAL)
11273         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11274     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11275     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11276         if (!PL_parser->error_count)
11277             qerror(Perl_mess(aTHX_ "Parse error"));
11278         exprop = newOP(OP_NULL, 0);
11279     }
11280     return exprop;
11281 }
11282
11283 /*
11284 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11285
11286 Parse a Perl arithmetic expression.  This may contain operators of precedence
11287 down to the bit shift operators.  The expression must be followed (and thus
11288 terminated) either by a comparison or lower-precedence operator or by
11289 something that would normally terminate an expression such as semicolon.
11290 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11291 otherwise it is mandatory.  It is up to the caller to ensure that the
11292 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11293 the source of the code to be parsed and the lexical context for the
11294 expression.
11295
11296 The op tree representing the expression is returned.  If an optional
11297 expression is absent, a null pointer is returned, otherwise the pointer
11298 will be non-null.
11299
11300 If an error occurs in parsing or compilation, in most cases a valid op
11301 tree is returned anyway.  The error is reflected in the parser state,
11302 normally resulting in a single exception at the top level of parsing
11303 which covers all the compilation errors that occurred.  Some compilation
11304 errors, however, will throw an exception immediately.
11305
11306 =cut
11307 */
11308
11309 OP *
11310 Perl_parse_arithexpr(pTHX_ U32 flags)
11311 {
11312     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11313 }
11314
11315 /*
11316 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11317
11318 Parse a Perl term expression.  This may contain operators of precedence
11319 down to the assignment operators.  The expression must be followed (and thus
11320 terminated) either by a comma or lower-precedence operator or by
11321 something that would normally terminate an expression such as semicolon.
11322 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11323 otherwise it is mandatory.  It is up to the caller to ensure that the
11324 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11325 the source of the code to be parsed and the lexical context for the
11326 expression.
11327
11328 The op tree representing the expression is returned.  If an optional
11329 expression is absent, a null pointer is returned, otherwise the pointer
11330 will be non-null.
11331
11332 If an error occurs in parsing or compilation, in most cases a valid op
11333 tree is returned anyway.  The error is reflected in the parser state,
11334 normally resulting in a single exception at the top level of parsing
11335 which covers all the compilation errors that occurred.  Some compilation
11336 errors, however, will throw an exception immediately.
11337
11338 =cut
11339 */
11340
11341 OP *
11342 Perl_parse_termexpr(pTHX_ U32 flags)
11343 {
11344     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11345 }
11346
11347 /*
11348 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11349
11350 Parse a Perl list expression.  This may contain operators of precedence
11351 down to the comma operator.  The expression must be followed (and thus
11352 terminated) either by a low-precedence logic operator such as C<or> or by
11353 something that would normally terminate an expression such as semicolon.
11354 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11355 otherwise it is mandatory.  It is up to the caller to ensure that the
11356 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11357 the source of the code to be parsed and the lexical context for the
11358 expression.
11359
11360 The op tree representing the expression is returned.  If an optional
11361 expression is absent, a null pointer is returned, otherwise the pointer
11362 will be non-null.
11363
11364 If an error occurs in parsing or compilation, in most cases a valid op
11365 tree is returned anyway.  The error is reflected in the parser state,
11366 normally resulting in a single exception at the top level of parsing
11367 which covers all the compilation errors that occurred.  Some compilation
11368 errors, however, will throw an exception immediately.
11369
11370 =cut
11371 */
11372
11373 OP *
11374 Perl_parse_listexpr(pTHX_ U32 flags)
11375 {
11376     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11377 }
11378
11379 /*
11380 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11381
11382 Parse a single complete Perl expression.  This allows the full
11383 expression grammar, including the lowest-precedence operators such
11384 as C<or>.  The expression must be followed (and thus terminated) by a
11385 token that an expression would normally be terminated by: end-of-file,
11386 closing bracketing punctuation, semicolon, or one of the keywords that
11387 signals a postfix expression-statement modifier.  If I<flags> includes
11388 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11389 mandatory.  It is up to the caller to ensure that the dynamic parser
11390 state (L</PL_parser> et al) is correctly set to reflect the source of
11391 the code to be parsed and the lexical context for the expression.
11392
11393 The op tree representing the expression is returned.  If an optional
11394 expression is absent, a null pointer is returned, otherwise the pointer
11395 will be non-null.
11396
11397 If an error occurs in parsing or compilation, in most cases a valid op
11398 tree is returned anyway.  The error is reflected in the parser state,
11399 normally resulting in a single exception at the top level of parsing
11400 which covers all the compilation errors that occurred.  Some compilation
11401 errors, however, will throw an exception immediately.
11402
11403 =cut
11404 */
11405
11406 OP *
11407 Perl_parse_fullexpr(pTHX_ U32 flags)
11408 {
11409     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11410 }
11411
11412 /*
11413 =for apidoc Amx|OP *|parse_block|U32 flags
11414
11415 Parse a single complete Perl code block.  This consists of an opening
11416 brace, a sequence of statements, and a closing brace.  The block
11417 constitutes a lexical scope, so C<my> variables and various compile-time
11418 effects can be contained within it.  It is up to the caller to ensure
11419 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11420 reflect the source of the code to be parsed and the lexical context for
11421 the statement.
11422
11423 The op tree representing the code block is returned.  This is always a
11424 real op, never a null pointer.  It will normally be a C<lineseq> list,
11425 including C<nextstate> or equivalent ops.  No ops to construct any kind
11426 of runtime scope are included by virtue of it being a block.
11427
11428 If an error occurs in parsing or compilation, in most cases a valid op
11429 tree (most likely null) is returned anyway.  The error is reflected in
11430 the parser state, normally resulting in a single exception at the top
11431 level of parsing which covers all the compilation errors that occurred.
11432 Some compilation errors, however, will throw an exception immediately.
11433
11434 The I<flags> parameter is reserved for future use, and must always
11435 be zero.
11436
11437 =cut
11438 */
11439
11440 OP *
11441 Perl_parse_block(pTHX_ U32 flags)
11442 {
11443     if (flags)
11444         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11445     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11446 }
11447
11448 /*
11449 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11450
11451 Parse a single unadorned Perl statement.  This may be a normal imperative
11452 statement or a declaration that has compile-time effect.  It does not
11453 include any label or other affixture.  It is up to the caller to ensure
11454 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11455 reflect the source of the code to be parsed and the lexical context for
11456 the statement.
11457
11458 The op tree representing the statement is returned.  This may be a
11459 null pointer if the statement is null, for example if it was actually
11460 a subroutine definition (which has compile-time side effects).  If not
11461 null, it will be ops directly implementing the statement, suitable to
11462 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11463 equivalent op (except for those embedded in a scope contained entirely
11464 within the statement).
11465
11466 If an error occurs in parsing or compilation, in most cases a valid op
11467 tree (most likely null) is returned anyway.  The error is reflected in
11468 the parser state, normally resulting in a single exception at the top
11469 level of parsing which covers all the compilation errors that occurred.
11470 Some compilation errors, however, will throw an exception immediately.
11471
11472 The I<flags> parameter is reserved for future use, and must always
11473 be zero.
11474
11475 =cut
11476 */
11477
11478 OP *
11479 Perl_parse_barestmt(pTHX_ U32 flags)
11480 {
11481     if (flags)
11482         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11483     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11484 }
11485
11486 /*
11487 =for apidoc Amx|SV *|parse_label|U32 flags
11488
11489 Parse a single label, possibly optional, of the type that may prefix a
11490 Perl statement.  It is up to the caller to ensure that the dynamic parser
11491 state (L</PL_parser> et al) is correctly set to reflect the source of
11492 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11493 label is optional, otherwise it is mandatory.
11494
11495 The name of the label is returned in the form of a fresh scalar.  If an
11496 optional label is absent, a null pointer is returned.
11497
11498 If an error occurs in parsing, which can only occur if the label is
11499 mandatory, a valid label is returned anyway.  The error is reflected in
11500 the parser state, normally resulting in a single exception at the top
11501 level of parsing which covers all the compilation errors that occurred.
11502
11503 =cut
11504 */
11505
11506 SV *
11507 Perl_parse_label(pTHX_ U32 flags)
11508 {
11509     if (flags & ~PARSE_OPTIONAL)
11510         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11511     if (PL_lex_state == LEX_KNOWNEXT) {
11512         PL_parser->yychar = yylex();
11513         if (PL_parser->yychar == LABEL) {
11514             SV *lsv;
11515             PL_parser->yychar = YYEMPTY;
11516             lsv = newSV_type(SVt_PV);
11517             sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
11518             return lsv;
11519         } else {
11520             yyunlex();
11521             goto no_label;
11522         }
11523     } else {
11524         char *s, *t;
11525         STRLEN wlen, bufptr_pos;
11526         lex_read_space(0);
11527         t = s = PL_bufptr;
11528         if (!isIDFIRST_lazy_if(s, UTF))
11529             goto no_label;
11530         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11531         if (word_takes_any_delimeter(s, wlen))
11532             goto no_label;
11533         bufptr_pos = s - SvPVX(PL_linestr);
11534         PL_bufptr = t;
11535         lex_read_space(LEX_KEEP_PREVIOUS);
11536         t = PL_bufptr;
11537         s = SvPVX(PL_linestr) + bufptr_pos;
11538         if (t[0] == ':' && t[1] != ':') {
11539             PL_oldoldbufptr = PL_oldbufptr;
11540             PL_oldbufptr = s;
11541             PL_bufptr = t+1;
11542             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11543         } else {
11544             PL_bufptr = s;
11545             no_label:
11546             if (flags & PARSE_OPTIONAL) {
11547                 return NULL;
11548             } else {
11549                 qerror(Perl_mess(aTHX_ "Parse error"));
11550                 return newSVpvs("x");
11551             }
11552         }
11553     }
11554 }
11555
11556 /*
11557 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11558
11559 Parse a single complete Perl statement.  This may be a normal imperative
11560 statement or a declaration that has compile-time effect, and may include
11561 optional labels.  It is up to the caller to ensure that the dynamic
11562 parser state (L</PL_parser> et al) is correctly set to reflect the source
11563 of the code to be parsed and the lexical context for the statement.
11564
11565 The op tree representing the statement is returned.  This may be a
11566 null pointer if the statement is null, for example if it was actually
11567 a subroutine definition (which has compile-time side effects).  If not
11568 null, it will be the result of a L</newSTATEOP> call, normally including
11569 a C<nextstate> or equivalent op.
11570
11571 If an error occurs in parsing or compilation, in most cases a valid op
11572 tree (most likely null) is returned anyway.  The error is reflected in
11573 the parser state, normally resulting in a single exception at the top
11574 level of parsing which covers all the compilation errors that occurred.
11575 Some compilation errors, however, will throw an exception immediately.
11576
11577 The I<flags> parameter is reserved for future use, and must always
11578 be zero.
11579
11580 =cut
11581 */
11582
11583 OP *
11584 Perl_parse_fullstmt(pTHX_ U32 flags)
11585 {
11586     if (flags)
11587         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11588     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11589 }
11590
11591 /*
11592 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11593
11594 Parse a sequence of zero or more Perl statements.  These may be normal
11595 imperative statements, including optional labels, or declarations
11596 that have compile-time effect, or any mixture thereof.  The statement
11597 sequence ends when a closing brace or end-of-file is encountered in a
11598 place where a new statement could have validly started.  It is up to
11599 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11600 is correctly set to reflect the source of the code to be parsed and the
11601 lexical context for the statements.
11602
11603 The op tree representing the statement sequence is returned.  This may
11604 be a null pointer if the statements were all null, for example if there
11605 were no statements or if there were only subroutine definitions (which
11606 have compile-time side effects).  If not null, it will be a C<lineseq>
11607 list, normally including C<nextstate> or equivalent ops.
11608
11609 If an error occurs in parsing or compilation, in most cases a valid op
11610 tree is returned anyway.  The error is reflected in the parser state,
11611 normally resulting in a single exception at the top level of parsing
11612 which covers all the compilation errors that occurred.  Some compilation
11613 errors, however, will throw an exception immediately.
11614
11615 The I<flags> parameter is reserved for future use, and must always
11616 be zero.
11617
11618 =cut
11619 */
11620
11621 OP *
11622 Perl_parse_stmtseq(pTHX_ U32 flags)
11623 {
11624     OP *stmtseqop;
11625     I32 c;
11626     if (flags)
11627         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11628     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11629     c = lex_peek_unichar(0);
11630     if (c != -1 && c != /*{*/'}')
11631         qerror(Perl_mess(aTHX_ "Parse error"));
11632     return stmtseqop;
11633 }
11634
11635 /*
11636  * Local variables:
11637  * c-indentation-style: bsd
11638  * c-basic-offset: 4
11639  * indent-tabs-mode: nil
11640  * End:
11641  *
11642  * ex: set ts=8 sts=4 sw=4 et:
11643  */