toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  *
 152  * These values refer to the various states within a sublex parse,
 153  * i.e. within a double quotish string
 154  */
 155
 156 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 157
 158 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 159 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 160 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 161 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 162 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 163
 164                                    /* at end of code, eg "$x" followed by:  */
 165 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 166 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 167
 168 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 169                                         string or after \E, $foo, etc       */
 170 #define LEX_INTERPCONST          2 /* NOT USED */
 171 #define LEX_FORMLINE             1 /* expecting a format line               */
 172 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 173
 174
 175 #ifdef DEBUGGING
 176 static const char* const lex_state_names[] = {
 177     "KNOWNEXT",
 178     "FORMLINE",
 179     "INTERPCONST",
 180     "INTERPCONCAT",
 181     "INTERPENDMAYBE",
 182     "INTERPEND",
 183     "INTERPSTART",
 184     "INTERPPUSH",
 185     "INTERPCASEMOD",
 186     "INTERPNORMAL",
 187     "NORMAL"
 188 };
 189 #endif
 190
 191 #ifdef ff_next
 192 #undef ff_next
 193 #endif
 194
 195 #include "keywords.h"
 196
 197 /* CLINE is a macro that ensures PL_copline has a sane value */
 198
 199 #ifdef CLINE
 200 #undef CLINE
 201 #endif
 202 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 203
 204 #ifdef PERL_MAD
 205 #  define SKIPSPACE0(s) skipspace0(s)
 206 #  define SKIPSPACE1(s) skipspace1(s)
 207 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 208 #  define PEEKSPACE(s) skipspace2(s,0)
 209 #else
 210 #  define SKIPSPACE0(s) skipspace(s)
 211 #  define SKIPSPACE1(s) skipspace(s)
 212 #  define SKIPSPACE2(s,tsv) skipspace(s)
 213 #  define PEEKSPACE(s) skipspace(s)
 214 #endif
 215
 216 /*
 217  * Convenience functions to return different tokens and prime the
 218  * lexer for the next token.  They all take an argument.
 219  *
 220  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 221  * OPERATOR     : generic operator
 222  * AOPERATOR    : assignment operator
 223  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 224  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 225  * PREREF       : *EXPR where EXPR is not a simple identifier
 226  * TERM         : expression term
 227  * LOOPX        : loop exiting command (goto, last, dump, etc)
 228  * FTST         : file test operator
 229  * FUN0         : zero-argument function
 230  * FUN0OP       : zero-argument function, with its op created in this file
 231  * FUN1         : not used, except for not, which isn't a UNIOP
 232  * BOop         : bitwise or or xor
 233  * BAop         : bitwise and
 234  * SHop         : shift operator
 235  * PWop         : power operator
 236  * PMop         : pattern-matching operator
 237  * Aop          : addition-level operator
 238  * Mop          : multiplication-level operator
 239  * Eop          : equality-testing operator
 240  * Rop          : relational operator <= != gt
 241  *
 242  * Also see LOP and lop() below.
 243  */
 244
 245 #ifdef DEBUGGING /* Serve -DT. */
 246 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 247 #else
 248 #   define REPORT(retval) (retval)
 249 #endif
 250
 251 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 252 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 253 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 254 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 255 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 256 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 257 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 258 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 259 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 260 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 261 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 262 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 263 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 264 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 265 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 266 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 267 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 268 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 269 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 270 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 271 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 272
 273 /* This bit of chicanery makes a unary function followed by
 274  * a parenthesis into a function with one argument, highest precedence.
 275  * The UNIDOR macro is for unary functions that can be followed by the //
 276  * operator (such as C<shift // 0>).
 277  */
 278 #define UNI3(f,x,have_x) { \
 279         pl_yylval.ival = f; \
 280         if (have_x) PL_expect = x; \
 281         PL_bufptr = s; \
 282         PL_last_uni = PL_oldbufptr; \
 283         PL_last_lop_op = f; \
 284         if (*s == '(') \
 285             return REPORT( (int)FUNC1 ); \
 286         s = PEEKSPACE(s); \
 287         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 288         }
 289 #define UNI(f)    UNI3(f,XTERM,1)
 290 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 291 #define UNIPROTO(f,optional) { \
 292         if (optional) PL_last_uni = PL_oldbufptr; \
 293         OPERATOR(f); \
 294         }
 295
 296 #define UNIBRACK(f) UNI3(f,0,0)
 297
 298 /* grandfather return to old style */
 299 #define OLDLOP(f) \
 300         do { \
 301             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 302                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 303             pl_yylval.ival = (f); \
 304             PL_expect = XTERM; \
 305             PL_bufptr = s; \
 306             return (int)LSTOP; \
 307         } while(0)
 308
 309 #ifdef DEBUGGING
 310
 311 /* how to interpret the pl_yylval associated with the token */
 312 enum token_type {
 313     TOKENTYPE_NONE,
 314     TOKENTYPE_IVAL,
 315     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 316     TOKENTYPE_PVAL,
 317     TOKENTYPE_OPVAL
 318 };
 319
 320 static struct debug_tokens {
 321     const int token;
 322     enum token_type type;
 323     const char *name;
 324 } const debug_tokens[] =
 325 {
 326     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 327     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 328     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 329     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 330     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 331     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 332     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 333     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 334     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 335     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 336     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 337     { DO,               TOKENTYPE_NONE,         "DO" },
 338     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 339     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 340     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 341     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 342     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 343     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 344     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 345     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 346     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 347     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 348     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 349     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 350     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 351     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 352     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 353     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 354     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 355     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 356     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 357     { IF,               TOKENTYPE_IVAL,         "IF" },
 358     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 359     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 360     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 361     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 362     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 363     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 364     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 365     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 366     { MY,               TOKENTYPE_IVAL,         "MY" },
 367     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 368     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 369     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 370     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 371     { OROR,             TOKENTYPE_NONE,         "OROR" },
 372     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 373     { PEG,              TOKENTYPE_NONE,         "PEG" },
 374     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 375     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 376     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 377     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 378     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 379     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 380     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 381     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 382     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 383     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 384     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 385     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 386     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 387     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 388     { SUB,              TOKENTYPE_NONE,         "SUB" },
 389     { THING,            TOKENTYPE_OPVAL,        "THING" },
 390     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 391     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 392     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 393     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 394     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 395     { USE,              TOKENTYPE_IVAL,         "USE" },
 396     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 397     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 398     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 399     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 400     { 0,                TOKENTYPE_NONE,         NULL }
 401 };
 402
 403 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 404
 405 STATIC int
 406 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 407 {
 408     dVAR;
 409
 410     PERL_ARGS_ASSERT_TOKEREPORT;
 411
 412     if (DEBUG_T_TEST) {
 413         const char *name = NULL;
 414         enum token_type type = TOKENTYPE_NONE;
 415         const struct debug_tokens *p;
 416         SV* const report = newSVpvs("<== ");
 417
 418         for (p = debug_tokens; p->token; p++) {
 419             if (p->token == (int)rv) {
 420                 name = p->name;
 421                 type = p->type;
 422                 break;
 423             }
 424         }
 425         if (name)
 426             Perl_sv_catpv(aTHX_ report, name);
 427         else if ((char)rv > ' ' && (char)rv < '~')
 428             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 429         else if (!rv)
 430             sv_catpvs(report, "EOF");
 431         else
 432             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 433         switch (type) {
 434         case TOKENTYPE_NONE:
 435             break;
 436         case TOKENTYPE_IVAL:
 437             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 438             break;
 439         case TOKENTYPE_OPNUM:
 440             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 441                                     PL_op_name[lvalp->ival]);
 442             break;
 443         case TOKENTYPE_PVAL:
 444             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 445             break;
 446         case TOKENTYPE_OPVAL:
 447             if (lvalp->opval) {
 448                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 449                                     PL_op_name[lvalp->opval->op_type]);
 450                 if (lvalp->opval->op_type == OP_CONST) {
 451                     Perl_sv_catpvf(aTHX_ report, " %s",
 452                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 453                 }
 454
 455             }
 456             else
 457                 sv_catpvs(report, "(opval=null)");
 458             break;
 459         }
 460         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 461     };
 462     return (int)rv;
 463 }
 464
 465
 466 /* print the buffer with suitable escapes */
 467
 468 STATIC void
 469 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 470 {
 471     SV* const tmp = newSVpvs("");
 472
 473     PERL_ARGS_ASSERT_PRINTBUF;
 474
 475     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 476     SvREFCNT_dec(tmp);
 477 }
 478
 479 #endif
 480
 481 static int
 482 S_deprecate_commaless_var_list(pTHX) {
 483     PL_expect = XTERM;
 484     deprecate("comma-less variable list");
 485     return REPORT(','); /* grandfather non-comma-format format */
 486 }
 487
 488 /*
 489  * S_ao
 490  *
 491  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 492  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 493  */
 494
 495 STATIC int
 496 S_ao(pTHX_ int toketype)
 497 {
 498     dVAR;
 499     if (*PL_bufptr == '=') {
 500         PL_bufptr++;
 501         if (toketype == ANDAND)
 502             pl_yylval.ival = OP_ANDASSIGN;
 503         else if (toketype == OROR)
 504             pl_yylval.ival = OP_ORASSIGN;
 505         else if (toketype == DORDOR)
 506             pl_yylval.ival = OP_DORASSIGN;
 507         toketype = ASSIGNOP;
 508     }
 509     return toketype;
 510 }
 511
 512 /*
 513  * S_no_op
 514  * When Perl expects an operator and finds something else, no_op
 515  * prints the warning.  It always prints "<something> found where
 516  * operator expected.  It prints "Missing semicolon on previous line?"
 517  * if the surprise occurs at the start of the line.  "do you need to
 518  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 519  * where the compiler doesn't know if foo is a method call or a function.
 520  * It prints "Missing operator before end of line" if there's nothing
 521  * after the missing operator, or "... before <...>" if there is something
 522  * after the missing operator.
 523  */
 524
 525 STATIC void
 526 S_no_op(pTHX_ const char *const what, char *s)
 527 {
 528     dVAR;
 529     char * const oldbp = PL_bufptr;
 530     const bool is_first = (PL_oldbufptr == PL_linestart);
 531
 532     PERL_ARGS_ASSERT_NO_OP;
 533
 534     if (!s)
 535         s = oldbp;
 536     else
 537         PL_bufptr = s;
 538     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 539     if (ckWARN_d(WARN_SYNTAX)) {
 540         if (is_first)
 541             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 542                     "\t(Missing semicolon on previous line?)\n");
 543         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 544             const char *t;
 545             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
 546                                                             t += UTF ? UTF8SKIP(t) : 1)
 547                 NOOP;
 548             if (t < PL_bufptr && isSPACE(*t))
 549                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 550                         "\t(Do you need to predeclare %"SVf"?)\n",
 551                     SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
 552                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 553         }
 554         else {
 555             assert(s >= oldbp);
 556             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 557                     "\t(Missing operator before %"SVf"?)\n",
 558                     SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
 559                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
 560         }
 561     }
 562     PL_bufptr = oldbp;
 563 }
 564
 565 /*
 566  * S_missingterm
 567  * Complain about missing quote/regexp/heredoc terminator.
 568  * If it's called with NULL then it cauterizes the line buffer.
 569  * If we're in a delimited string and the delimiter is a control
 570  * character, it's reformatted into a two-char sequence like ^C.
 571  * This is fatal.
 572  */
 573
 574 STATIC void
 575 S_missingterm(pTHX_ char *s)
 576 {
 577     dVAR;
 578     char tmpbuf[3];
 579     char q;
 580     if (s) {
 581         char * const nl = strrchr(s,'\n');
 582         if (nl)
 583             *nl = '\0';
 584     }
 585     else if (isCNTRL(PL_multi_close)) {
 586         *tmpbuf = '^';
 587         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 588         tmpbuf[2] = '\0';
 589         s = tmpbuf;
 590     }
 591     else {
 592         *tmpbuf = (char)PL_multi_close;
 593         tmpbuf[1] = '\0';
 594         s = tmpbuf;
 595     }
 596     q = strchr(s,'"') ? '\'' : '"';
 597     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 598 }
 599
 600 #include "feature.h"
 601
 602 /*
 603  * Check whether the named feature is enabled.
 604  */
 605 bool
 606 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 607 {
 608     dVAR;
 609     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 610
 611     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 612
 613     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 614
 615     if (namelen > MAX_FEATURE_LEN)
 616         return FALSE;
 617     memcpy(&he_name[8], name, namelen);
 618
 619     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 620                                      REFCOUNTED_HE_EXISTS));
 621 }
 622
 623 /*
 624  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 625  * utf16-to-utf8-reversed.
 626  */
 627
 628 #ifdef PERL_CR_FILTER
 629 static void
 630 strip_return(SV *sv)
 631 {
 632     const char *s = SvPVX_const(sv);
 633     const char * const e = s + SvCUR(sv);
 634
 635     PERL_ARGS_ASSERT_STRIP_RETURN;
 636
 637     /* outer loop optimized to do nothing if there are no CR-LFs */
 638     while (s < e) {
 639         if (*s++ == '\r' && *s == '\n') {
 640             /* hit a CR-LF, need to copy the rest */
 641             char *d = s - 1;
 642             *d++ = *s++;
 643             while (s < e) {
 644                 if (*s == '\r' && s[1] == '\n')
 645                     s++;
 646                 *d++ = *s++;
 647             }
 648             SvCUR(sv) -= s - d;
 649             return;
 650         }
 651     }
 652 }
 653
 654 STATIC I32
 655 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 656 {
 657     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 658     if (count > 0 && !maxlen)
 659         strip_return(sv);
 660     return count;
 661 }
 662 #endif
 663
 664 /*
 665 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 666
 667 Creates and initialises a new lexer/parser state object, supplying
 668 a context in which to lex and parse from a new source of Perl code.
 669 A pointer to the new state object is placed in L</PL_parser>.  An entry
 670 is made on the save stack so that upon unwinding the new state object
 671 will be destroyed and the former value of L</PL_parser> will be restored.
 672 Nothing else need be done to clean up the parsing context.
 673
 674 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 675 non-null, provides a string (in SV form) containing code to be parsed.
 676 A copy of the string is made, so subsequent modification of I<line>
 677 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 678 from which code will be read to be parsed.  If both are non-null, the
 679 code in I<line> comes first and must consist of complete lines of input,
 680 and I<rsfp> supplies the remainder of the source.
 681
 682 The I<flags> parameter is reserved for future use.  Currently it is only
 683 used by perl internally, so extensions should always pass zero.
 684
 685 =cut
 686 */
 687
 688 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 689    can share filters with the current parser.
 690    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 691    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 692    destruction. This is used to handle the case of defaulting to reading the
 693    script from the standard input because no filename was given on the command
 694    line (without getting confused by situation where STDIN has been closed, so
 695    the script handle is opened on fd 0)  */
 696
 697 void
 698 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 699 {
 700     dVAR;
 701     const char *s = NULL;
 702     yy_parser *parser, *oparser;
 703     if (flags && flags & ~LEX_START_FLAGS)
 704         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 705
 706     /* create and initialise a parser */
 707
 708     Newxz(parser, 1, yy_parser);
 709     parser->old_parser = oparser = PL_parser;
 710     PL_parser = parser;
 711
 712     parser->stack = NULL;
 713     parser->ps = NULL;
 714     parser->stack_size = 0;
 715
 716     /* on scope exit, free this parser and restore any outer one */
 717     SAVEPARSER(parser);
 718     parser->saved_curcop = PL_curcop;
 719
 720     /* initialise lexer state */
 721
 722 #ifdef PERL_MAD
 723     parser->curforce = -1;
 724 #else
 725     parser->nexttoke = 0;
 726 #endif
 727     parser->error_count = oparser ? oparser->error_count : 0;
 728     parser->copline = NOLINE;
 729     parser->lex_state = LEX_NORMAL;
 730     parser->expect = XSTATE;
 731     parser->rsfp = rsfp;
 732     parser->rsfp_filters =
 733       !(flags & LEX_START_SAME_FILTER) || !oparser
 734         ? NULL
 735         : MUTABLE_AV(SvREFCNT_inc(
 736             oparser->rsfp_filters
 737              ? oparser->rsfp_filters
 738              : (oparser->rsfp_filters = newAV())
 739           ));
 740
 741     Newx(parser->lex_brackstack, 120, char);
 742     Newx(parser->lex_casestack, 12, char);
 743     *parser->lex_casestack = '\0';
 744
 745     if (line) {
 746         STRLEN len;
 747         s = SvPV_const(line, len);
 748         parser->linestr = flags & LEX_START_COPIED
 749                             ? SvREFCNT_inc_simple_NN(line)
 750                             : newSVpvn_flags(s, len, SvUTF8(line));
 751         if (!len || s[len-1] != ';')
 752             sv_catpvs(parser->linestr, "\n;");
 753     } else {
 754         parser->linestr = newSVpvs("\n;");
 755     }
 756     parser->oldoldbufptr =
 757         parser->oldbufptr =
 758         parser->bufptr =
 759         parser->linestart = SvPVX(parser->linestr);
 760     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 761     parser->last_lop = parser->last_uni = NULL;
 762     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 763                                  |LEX_DONT_CLOSE_RSFP);
 764
 765     parser->in_pod = parser->filtered = 0;
 766 }
 767
 768
 769 /* delete a parser object */
 770
 771 void
 772 Perl_parser_free(pTHX_  const yy_parser *parser)
 773 {
 774     PERL_ARGS_ASSERT_PARSER_FREE;
 775
 776     PL_curcop = parser->saved_curcop;
 777     SvREFCNT_dec(parser->linestr);
 778
 779     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 780         PerlIO_clearerr(parser->rsfp);
 781     else if (parser->rsfp && (!parser->old_parser ||
 782                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 783         PerlIO_close(parser->rsfp);
 784     SvREFCNT_dec(parser->rsfp_filters);
 785
 786     Safefree(parser->lex_brackstack);
 787     Safefree(parser->lex_casestack);
 788     PL_parser = parser->old_parser;
 789     Safefree(parser);
 790 }
 791
 792
 793 /*
 794 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 795
 796 Buffer scalar containing the chunk currently under consideration of the
 797 text currently being lexed.  This is always a plain string scalar (for
 798 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 799 normal scalar means; instead refer to the buffer directly by the pointer
 800 variables described below.
 801
 802 The lexer maintains various C<char*> pointers to things in the
 803 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 804 reallocated, all of these pointers must be updated.  Don't attempt to
 805 do this manually, but rather use L</lex_grow_linestr> if you need to
 806 reallocate the buffer.
 807
 808 The content of the text chunk in the buffer is commonly exactly one
 809 complete line of input, up to and including a newline terminator,
 810 but there are situations where it is otherwise.  The octets of the
 811 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 812 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 813 flag on this scalar, which may disagree with it.
 814
 815 For direct examination of the buffer, the variable
 816 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 817 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 818 of these pointers is usually preferable to examination of the scalar
 819 through normal scalar means.
 820
 821 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 822
 823 Direct pointer to the end of the chunk of text currently being lexed, the
 824 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 825 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 826 always located at the end of the buffer, and does not count as part of
 827 the buffer's contents.
 828
 829 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 830
 831 Points to the current position of lexing inside the lexer buffer.
 832 Characters around this point may be freely examined, within
 833 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 834 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 835 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 836
 837 Lexing code (whether in the Perl core or not) moves this pointer past
 838 the characters that it consumes.  It is also expected to perform some
 839 bookkeeping whenever a newline character is consumed.  This movement
 840 can be more conveniently performed by the function L</lex_read_to>,
 841 which handles newlines appropriately.
 842
 843 Interpretation of the buffer's octets can be abstracted out by
 844 using the slightly higher-level functions L</lex_peek_unichar> and
 845 L</lex_read_unichar>.
 846
 847 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 848
 849 Points to the start of the current line inside the lexer buffer.
 850 This is useful for indicating at which column an error occurred, and
 851 not much else.  This must be updated by any lexing code that consumes
 852 a newline; the function L</lex_read_to> handles this detail.
 853
 854 =cut
 855 */
 856
 857 /*
 858 =for apidoc Amx|bool|lex_bufutf8
 859
 860 Indicates whether the octets in the lexer buffer
 861 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 862 of Unicode characters.  If not, they should be interpreted as Latin-1
 863 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 864
 865 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 866 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 867 encoding.
 868
 869 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 870 is significant, but not the whole story regarding the input character
 871 encoding.  Normally, when a file is being read, the scalar contains octets
 872 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 873 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 874 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 875 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 876 is in effect.  This logic may change in the future; use this function
 877 instead of implementing the logic yourself.
 878
 879 =cut
 880 */
 881
 882 bool
 883 Perl_lex_bufutf8(pTHX)
 884 {
 885     return UTF;
 886 }
 887
 888 /*
 889 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 890
 891 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 892 at least I<len> octets (including terminating NUL).  Returns a
 893 pointer to the reallocated buffer.  This is necessary before making
 894 any direct modification of the buffer that would increase its length.
 895 L</lex_stuff_pvn> provides a more convenient way to insert text into
 896 the buffer.
 897
 898 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 899 this function updates all of the lexer's variables that point directly
 900 into the buffer.
 901
 902 =cut
 903 */
 904
 905 char *
 906 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 907 {
 908     SV *linestr;
 909     char *buf;
 910     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 911     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 912     linestr = PL_parser->linestr;
 913     buf = SvPVX(linestr);
 914     if (len <= SvLEN(linestr))
 915         return buf;
 916     bufend_pos = PL_parser->bufend - buf;
 917     bufptr_pos = PL_parser->bufptr - buf;
 918     oldbufptr_pos = PL_parser->oldbufptr - buf;
 919     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 920     linestart_pos = PL_parser->linestart - buf;
 921     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 922     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 923     re_eval_start_pos = PL_sublex_info.re_eval_start ?
 924                             PL_sublex_info.re_eval_start - buf : 0;
 925
 926     buf = sv_grow(linestr, len);
 927
 928     PL_parser->bufend = buf + bufend_pos;
 929     PL_parser->bufptr = buf + bufptr_pos;
 930     PL_parser->oldbufptr = buf + oldbufptr_pos;
 931     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 932     PL_parser->linestart = buf + linestart_pos;
 933     if (PL_parser->last_uni)
 934         PL_parser->last_uni = buf + last_uni_pos;
 935     if (PL_parser->last_lop)
 936         PL_parser->last_lop = buf + last_lop_pos;
 937     if (PL_sublex_info.re_eval_start)
 938         PL_sublex_info.re_eval_start  = buf + re_eval_start_pos;
 939     return buf;
 940 }
 941
 942 /*
 943 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 944
 945 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 946 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 947 reallocating the buffer if necessary.  This means that lexing code that
 948 runs later will see the characters as if they had appeared in the input.
 949 It is not recommended to do this as part of normal parsing, and most
 950 uses of this facility run the risk of the inserted characters being
 951 interpreted in an unintended manner.
 952
 953 The string to be inserted is represented by I<len> octets starting
 954 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 955 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 956 The characters are recoded for the lexer buffer, according to how the
 957 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 958 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 959 function is more convenient.
 960
 961 =cut
 962 */
 963
 964 void
 965 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 966 {
 967     dVAR;
 968     char *bufptr;
 969     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 970     if (flags & ~(LEX_STUFF_UTF8))
 971         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 972     if (UTF) {
 973         if (flags & LEX_STUFF_UTF8) {
 974             goto plain_copy;
 975         } else {
 976             STRLEN highhalf = 0;
 977             const char *p, *e = pv+len;
 978             for (p = pv; p != e; p++)
 979                 highhalf += !!(((U8)*p) & 0x80);
 980             if (!highhalf)
 981                 goto plain_copy;
 982             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 983             bufptr = PL_parser->bufptr;
 984             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 985             SvCUR_set(PL_parser->linestr,
 986                 SvCUR(PL_parser->linestr) + len+highhalf);
 987             PL_parser->bufend += len+highhalf;
 988             for (p = pv; p != e; p++) {
 989                 U8 c = (U8)*p;
 990                 if (c & 0x80) {
 991                     *bufptr++ = (char)(0xc0 | (c >> 6));
 992                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 993                 } else {
 994                     *bufptr++ = (char)c;
 995                 }
 996             }
 997         }
 998     } else {
 999         if (flags & LEX_STUFF_UTF8) {
1000             STRLEN highhalf = 0;
1001             const char *p, *e = pv+len;
1002             for (p = pv; p != e; p++) {
1003                 U8 c = (U8)*p;
1004                 if (c >= 0xc4) {
1005                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1006                                 "non-Latin-1 character into Latin-1 input");
1007                 } else if (c >= 0xc2 && p+1 != e &&
1008                             (((U8)p[1]) & 0xc0) == 0x80) {
1009                     p++;
1010                     highhalf++;
1011                 } else if (c >= 0x80) {
1012                     /* malformed UTF-8 */
1013                     ENTER;
1014                     SAVESPTR(PL_warnhook);
1015                     PL_warnhook = PERL_WARNHOOK_FATAL;
1016                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1017                     LEAVE;
1018                 }
1019             }
1020             if (!highhalf)
1021                 goto plain_copy;
1022             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1023             bufptr = PL_parser->bufptr;
1024             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1025             SvCUR_set(PL_parser->linestr,
1026                 SvCUR(PL_parser->linestr) + len-highhalf);
1027             PL_parser->bufend += len-highhalf;
1028             for (p = pv; p != e; p++) {
1029                 U8 c = (U8)*p;
1030                 if (c & 0x80) {
1031                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1032                     p++;
1033                 } else {
1034                     *bufptr++ = (char)c;
1035                 }
1036             }
1037         } else {
1038             plain_copy:
1039             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1040             bufptr = PL_parser->bufptr;
1041             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1042             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1043             PL_parser->bufend += len;
1044             Copy(pv, bufptr, len, char);
1045         }
1046     }
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1051
1052 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1053 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1054 reallocating the buffer if necessary.  This means that lexing code that
1055 runs later will see the characters as if they had appeared in the input.
1056 It is not recommended to do this as part of normal parsing, and most
1057 uses of this facility run the risk of the inserted characters being
1058 interpreted in an unintended manner.
1059
1060 The string to be inserted is represented by octets starting at I<pv>
1061 and continuing to the first nul.  These octets are interpreted as either
1062 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1063 in I<flags>.  The characters are recoded for the lexer buffer, according
1064 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1065 If it is not convenient to nul-terminate a string to be inserted, the
1066 L</lex_stuff_pvn> function is more appropriate.
1067
1068 =cut
1069 */
1070
1071 void
1072 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1073 {
1074     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1075     lex_stuff_pvn(pv, strlen(pv), flags);
1076 }
1077
1078 /*
1079 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1080
1081 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1082 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1083 reallocating the buffer if necessary.  This means that lexing code that
1084 runs later will see the characters as if they had appeared in the input.
1085 It is not recommended to do this as part of normal parsing, and most
1086 uses of this facility run the risk of the inserted characters being
1087 interpreted in an unintended manner.
1088
1089 The string to be inserted is the string value of I<sv>.  The characters
1090 are recoded for the lexer buffer, according to how the buffer is currently
1091 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1092 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1093 need to construct a scalar.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1100 {
1101     char *pv;
1102     STRLEN len;
1103     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1104     if (flags)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1106     pv = SvPV(sv, len);
1107     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1108 }
1109
1110 /*
1111 =for apidoc Amx|void|lex_unstuff|char *ptr
1112
1113 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1114 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1115 This hides the discarded text from any lexing code that runs later,
1116 as if the text had never appeared.
1117
1118 This is not the normal way to consume lexed text.  For that, use
1119 L</lex_read_to>.
1120
1121 =cut
1122 */
1123
1124 void
1125 Perl_lex_unstuff(pTHX_ char *ptr)
1126 {
1127     char *buf, *bufend;
1128     STRLEN unstuff_len;
1129     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1130     buf = PL_parser->bufptr;
1131     if (ptr < buf)
1132         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1133     if (ptr == buf)
1134         return;
1135     bufend = PL_parser->bufend;
1136     if (ptr > bufend)
1137         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1138     unstuff_len = ptr - buf;
1139     Move(ptr, buf, bufend+1-ptr, char);
1140     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1141     PL_parser->bufend = bufend - unstuff_len;
1142 }
1143
1144 /*
1145 =for apidoc Amx|void|lex_read_to|char *ptr
1146
1147 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1148 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1149 performing the correct bookkeeping whenever a newline character is passed.
1150 This is the normal way to consume lexed text.
1151
1152 Interpretation of the buffer's octets can be abstracted out by
1153 using the slightly higher-level functions L</lex_peek_unichar> and
1154 L</lex_read_unichar>.
1155
1156 =cut
1157 */
1158
1159 void
1160 Perl_lex_read_to(pTHX_ char *ptr)
1161 {
1162     char *s;
1163     PERL_ARGS_ASSERT_LEX_READ_TO;
1164     s = PL_parser->bufptr;
1165     if (ptr < s || ptr > PL_parser->bufend)
1166         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1167     for (; s != ptr; s++)
1168         if (*s == '\n') {
1169             CopLINE_inc(PL_curcop);
1170             PL_parser->linestart = s+1;
1171         }
1172     PL_parser->bufptr = ptr;
1173 }
1174
1175 /*
1176 =for apidoc Amx|void|lex_discard_to|char *ptr
1177
1178 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1179 up to I<ptr>.  The remaining content of the buffer will be moved, and
1180 all pointers into the buffer updated appropriately.  I<ptr> must not
1181 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1182 it is not permitted to discard text that has yet to be lexed.
1183
1184 Normally it is not necessarily to do this directly, because it suffices to
1185 use the implicit discarding behaviour of L</lex_next_chunk> and things
1186 based on it.  However, if a token stretches across multiple lines,
1187 and the lexing code has kept multiple lines of text in the buffer for
1188 that purpose, then after completion of the token it would be wise to
1189 explicitly discard the now-unneeded earlier lines, to avoid future
1190 multi-line tokens growing the buffer without bound.
1191
1192 =cut
1193 */
1194
1195 void
1196 Perl_lex_discard_to(pTHX_ char *ptr)
1197 {
1198     char *buf;
1199     STRLEN discard_len;
1200     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1201     buf = SvPVX(PL_parser->linestr);
1202     if (ptr < buf)
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1204     if (ptr == buf)
1205         return;
1206     if (ptr > PL_parser->bufptr)
1207         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1208     discard_len = ptr - buf;
1209     if (PL_parser->oldbufptr < ptr)
1210         PL_parser->oldbufptr = ptr;
1211     if (PL_parser->oldoldbufptr < ptr)
1212         PL_parser->oldoldbufptr = ptr;
1213     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1214         PL_parser->last_uni = NULL;
1215     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1216         PL_parser->last_lop = NULL;
1217     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1218     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1219     PL_parser->bufend -= discard_len;
1220     PL_parser->bufptr -= discard_len;
1221     PL_parser->oldbufptr -= discard_len;
1222     PL_parser->oldoldbufptr -= discard_len;
1223     if (PL_parser->last_uni)
1224         PL_parser->last_uni -= discard_len;
1225     if (PL_parser->last_lop)
1226         PL_parser->last_lop -= discard_len;
1227 }
1228
1229 /*
1230 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1231
1232 Reads in the next chunk of text to be lexed, appending it to
1233 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1234 looked to the end of the current chunk and wants to know more.  It is
1235 usual, but not necessary, for lexing to have consumed the entirety of
1236 the current chunk at this time.
1237
1238 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1239 chunk (i.e., the current chunk has been entirely consumed), normally the
1240 current chunk will be discarded at the same time that the new chunk is
1241 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1242 will not be discarded.  If the current chunk has not been entirely
1243 consumed, then it will not be discarded regardless of the flag.
1244
1245 Returns true if some new text was added to the buffer, or false if the
1246 buffer has reached the end of the input text.
1247
1248 =cut
1249 */
1250
1251 #define LEX_FAKE_EOF 0x80000000
1252 #define LEX_NO_TERM  0x40000000
1253
1254 bool
1255 Perl_lex_next_chunk(pTHX_ U32 flags)
1256 {
1257     SV *linestr;
1258     char *buf;
1259     STRLEN old_bufend_pos, new_bufend_pos;
1260     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1261     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1262     bool got_some_for_debugger = 0;
1263     bool got_some;
1264     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1265         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1266     linestr = PL_parser->linestr;
1267     buf = SvPVX(linestr);
1268     if (!(flags & LEX_KEEP_PREVIOUS) &&
1269             PL_parser->bufptr == PL_parser->bufend) {
1270         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1271         linestart_pos = 0;
1272         if (PL_parser->last_uni != PL_parser->bufend)
1273             PL_parser->last_uni = NULL;
1274         if (PL_parser->last_lop != PL_parser->bufend)
1275             PL_parser->last_lop = NULL;
1276         last_uni_pos = last_lop_pos = 0;
1277         *buf = 0;
1278         SvCUR(linestr) = 0;
1279     } else {
1280         old_bufend_pos = PL_parser->bufend - buf;
1281         bufptr_pos = PL_parser->bufptr - buf;
1282         oldbufptr_pos = PL_parser->oldbufptr - buf;
1283         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1284         linestart_pos = PL_parser->linestart - buf;
1285         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1286         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1287     }
1288     if (flags & LEX_FAKE_EOF) {
1289         goto eof;
1290     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1291         got_some = 0;
1292     } else if (filter_gets(linestr, old_bufend_pos)) {
1293         got_some = 1;
1294         got_some_for_debugger = 1;
1295     } else if (flags & LEX_NO_TERM) {
1296         got_some = 0;
1297     } else {
1298         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1299             sv_setpvs(linestr, "");
1300         eof:
1301         /* End of real input.  Close filehandle (unless it was STDIN),
1302          * then add implicit termination.
1303          */
1304         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1305             PerlIO_clearerr(PL_parser->rsfp);
1306         else if (PL_parser->rsfp)
1307             (void)PerlIO_close(PL_parser->rsfp);
1308         PL_parser->rsfp = NULL;
1309         PL_parser->in_pod = PL_parser->filtered = 0;
1310 #ifdef PERL_MAD
1311         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1312             PL_faketokens = 1;
1313 #endif
1314         if (!PL_in_eval && PL_minus_p) {
1315             sv_catpvs(linestr,
1316                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1317             PL_minus_n = PL_minus_p = 0;
1318         } else if (!PL_in_eval && PL_minus_n) {
1319             sv_catpvs(linestr, /*{*/";}");
1320             PL_minus_n = 0;
1321         } else
1322             sv_catpvs(linestr, ";");
1323         got_some = 1;
1324     }
1325     buf = SvPVX(linestr);
1326     new_bufend_pos = SvCUR(linestr);
1327     PL_parser->bufend = buf + new_bufend_pos;
1328     PL_parser->bufptr = buf + bufptr_pos;
1329     PL_parser->oldbufptr = buf + oldbufptr_pos;
1330     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1331     PL_parser->linestart = buf + linestart_pos;
1332     if (PL_parser->last_uni)
1333         PL_parser->last_uni = buf + last_uni_pos;
1334     if (PL_parser->last_lop)
1335         PL_parser->last_lop = buf + last_lop_pos;
1336     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1337             PL_curstash != PL_debstash) {
1338         /* debugger active and we're not compiling the debugger code,
1339          * so store the line into the debugger's array of lines
1340          */
1341         update_debugger_info(NULL, buf+old_bufend_pos,
1342             new_bufend_pos-old_bufend_pos);
1343     }
1344     return got_some;
1345 }
1346
1347 /*
1348 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1349
1350 Looks ahead one (Unicode) character in the text currently being lexed.
1351 Returns the codepoint (unsigned integer value) of the next character,
1352 or -1 if lexing has reached the end of the input text.  To consume the
1353 peeked character, use L</lex_read_unichar>.
1354
1355 If the next character is in (or extends into) the next chunk of input
1356 text, the next chunk will be read in.  Normally the current chunk will be
1357 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1358 then the current chunk will not be discarded.
1359
1360 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1361 is encountered, an exception is generated.
1362
1363 =cut
1364 */
1365
1366 I32
1367 Perl_lex_peek_unichar(pTHX_ U32 flags)
1368 {
1369     dVAR;
1370     char *s, *bufend;
1371     if (flags & ~(LEX_KEEP_PREVIOUS))
1372         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1373     s = PL_parser->bufptr;
1374     bufend = PL_parser->bufend;
1375     if (UTF) {
1376         U8 head;
1377         I32 unichar;
1378         STRLEN len, retlen;
1379         if (s == bufend) {
1380             if (!lex_next_chunk(flags))
1381                 return -1;
1382             s = PL_parser->bufptr;
1383             bufend = PL_parser->bufend;
1384         }
1385         head = (U8)*s;
1386         if (!(head & 0x80))
1387             return head;
1388         if (head & 0x40) {
1389             len = PL_utf8skip[head];
1390             while ((STRLEN)(bufend-s) < len) {
1391                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1392                     break;
1393                 s = PL_parser->bufptr;
1394                 bufend = PL_parser->bufend;
1395             }
1396         }
1397         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1398         if (retlen == (STRLEN)-1) {
1399             /* malformed UTF-8 */
1400             ENTER;
1401             SAVESPTR(PL_warnhook);
1402             PL_warnhook = PERL_WARNHOOK_FATAL;
1403             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1404             LEAVE;
1405         }
1406         return unichar;
1407     } else {
1408         if (s == bufend) {
1409             if (!lex_next_chunk(flags))
1410                 return -1;
1411             s = PL_parser->bufptr;
1412         }
1413         return (U8)*s;
1414     }
1415 }
1416
1417 /*
1418 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1419
1420 Reads the next (Unicode) character in the text currently being lexed.
1421 Returns the codepoint (unsigned integer value) of the character read,
1422 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1423 if lexing has reached the end of the input text.  To non-destructively
1424 examine the next character, use L</lex_peek_unichar> instead.
1425
1426 If the next character is in (or extends into) the next chunk of input
1427 text, the next chunk will be read in.  Normally the current chunk will be
1428 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1429 then the current chunk will not be discarded.
1430
1431 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1432 is encountered, an exception is generated.
1433
1434 =cut
1435 */
1436
1437 I32
1438 Perl_lex_read_unichar(pTHX_ U32 flags)
1439 {
1440     I32 c;
1441     if (flags & ~(LEX_KEEP_PREVIOUS))
1442         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1443     c = lex_peek_unichar(flags);
1444     if (c != -1) {
1445         if (c == '\n')
1446             CopLINE_inc(PL_curcop);
1447         if (UTF)
1448             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1449         else
1450             ++(PL_parser->bufptr);
1451     }
1452     return c;
1453 }
1454
1455 /*
1456 =for apidoc Amx|void|lex_read_space|U32 flags
1457
1458 Reads optional spaces, in Perl style, in the text currently being
1459 lexed.  The spaces may include ordinary whitespace characters and
1460 Perl-style comments.  C<#line> directives are processed if encountered.
1461 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1462 at a non-space character (or the end of the input text).
1463
1464 If spaces extend into the next chunk of input text, the next chunk will
1465 be read in.  Normally the current chunk will be discarded at the same
1466 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1467 chunk will not be discarded.
1468
1469 =cut
1470 */
1471
1472 #define LEX_NO_NEXT_CHUNK 0x80000000
1473
1474 void
1475 Perl_lex_read_space(pTHX_ U32 flags)
1476 {
1477     char *s, *bufend;
1478     bool need_incline = 0;
1479     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1480         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1481 #ifdef PERL_MAD
1482     if (PL_skipwhite) {
1483         sv_free(PL_skipwhite);
1484         PL_skipwhite = NULL;
1485     }
1486     if (PL_madskills)
1487         PL_skipwhite = newSVpvs("");
1488 #endif /* PERL_MAD */
1489     s = PL_parser->bufptr;
1490     bufend = PL_parser->bufend;
1491     while (1) {
1492         char c = *s;
1493         if (c == '#') {
1494             do {
1495                 c = *++s;
1496             } while (!(c == '\n' || (c == 0 && s == bufend)));
1497         } else if (c == '\n') {
1498             s++;
1499             PL_parser->linestart = s;
1500             if (s == bufend)
1501                 need_incline = 1;
1502             else
1503                 incline(s);
1504         } else if (isSPACE(c)) {
1505             s++;
1506         } else if (c == 0 && s == bufend) {
1507             bool got_more;
1508 #ifdef PERL_MAD
1509             if (PL_madskills)
1510                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1511 #endif /* PERL_MAD */
1512             if (flags & LEX_NO_NEXT_CHUNK)
1513                 break;
1514             PL_parser->bufptr = s;
1515             CopLINE_inc(PL_curcop);
1516             got_more = lex_next_chunk(flags);
1517             CopLINE_dec(PL_curcop);
1518             s = PL_parser->bufptr;
1519             bufend = PL_parser->bufend;
1520             if (!got_more)
1521                 break;
1522             if (need_incline && PL_parser->rsfp) {
1523                 incline(s);
1524                 need_incline = 0;
1525             }
1526         } else {
1527             break;
1528         }
1529     }
1530 #ifdef PERL_MAD
1531     if (PL_madskills)
1532         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1533 #endif /* PERL_MAD */
1534     PL_parser->bufptr = s;
1535 }
1536
1537 /*
1538  * S_incline
1539  * This subroutine has nothing to do with tilting, whether at windmills
1540  * or pinball tables.  Its name is short for "increment line".  It
1541  * increments the current line number in CopLINE(PL_curcop) and checks
1542  * to see whether the line starts with a comment of the form
1543  *    # line 500 "foo.pm"
1544  * If so, it sets the current line number and file to the values in the comment.
1545  */
1546
1547 STATIC void
1548 S_incline(pTHX_ const char *s)
1549 {
1550     dVAR;
1551     const char *t;
1552     const char *n;
1553     const char *e;
1554     line_t line_num;
1555
1556     PERL_ARGS_ASSERT_INCLINE;
1557
1558     CopLINE_inc(PL_curcop);
1559     if (*s++ != '#')
1560         return;
1561     while (SPACE_OR_TAB(*s))
1562         s++;
1563     if (strnEQ(s, "line", 4))
1564         s += 4;
1565     else
1566         return;
1567     if (SPACE_OR_TAB(*s))
1568         s++;
1569     else
1570         return;
1571     while (SPACE_OR_TAB(*s))
1572         s++;
1573     if (!isDIGIT(*s))
1574         return;
1575
1576     n = s;
1577     while (isDIGIT(*s))
1578         s++;
1579     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1580         return;
1581     while (SPACE_OR_TAB(*s))
1582         s++;
1583     if (*s == '"' && (t = strchr(s+1, '"'))) {
1584         s++;
1585         e = t + 1;
1586     }
1587     else {
1588         t = s;
1589         while (!isSPACE(*t))
1590             t++;
1591         e = t;
1592     }
1593     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1594         e++;
1595     if (*e != '\n' && *e != '\0')
1596         return;         /* false alarm */
1597
1598     line_num = atoi(n)-1;
1599
1600     if (t - s > 0) {
1601         const STRLEN len = t - s;
1602         SV *const temp_sv = CopFILESV(PL_curcop);
1603         const char *cf;
1604         STRLEN tmplen;
1605
1606         if (temp_sv) {
1607             cf = SvPVX(temp_sv);
1608             tmplen = SvCUR(temp_sv);
1609         } else {
1610             cf = NULL;
1611             tmplen = 0;
1612         }
1613
1614         if (!PL_rsfp && !PL_parser->filtered) {
1615             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1616              * to *{"::_<newfilename"} */
1617             /* However, the long form of evals is only turned on by the
1618                debugger - usually they're "(eval %lu)" */
1619             char smallbuf[128];
1620             char *tmpbuf;
1621             GV **gvp;
1622             STRLEN tmplen2 = len;
1623             if (tmplen + 2 <= sizeof smallbuf)
1624                 tmpbuf = smallbuf;
1625             else
1626                 Newx(tmpbuf, tmplen + 2, char);
1627             tmpbuf[0] = '_';
1628             tmpbuf[1] = '<';
1629             memcpy(tmpbuf + 2, cf, tmplen);
1630             tmplen += 2;
1631             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1632             if (gvp) {
1633                 char *tmpbuf2;
1634                 GV *gv2;
1635
1636                 if (tmplen2 + 2 <= sizeof smallbuf)
1637                     tmpbuf2 = smallbuf;
1638                 else
1639                     Newx(tmpbuf2, tmplen2 + 2, char);
1640
1641                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1642                     /* Either they malloc'd it, or we malloc'd it,
1643                        so no prefix is present in ours.  */
1644                     tmpbuf2[0] = '_';
1645                     tmpbuf2[1] = '<';
1646                 }
1647
1648                 memcpy(tmpbuf2 + 2, s, tmplen2);
1649                 tmplen2 += 2;
1650
1651                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1652                 if (!isGV(gv2)) {
1653                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1654                     /* adjust ${"::_<newfilename"} to store the new file name */
1655                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1656                     /* The line number may differ. If that is the case,
1657                        alias the saved lines that are in the array.
1658                        Otherwise alias the whole array. */
1659                     if (CopLINE(PL_curcop) == line_num) {
1660                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1661                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1662                     }
1663                     else if (GvAV(*gvp)) {
1664                         AV * const av = GvAV(*gvp);
1665                         const I32 start = CopLINE(PL_curcop)+1;
1666                         I32 items = AvFILLp(av) - start;
1667                         if (items > 0) {
1668                             AV * const av2 = GvAVn(gv2);
1669                             SV **svp = AvARRAY(av) + start;
1670                             I32 l = (I32)line_num+1;
1671                             while (items--)
1672                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1673                         }
1674                     }
1675                 }
1676
1677                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1678             }
1679             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1680         }
1681         CopFILE_free(PL_curcop);
1682         CopFILE_setn(PL_curcop, s, len);
1683     }
1684     CopLINE_set(PL_curcop, line_num);
1685 }
1686
1687 #ifdef PERL_MAD
1688 /* skip space before PL_thistoken */
1689
1690 STATIC char *
1691 S_skipspace0(pTHX_ register char *s)
1692 {
1693     PERL_ARGS_ASSERT_SKIPSPACE0;
1694
1695     s = skipspace(s);
1696     if (!PL_madskills)
1697         return s;
1698     if (PL_skipwhite) {
1699         if (!PL_thiswhite)
1700             PL_thiswhite = newSVpvs("");
1701         sv_catsv(PL_thiswhite, PL_skipwhite);
1702         sv_free(PL_skipwhite);
1703         PL_skipwhite = 0;
1704     }
1705     PL_realtokenstart = s - SvPVX(PL_linestr);
1706     return s;
1707 }
1708
1709 /* skip space after PL_thistoken */
1710
1711 STATIC char *
1712 S_skipspace1(pTHX_ register char *s)
1713 {
1714     const char *start = s;
1715     I32 startoff = start - SvPVX(PL_linestr);
1716
1717     PERL_ARGS_ASSERT_SKIPSPACE1;
1718
1719     s = skipspace(s);
1720     if (!PL_madskills)
1721         return s;
1722     start = SvPVX(PL_linestr) + startoff;
1723     if (!PL_thistoken && PL_realtokenstart >= 0) {
1724         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1725         PL_thistoken = newSVpvn(tstart, start - tstart);
1726     }
1727     PL_realtokenstart = -1;
1728     if (PL_skipwhite) {
1729         if (!PL_nextwhite)
1730             PL_nextwhite = newSVpvs("");
1731         sv_catsv(PL_nextwhite, PL_skipwhite);
1732         sv_free(PL_skipwhite);
1733         PL_skipwhite = 0;
1734     }
1735     return s;
1736 }
1737
1738 STATIC char *
1739 S_skipspace2(pTHX_ register char *s, SV **svp)
1740 {
1741     char *start;
1742     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1743     const I32 startoff = s - SvPVX(PL_linestr);
1744
1745     PERL_ARGS_ASSERT_SKIPSPACE2;
1746
1747     s = skipspace(s);
1748     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1749     if (!PL_madskills || !svp)
1750         return s;
1751     start = SvPVX(PL_linestr) + startoff;
1752     if (!PL_thistoken && PL_realtokenstart >= 0) {
1753         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1754         PL_thistoken = newSVpvn(tstart, start - tstart);
1755         PL_realtokenstart = -1;
1756     }
1757     if (PL_skipwhite) {
1758         if (!*svp)
1759             *svp = newSVpvs("");
1760         sv_setsv(*svp, PL_skipwhite);
1761         sv_free(PL_skipwhite);
1762         PL_skipwhite = 0;
1763     }
1764
1765     return s;
1766 }
1767 #endif
1768
1769 STATIC void
1770 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1771 {
1772     AV *av = CopFILEAVx(PL_curcop);
1773     if (av) {
1774         SV * const sv = newSV_type(SVt_PVMG);
1775         if (orig_sv)
1776             sv_setsv(sv, orig_sv);
1777         else
1778             sv_setpvn(sv, buf, len);
1779         (void)SvIOK_on(sv);
1780         SvIV_set(sv, 0);
1781         av_store(av, (I32)CopLINE(PL_curcop), sv);
1782     }
1783 }
1784
1785 /*
1786  * S_skipspace
1787  * Called to gobble the appropriate amount and type of whitespace.
1788  * Skips comments as well.
1789  */
1790
1791 STATIC char *
1792 S_skipspace(pTHX_ register char *s)
1793 {
1794 #ifdef PERL_MAD
1795     char *start = s;
1796 #endif /* PERL_MAD */
1797     PERL_ARGS_ASSERT_SKIPSPACE;
1798 #ifdef PERL_MAD
1799     if (PL_skipwhite) {
1800         sv_free(PL_skipwhite);
1801         PL_skipwhite = NULL;
1802     }
1803 #endif /* PERL_MAD */
1804     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1805         while (s < PL_bufend && SPACE_OR_TAB(*s))
1806             s++;
1807     } else {
1808         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1809         PL_bufptr = s;
1810         lex_read_space(LEX_KEEP_PREVIOUS |
1811                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1812                     LEX_NO_NEXT_CHUNK : 0));
1813         s = PL_bufptr;
1814         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1815         if (PL_linestart > PL_bufptr)
1816             PL_bufptr = PL_linestart;
1817         return s;
1818     }
1819 #ifdef PERL_MAD
1820     if (PL_madskills)
1821         PL_skipwhite = newSVpvn(start, s-start);
1822 #endif /* PERL_MAD */
1823     return s;
1824 }
1825
1826 /*
1827  * S_check_uni
1828  * Check the unary operators to ensure there's no ambiguity in how they're
1829  * used.  An ambiguous piece of code would be:
1830  *     rand + 5
1831  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1832  * the +5 is its argument.
1833  */
1834
1835 STATIC void
1836 S_check_uni(pTHX)
1837 {
1838     dVAR;
1839     const char *s;
1840     const char *t;
1841
1842     if (PL_oldoldbufptr != PL_last_uni)
1843         return;
1844     while (isSPACE(*PL_last_uni))
1845         PL_last_uni++;
1846     s = PL_last_uni;
1847     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1848         s++;
1849     if ((t = strchr(s, '(')) && t < PL_bufptr)
1850         return;
1851
1852     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1853                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1854                      (int)(s - PL_last_uni), PL_last_uni);
1855 }
1856
1857 /*
1858  * LOP : macro to build a list operator.  Its behaviour has been replaced
1859  * with a subroutine, S_lop() for which LOP is just another name.
1860  */
1861
1862 #define LOP(f,x) return lop(f,x,s)
1863
1864 /*
1865  * S_lop
1866  * Build a list operator (or something that might be one).  The rules:
1867  *  - if we have a next token, then it's a list operator [why?]
1868  *  - if the next thing is an opening paren, then it's a function
1869  *  - else it's a list operator
1870  */
1871
1872 STATIC I32
1873 S_lop(pTHX_ I32 f, int x, char *s)
1874 {
1875     dVAR;
1876
1877     PERL_ARGS_ASSERT_LOP;
1878
1879     pl_yylval.ival = f;
1880     CLINE;
1881     PL_expect = x;
1882     PL_bufptr = s;
1883     PL_last_lop = PL_oldbufptr;
1884     PL_last_lop_op = (OPCODE)f;
1885 #ifdef PERL_MAD
1886     if (PL_lasttoke)
1887         goto lstop;
1888 #else
1889     if (PL_nexttoke)
1890         goto lstop;
1891 #endif
1892     if (*s == '(')
1893         return REPORT(FUNC);
1894     s = PEEKSPACE(s);
1895     if (*s == '(')
1896         return REPORT(FUNC);
1897     else {
1898         lstop:
1899         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1900             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1901         return REPORT(LSTOP);
1902     }
1903 }
1904
1905 #ifdef PERL_MAD
1906  /*
1907  * S_start_force
1908  * Sets up for an eventual force_next().  start_force(0) basically does
1909  * an unshift, while start_force(-1) does a push.  yylex removes items
1910  * on the "pop" end.
1911  */
1912
1913 STATIC void
1914 S_start_force(pTHX_ int where)
1915 {
1916     int i;
1917
1918     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1919         where = PL_lasttoke;
1920     assert(PL_curforce < 0 || PL_curforce == where);
1921     if (PL_curforce != where) {
1922         for (i = PL_lasttoke; i > where; --i) {
1923             PL_nexttoke[i] = PL_nexttoke[i-1];
1924         }
1925         PL_lasttoke++;
1926     }
1927     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1928         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1929     PL_curforce = where;
1930     if (PL_nextwhite) {
1931         if (PL_madskills)
1932             curmad('^', newSVpvs(""));
1933         CURMAD('_', PL_nextwhite);
1934     }
1935 }
1936
1937 STATIC void
1938 S_curmad(pTHX_ char slot, SV *sv)
1939 {
1940     MADPROP **where;
1941
1942     if (!sv)
1943         return;
1944     if (PL_curforce < 0)
1945         where = &PL_thismad;
1946     else
1947         where = &PL_nexttoke[PL_curforce].next_mad;
1948
1949     if (PL_faketokens)
1950         sv_setpvs(sv, "");
1951     else {
1952         if (!IN_BYTES) {
1953             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1954                 SvUTF8_on(sv);
1955             else if (PL_encoding) {
1956                 sv_recode_to_utf8(sv, PL_encoding);
1957             }
1958         }
1959     }
1960
1961     /* keep a slot open for the head of the list? */
1962     if (slot != '_' && *where && (*where)->mad_key == '^') {
1963         (*where)->mad_key = slot;
1964         sv_free(MUTABLE_SV(((*where)->mad_val)));
1965         (*where)->mad_val = (void*)sv;
1966     }
1967     else
1968         addmad(newMADsv(slot, sv), where, 0);
1969 }
1970 #else
1971 #  define start_force(where)    NOOP
1972 #  define curmad(slot, sv)      NOOP
1973 #endif
1974
1975 /*
1976  * S_force_next
1977  * When the lexer realizes it knows the next token (for instance,
1978  * it is reordering tokens for the parser) then it can call S_force_next
1979  * to know what token to return the next time the lexer is called.  Caller
1980  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1981  * and possibly PL_expect to ensure the lexer handles the token correctly.
1982  */
1983
1984 STATIC void
1985 S_force_next(pTHX_ I32 type)
1986 {
1987     dVAR;
1988 #ifdef DEBUGGING
1989     if (DEBUG_T_TEST) {
1990         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1991         tokereport(type, &NEXTVAL_NEXTTOKE);
1992     }
1993 #endif
1994     /* Don’t let opslab_force_free snatch it */
1995     if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
1996         assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
1997         NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
1998     }
1999 #ifdef PERL_MAD
2000     if (PL_curforce < 0)
2001         start_force(PL_lasttoke);
2002     PL_nexttoke[PL_curforce].next_type = type;
2003     if (PL_lex_state != LEX_KNOWNEXT)
2004         PL_lex_defer = PL_lex_state;
2005     PL_lex_state = LEX_KNOWNEXT;
2006     PL_lex_expect = PL_expect;
2007     PL_curforce = -1;
2008 #else
2009     PL_nexttype[PL_nexttoke] = type;
2010     PL_nexttoke++;
2011     if (PL_lex_state != LEX_KNOWNEXT) {
2012         PL_lex_defer = PL_lex_state;
2013         PL_lex_expect = PL_expect;
2014         PL_lex_state = LEX_KNOWNEXT;
2015     }
2016 #endif
2017 }
2018
2019 void
2020 Perl_yyunlex(pTHX)
2021 {
2022     int yyc = PL_parser->yychar;
2023     if (yyc != YYEMPTY) {
2024         if (yyc) {
2025             start_force(-1);
2026             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2027             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2028                 PL_lex_allbrackets--;
2029                 PL_lex_brackets--;
2030                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2031             } else if (yyc == '('/*)*/) {
2032                 PL_lex_allbrackets--;
2033                 yyc |= (2<<24);
2034             }
2035             force_next(yyc);
2036         }
2037         PL_parser->yychar = YYEMPTY;
2038     }
2039 }
2040
2041 STATIC SV *
2042 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2043 {
2044     dVAR;
2045     SV * const sv = newSVpvn_utf8(start, len,
2046                                   !IN_BYTES
2047                                   && UTF
2048                                   && !is_ascii_string((const U8*)start, len)
2049                                   && is_utf8_string((const U8*)start, len));
2050     return sv;
2051 }
2052
2053 /*
2054  * S_force_word
2055  * When the lexer knows the next thing is a word (for instance, it has
2056  * just seen -> and it knows that the next char is a word char, then
2057  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2058  * lookahead.
2059  *
2060  * Arguments:
2061  *   char *start : buffer position (must be within PL_linestr)
2062  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2063  *   int check_keyword : if true, Perl checks to make sure the word isn't
2064  *       a keyword (do this if the word is a label, e.g. goto FOO)
2065  *   int allow_pack : if true, : characters will also be allowed (require,
2066  *       use, etc. do this)
2067  *   int allow_initial_tick : used by the "sub" lexer only.
2068  */
2069
2070 STATIC char *
2071 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2072 {
2073     dVAR;
2074     char *s;
2075     STRLEN len;
2076
2077     PERL_ARGS_ASSERT_FORCE_WORD;
2078
2079     start = SKIPSPACE1(start);
2080     s = start;
2081     if (isIDFIRST_lazy_if(s,UTF) ||
2082         (allow_pack && *s == ':') ||
2083         (allow_initial_tick && *s == '\'') )
2084     {
2085         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2086         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2087             return start;
2088         start_force(PL_curforce);
2089         if (PL_madskills)
2090             curmad('X', newSVpvn(start,s-start));
2091         if (token == METHOD) {
2092             s = SKIPSPACE1(s);
2093             if (*s == '(')
2094                 PL_expect = XTERM;
2095             else {
2096                 PL_expect = XOPERATOR;
2097             }
2098         }
2099         if (PL_madskills)
2100             curmad('g', newSVpvs( "forced" ));
2101         NEXTVAL_NEXTTOKE.opval
2102             = (OP*)newSVOP(OP_CONST,0,
2103                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2104         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2105         force_next(token);
2106     }
2107     return s;
2108 }
2109
2110 /*
2111  * S_force_ident
2112  * Called when the lexer wants $foo *foo &foo etc, but the program
2113  * text only contains the "foo" portion.  The first argument is a pointer
2114  * to the "foo", and the second argument is the type symbol to prefix.
2115  * Forces the next token to be a "WORD".
2116  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2117  */
2118
2119 STATIC void
2120 S_force_ident(pTHX_ register const char *s, int kind)
2121 {
2122     dVAR;
2123
2124     PERL_ARGS_ASSERT_FORCE_IDENT;
2125
2126     if (*s) {
2127         const STRLEN len = strlen(s);
2128         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2129                                                                 UTF ? SVf_UTF8 : 0));
2130         start_force(PL_curforce);
2131         NEXTVAL_NEXTTOKE.opval = o;
2132         force_next(WORD);
2133         if (kind) {
2134             o->op_private = OPpCONST_ENTERED;
2135             /* XXX see note in pp_entereval() for why we forgo typo
2136                warnings if the symbol must be introduced in an eval.
2137                GSAR 96-10-12 */
2138             gv_fetchpvn_flags(s, len,
2139                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2140                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2141                               kind == '$' ? SVt_PV :
2142                               kind == '@' ? SVt_PVAV :
2143                               kind == '%' ? SVt_PVHV :
2144                               SVt_PVGV
2145                               );
2146         }
2147     }
2148 }
2149
2150 NV
2151 Perl_str_to_version(pTHX_ SV *sv)
2152 {
2153     NV retval = 0.0;
2154     NV nshift = 1.0;
2155     STRLEN len;
2156     const char *start = SvPV_const(sv,len);
2157     const char * const end = start + len;
2158     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2159
2160     PERL_ARGS_ASSERT_STR_TO_VERSION;
2161
2162     while (start < end) {
2163         STRLEN skip;
2164         UV n;
2165         if (utf)
2166             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2167         else {
2168             n = *(U8*)start;
2169             skip = 1;
2170         }
2171         retval += ((NV)n)/nshift;
2172         start += skip;
2173         nshift *= 1000;
2174     }
2175     return retval;
2176 }
2177
2178 /*
2179  * S_force_version
2180  * Forces the next token to be a version number.
2181  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2182  * and if "guessing" is TRUE, then no new token is created (and the caller
2183  * must use an alternative parsing method).
2184  */
2185
2186 STATIC char *
2187 S_force_version(pTHX_ char *s, int guessing)
2188 {
2189     dVAR;
2190     OP *version = NULL;
2191     char *d;
2192 #ifdef PERL_MAD
2193     I32 startoff = s - SvPVX(PL_linestr);
2194 #endif
2195
2196     PERL_ARGS_ASSERT_FORCE_VERSION;
2197
2198     s = SKIPSPACE1(s);
2199
2200     d = s;
2201     if (*d == 'v')
2202         d++;
2203     if (isDIGIT(*d)) {
2204         while (isDIGIT(*d) || *d == '_' || *d == '.')
2205             d++;
2206 #ifdef PERL_MAD
2207         if (PL_madskills) {
2208             start_force(PL_curforce);
2209             curmad('X', newSVpvn(s,d-s));
2210         }
2211 #endif
2212         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2213             SV *ver;
2214 #ifdef USE_LOCALE_NUMERIC
2215             char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2216             setlocale(LC_NUMERIC, "C");
2217 #endif
2218             s = scan_num(s, &pl_yylval);
2219 #ifdef USE_LOCALE_NUMERIC
2220             setlocale(LC_NUMERIC, loc);
2221             Safefree(loc);
2222 #endif
2223             version = pl_yylval.opval;
2224             ver = cSVOPx(version)->op_sv;
2225             if (SvPOK(ver) && !SvNIOK(ver)) {
2226                 SvUPGRADE(ver, SVt_PVNV);
2227                 SvNV_set(ver, str_to_version(ver));
2228                 SvNOK_on(ver);          /* hint that it is a version */
2229             }
2230         }
2231         else if (guessing) {
2232 #ifdef PERL_MAD
2233             if (PL_madskills) {
2234                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2235                 PL_nextwhite = 0;
2236                 s = SvPVX(PL_linestr) + startoff;
2237             }
2238 #endif
2239             return s;
2240         }
2241     }
2242
2243 #ifdef PERL_MAD
2244     if (PL_madskills && !version) {
2245         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2246         PL_nextwhite = 0;
2247         s = SvPVX(PL_linestr) + startoff;
2248     }
2249 #endif
2250     /* NOTE: The parser sees the package name and the VERSION swapped */
2251     start_force(PL_curforce);
2252     NEXTVAL_NEXTTOKE.opval = version;
2253     force_next(WORD);
2254
2255     return s;
2256 }
2257
2258 /*
2259  * S_force_strict_version
2260  * Forces the next token to be a version number using strict syntax rules.
2261  */
2262
2263 STATIC char *
2264 S_force_strict_version(pTHX_ char *s)
2265 {
2266     dVAR;
2267     OP *version = NULL;
2268 #ifdef PERL_MAD
2269     I32 startoff = s - SvPVX(PL_linestr);
2270 #endif
2271     const char *errstr = NULL;
2272
2273     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2274
2275     while (isSPACE(*s)) /* leading whitespace */
2276         s++;
2277
2278     if (is_STRICT_VERSION(s,&errstr)) {
2279         SV *ver = newSV(0);
2280         s = (char *)scan_version(s, ver, 0);
2281         version = newSVOP(OP_CONST, 0, ver);
2282     }
2283     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2284             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2285     {
2286         PL_bufptr = s;
2287         if (errstr)
2288             yyerror(errstr); /* version required */
2289         return s;
2290     }
2291
2292 #ifdef PERL_MAD
2293     if (PL_madskills && !version) {
2294         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2295         PL_nextwhite = 0;
2296         s = SvPVX(PL_linestr) + startoff;
2297     }
2298 #endif
2299     /* NOTE: The parser sees the package name and the VERSION swapped */
2300     start_force(PL_curforce);
2301     NEXTVAL_NEXTTOKE.opval = version;
2302     force_next(WORD);
2303
2304     return s;
2305 }
2306
2307 /*
2308  * S_tokeq
2309  * Tokenize a quoted string passed in as an SV.  It finds the next
2310  * chunk, up to end of string or a backslash.  It may make a new
2311  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2312  * turns \\ into \.
2313  */
2314
2315 STATIC SV *
2316 S_tokeq(pTHX_ SV *sv)
2317 {
2318     dVAR;
2319     char *s;
2320     char *send;
2321     char *d;
2322     STRLEN len = 0;
2323     SV *pv = sv;
2324
2325     PERL_ARGS_ASSERT_TOKEQ;
2326
2327     if (!SvLEN(sv))
2328         goto finish;
2329
2330     s = SvPV_force(sv, len);
2331     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2332         goto finish;
2333     send = s + len;
2334     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2335     while (s < send && !(*s == '\\' && s[1] == '\\'))
2336         s++;
2337     if (s == send)
2338         goto finish;
2339     d = s;
2340     if ( PL_hints & HINT_NEW_STRING ) {
2341         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2342     }
2343     while (s < send) {
2344         if (*s == '\\') {
2345             if (s + 1 < send && (s[1] == '\\'))
2346                 s++;            /* all that, just for this */
2347         }
2348         *d++ = *s++;
2349     }
2350     *d = '\0';
2351     SvCUR_set(sv, d - SvPVX_const(sv));
2352   finish:
2353     if ( PL_hints & HINT_NEW_STRING )
2354        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2355     return sv;
2356 }
2357
2358 /*
2359  * Now come three functions related to double-quote context,
2360  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2361  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2362  * interact with PL_lex_state, and create fake ( ... ) argument lists
2363  * to handle functions and concatenation.
2364  * For example,
2365  *   "foo\lbar"
2366  * is tokenised as
2367  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2368  */
2369
2370 /*
2371  * S_sublex_start
2372  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2373  *
2374  * Pattern matching will set PL_lex_op to the pattern-matching op to
2375  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2376  *
2377  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2378  *
2379  * Everything else becomes a FUNC.
2380  *
2381  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2382  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2383  * call to S_sublex_push().
2384  */
2385
2386 STATIC I32
2387 S_sublex_start(pTHX)
2388 {
2389     dVAR;
2390     const I32 op_type = pl_yylval.ival;
2391
2392     if (op_type == OP_NULL) {
2393         pl_yylval.opval = PL_lex_op;
2394         PL_lex_op = NULL;
2395         return THING;
2396     }
2397     if (op_type == OP_CONST || op_type == OP_READLINE) {
2398         SV *sv = tokeq(PL_lex_stuff);
2399
2400         if (SvTYPE(sv) == SVt_PVIV) {
2401             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2402             STRLEN len;
2403             const char * const p = SvPV_const(sv, len);
2404             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2405             SvREFCNT_dec(sv);
2406             sv = nsv;
2407         }
2408         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2409         PL_lex_stuff = NULL;
2410         /* Allow <FH> // "foo" */
2411         if (op_type == OP_READLINE)
2412             PL_expect = XTERMORDORDOR;
2413         return THING;
2414     }
2415     else if (op_type == OP_BACKTICK && PL_lex_op) {
2416         /* readpipe() vas overriden */
2417         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2418         pl_yylval.opval = PL_lex_op;
2419         PL_lex_op = NULL;
2420         PL_lex_stuff = NULL;
2421         return THING;
2422     }
2423
2424     PL_sublex_info.super_state = PL_lex_state;
2425     PL_sublex_info.sub_inwhat = (U16)op_type;
2426     PL_sublex_info.sub_op = PL_lex_op;
2427     PL_lex_state = LEX_INTERPPUSH;
2428
2429     PL_expect = XTERM;
2430     if (PL_lex_op) {
2431         pl_yylval.opval = PL_lex_op;
2432         PL_lex_op = NULL;
2433         return PMFUNC;
2434     }
2435     else
2436         return FUNC;
2437 }
2438
2439 /*
2440  * S_sublex_push
2441  * Create a new scope to save the lexing state.  The scope will be
2442  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2443  * to the uc, lc, etc. found before.
2444  * Sets PL_lex_state to LEX_INTERPCONCAT.
2445  */
2446
2447 STATIC I32
2448 S_sublex_push(pTHX)
2449 {
2450     dVAR;
2451     ENTER;
2452
2453     PL_lex_state = PL_sublex_info.super_state;
2454     SAVEBOOL(PL_lex_dojoin);
2455     SAVEI32(PL_lex_brackets);
2456     SAVEI32(PL_lex_allbrackets);
2457     SAVEI32(PL_lex_formbrack);
2458     SAVEI8(PL_lex_fakeeof);
2459     SAVEI32(PL_lex_casemods);
2460     SAVEI32(PL_lex_starts);
2461     SAVEI8(PL_lex_state);
2462     SAVESPTR(PL_lex_repl);
2463     SAVEPPTR(PL_sublex_info.re_eval_start);
2464     SAVESPTR(PL_sublex_info.re_eval_str);
2465     SAVEPPTR(PL_sublex_info.super_bufptr);
2466     SAVEVPTR(PL_lex_inpat);
2467     SAVEI16(PL_lex_inwhat);
2468     SAVECOPLINE(PL_curcop);
2469     SAVEPPTR(PL_bufptr);
2470     SAVEPPTR(PL_bufend);
2471     SAVEPPTR(PL_oldbufptr);
2472     SAVEPPTR(PL_oldoldbufptr);
2473     SAVEPPTR(PL_last_lop);
2474     SAVEPPTR(PL_last_uni);
2475     SAVEPPTR(PL_linestart);
2476     SAVESPTR(PL_linestr);
2477     SAVEGENERICPV(PL_lex_brackstack);
2478     SAVEGENERICPV(PL_lex_casestack);
2479
2480     /* The here-doc parser needs to be able to peek into outer lexing
2481        scopes to find the body of the here-doc.  We use SvIVX(PL_linestr)
2482        to store the outer PL_bufptr and SvNVX to store the outer
2483        PL_linestr.  Since SvIVX already means something else, we use
2484        PL_sublex_info.super_bufptr for the innermost scope (the one we are
2485        now entering), and a localised SvIVX for outer scopes.
2486      */
2487     SvUPGRADE(PL_linestr, SVt_PVIV);
2488     /* A null super_bufptr means the outer lexing scope is not peekable,
2489        because it is a single line from an input stream. */
2490     SAVEIV(SvIVX(PL_linestr));
2491     SvIVX(PL_linestr) = PTR2IV(PL_sublex_info.super_bufptr);
2492     PL_sublex_info.super_bufptr =
2493         (SvTYPE(PL_linestr) < SVt_PVNV || !SvNVX(PL_linestr))
2494          && (PL_rsfp || PL_parser->filtered)
2495          ? NULL
2496          : PL_bufptr;
2497     SvUPGRADE(PL_lex_stuff, SVt_PVNV);
2498     SvNVX(PL_lex_stuff) = PTR2NV(PL_linestr);
2499
2500     PL_linestr = PL_lex_stuff;
2501     PL_lex_repl = PL_sublex_info.repl;
2502     PL_lex_stuff = NULL;
2503     PL_sublex_info.repl = NULL;
2504     PL_sublex_info.re_eval_start = NULL;
2505     PL_sublex_info.re_eval_str = NULL;
2506
2507     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2508         = SvPVX(PL_linestr);
2509     PL_bufend += SvCUR(PL_linestr);
2510     PL_last_lop = PL_last_uni = NULL;
2511     SAVEFREESV(PL_linestr);
2512
2513     PL_lex_dojoin = FALSE;
2514     PL_lex_brackets = PL_lex_formbrack = 0;
2515     PL_lex_allbrackets = 0;
2516     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2517     Newx(PL_lex_brackstack, 120, char);
2518     Newx(PL_lex_casestack, 12, char);
2519     PL_lex_casemods = 0;
2520     *PL_lex_casestack = '\0';
2521     PL_lex_starts = 0;
2522     PL_lex_state = LEX_INTERPCONCAT;
2523     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2524
2525     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2526     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2527     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2528         PL_lex_inpat = PL_sublex_info.sub_op;
2529     else
2530         PL_lex_inpat = NULL;
2531
2532     return '(';
2533 }
2534
2535 /*
2536  * S_sublex_done
2537  * Restores lexer state after a S_sublex_push.
2538  */
2539
2540 STATIC I32
2541 S_sublex_done(pTHX)
2542 {
2543     dVAR;
2544     if (!PL_lex_starts++) {
2545         SV * const sv = newSVpvs("");
2546         if (SvUTF8(PL_linestr))
2547             SvUTF8_on(sv);
2548         PL_expect = XOPERATOR;
2549         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2550         return THING;
2551     }
2552
2553     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2554         PL_lex_state = LEX_INTERPCASEMOD;
2555         return yylex();
2556     }
2557
2558     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2559     assert(PL_lex_inwhat != OP_TRANSR);
2560     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2561         SvUPGRADE(PL_lex_repl, SVt_PVNV);
2562         SvNVX(PL_lex_repl) = SvNVX(PL_linestr);
2563         PL_linestr = PL_lex_repl;
2564         PL_lex_inpat = 0;
2565         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2566         PL_bufend += SvCUR(PL_linestr);
2567         PL_last_lop = PL_last_uni = NULL;
2568         SAVEFREESV(PL_linestr);
2569         PL_lex_dojoin = FALSE;
2570         PL_lex_brackets = 0;
2571         PL_lex_allbrackets = 0;
2572         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2573         PL_lex_casemods = 0;
2574         *PL_lex_casestack = '\0';
2575         PL_lex_starts = 0;
2576         if (SvEVALED(PL_lex_repl)) {
2577             PL_lex_state = LEX_INTERPNORMAL;
2578             PL_lex_starts++;
2579             /*  we don't clear PL_lex_repl here, so that we can check later
2580                 whether this is an evalled subst; that means we rely on the
2581                 logic to ensure sublex_done() is called again only via the
2582                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2583         }
2584         else {
2585             PL_lex_state = LEX_INTERPCONCAT;
2586             PL_lex_repl = NULL;
2587         }
2588         return ',';
2589     }
2590     else {
2591 #ifdef PERL_MAD
2592         if (PL_madskills) {
2593             if (PL_thiswhite) {
2594                 if (!PL_endwhite)
2595                     PL_endwhite = newSVpvs("");
2596                 sv_catsv(PL_endwhite, PL_thiswhite);
2597                 PL_thiswhite = 0;
2598             }
2599             if (PL_thistoken)
2600                 sv_setpvs(PL_thistoken,"");
2601             else
2602                 PL_realtokenstart = -1;
2603         }
2604 #endif
2605         LEAVE;
2606         PL_bufend = SvPVX(PL_linestr);
2607         PL_bufend += SvCUR(PL_linestr);
2608         PL_expect = XOPERATOR;
2609         PL_sublex_info.sub_inwhat = 0;
2610         return ')';
2611     }
2612 }
2613
2614 /*
2615   scan_const
2616
2617   Extracts the next constant part of a pattern, double-quoted string,
2618   or transliteration.  This is terrifying code.
2619
2620   For example, in parsing the double-quoted string "ab\x63$d", it would
2621   stop at the '$' and return an OP_CONST containing 'abc'.
2622
2623   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2624   processing a pattern (PL_lex_inpat is true), a transliteration
2625   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2626
2627   Returns a pointer to the character scanned up to. If this is
2628   advanced from the start pointer supplied (i.e. if anything was
2629   successfully parsed), will leave an OP_CONST for the substring scanned
2630   in pl_yylval. Caller must intuit reason for not parsing further
2631   by looking at the next characters herself.
2632
2633   In patterns:
2634     expand:
2635       \N{ABC}  => \N{U+41.42.43}
2636
2637     pass through:
2638         all other \-char, including \N and \N{ apart from \N{ABC}
2639
2640     stops on:
2641         @ and $ where it appears to be a var, but not for $ as tail anchor
2642         \l \L \u \U \Q \E
2643         (?{  or  (??{
2644
2645
2646   In transliterations:
2647     characters are VERY literal, except for - not at the start or end
2648     of the string, which indicates a range. If the range is in bytes,
2649     scan_const expands the range to the full set of intermediate
2650     characters. If the range is in utf8, the hyphen is replaced with
2651     a certain range mark which will be handled by pmtrans() in op.c.
2652
2653   In double-quoted strings:
2654     backslashes:
2655       double-quoted style: \r and \n
2656       constants: \x31, etc.
2657       deprecated backrefs: \1 (in substitution replacements)
2658       case and quoting: \U \Q \E
2659     stops on @ and $
2660
2661   scan_const does *not* construct ops to handle interpolated strings.
2662   It stops processing as soon as it finds an embedded $ or @ variable
2663   and leaves it to the caller to work out what's going on.
2664
2665   embedded arrays (whether in pattern or not) could be:
2666       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2667
2668   $ in double-quoted strings must be the symbol of an embedded scalar.
2669
2670   $ in pattern could be $foo or could be tail anchor.  Assumption:
2671   it's a tail anchor if $ is the last thing in the string, or if it's
2672   followed by one of "()| \r\n\t"
2673
2674   \1 (backreferences) are turned into $1 in substitutions
2675
2676   The structure of the code is
2677       while (there's a character to process) {
2678           handle transliteration ranges
2679           skip regexp comments /(?#comment)/ and codes /(?{code})/
2680           skip #-initiated comments in //x patterns
2681           check for embedded arrays
2682           check for embedded scalars
2683           if (backslash) {
2684               deprecate \1 in substitution replacements
2685               handle string-changing backslashes \l \U \Q \E, etc.
2686               switch (what was escaped) {
2687                   handle \- in a transliteration (becomes a literal -)
2688                   if a pattern and not \N{, go treat as regular character
2689                   handle \132 (octal characters)
2690                   handle \x15 and \x{1234} (hex characters)
2691                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2692                   handle \cV (control characters)
2693                   handle printf-style backslashes (\f, \r, \n, etc)
2694               } (end switch)
2695               continue
2696           } (end if backslash)
2697           handle regular character
2698     } (end while character to read)
2699
2700 */
2701
2702 STATIC char *
2703 S_scan_const(pTHX_ char *start)
2704 {
2705     dVAR;
2706     char *send = PL_bufend;             /* end of the constant */
2707     SV *sv = newSV(send - start);               /* sv for the constant.  See
2708                                                    note below on sizing. */
2709     char *s = start;                    /* start of the constant */
2710     char *d = SvPVX(sv);                /* destination for copies */
2711     bool dorange = FALSE;                       /* are we in a translit range? */
2712     bool didrange = FALSE;                      /* did we just finish a range? */
2713     bool in_charclass = FALSE;                  /* within /[...]/ */
2714     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2715     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2716                                                    to be UTF8?  But, this can
2717                                                    show as true when the source
2718                                                    isn't utf8, as for example
2719                                                    when it is entirely composed
2720                                                    of hex constants */
2721
2722     /* Note on sizing:  The scanned constant is placed into sv, which is
2723      * initialized by newSV() assuming one byte of output for every byte of
2724      * input.  This routine expects newSV() to allocate an extra byte for a
2725      * trailing NUL, which this routine will append if it gets to the end of
2726      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2727      * CAPITAL LETTER A}), or more output than input if the constant ends up
2728      * recoded to utf8, but each time a construct is found that might increase
2729      * the needed size, SvGROW() is called.  Its size parameter each time is
2730      * based on the best guess estimate at the time, namely the length used so
2731      * far, plus the length the current construct will occupy, plus room for
2732      * the trailing NUL, plus one byte for every input byte still unscanned */
2733
2734     UV uv;
2735 #ifdef EBCDIC
2736     UV literal_endpoint = 0;
2737     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2738 #endif
2739
2740     PERL_ARGS_ASSERT_SCAN_CONST;
2741
2742     assert(PL_lex_inwhat != OP_TRANSR);
2743     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2744         /* If we are doing a trans and we know we want UTF8 set expectation */
2745         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2746         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2747     }
2748
2749
2750     while (s < send || dorange) {
2751
2752         /* get transliterations out of the way (they're most literal) */
2753         if (PL_lex_inwhat == OP_TRANS) {
2754             /* expand a range A-Z to the full set of characters.  AIE! */
2755             if (dorange) {
2756                 I32 i;                          /* current expanded character */
2757                 I32 min;                        /* first character in range */
2758                 I32 max;                        /* last character in range */
2759
2760 #ifdef EBCDIC
2761                 UV uvmax = 0;
2762 #endif
2763
2764                 if (has_utf8
2765 #ifdef EBCDIC
2766                     && !native_range
2767 #endif
2768                     ) {
2769                     char * const c = (char*)utf8_hop((U8*)d, -1);
2770                     char *e = d++;
2771                     while (e-- > c)
2772                         *(e + 1) = *e;
2773                     *c = (char)UTF_TO_NATIVE(0xff);
2774                     /* mark the range as done, and continue */
2775                     dorange = FALSE;
2776                     didrange = TRUE;
2777                     continue;
2778                 }
2779
2780                 i = d - SvPVX_const(sv);                /* remember current offset */
2781 #ifdef EBCDIC
2782                 SvGROW(sv,
2783                        SvLEN(sv) + (has_utf8 ?
2784                                     (512 - UTF_CONTINUATION_MARK +
2785                                      UNISKIP(0x100))
2786                                     : 256));
2787                 /* How many two-byte within 0..255: 128 in UTF-8,
2788                  * 96 in UTF-8-mod. */
2789 #else
2790                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2791 #endif
2792                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2793 #ifdef EBCDIC
2794                 if (has_utf8) {
2795                     int j;
2796                     for (j = 0; j <= 1; j++) {
2797                         char * const c = (char*)utf8_hop((U8*)d, -1);
2798                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2799                         if (j)
2800                             min = (U8)uv;
2801                         else if (uv < 256)
2802                             max = (U8)uv;
2803                         else {
2804                             max = (U8)0xff; /* only to \xff */
2805                             uvmax = uv; /* \x{100} to uvmax */
2806                         }
2807                         d = c; /* eat endpoint chars */
2808                      }
2809                 }
2810                else {
2811 #endif
2812                    d -= 2;              /* eat the first char and the - */
2813                    min = (U8)*d;        /* first char in range */
2814                    max = (U8)d[1];      /* last char in range  */
2815 #ifdef EBCDIC
2816                }
2817 #endif
2818
2819                 if (min > max) {
2820                     Perl_croak(aTHX_
2821                                "Invalid range \"%c-%c\" in transliteration operator",
2822                                (char)min, (char)max);
2823                 }
2824
2825 #ifdef EBCDIC
2826                 if (literal_endpoint == 2 &&
2827                     ((isLOWER(min) && isLOWER(max)) ||
2828                      (isUPPER(min) && isUPPER(max)))) {
2829                     if (isLOWER(min)) {
2830                         for (i = min; i <= max; i++)
2831                             if (isLOWER(i))
2832                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2833                     } else {
2834                         for (i = min; i <= max; i++)
2835                             if (isUPPER(i))
2836                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2837                     }
2838                 }
2839                 else
2840 #endif
2841                     for (i = min; i <= max; i++)
2842 #ifdef EBCDIC
2843                         if (has_utf8) {
2844                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2845                             if (UNI_IS_INVARIANT(ch))
2846                                 *d++ = (U8)i;
2847                             else {
2848                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2849                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2850                             }
2851                         }
2852                         else
2853 #endif
2854                             *d++ = (char)i;
2855
2856 #ifdef EBCDIC
2857                 if (uvmax) {
2858                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2859                     if (uvmax > 0x101)
2860                         *d++ = (char)UTF_TO_NATIVE(0xff);
2861                     if (uvmax > 0x100)
2862                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2863                 }
2864 #endif
2865
2866                 /* mark the range as done, and continue */
2867                 dorange = FALSE;
2868                 didrange = TRUE;
2869 #ifdef EBCDIC
2870                 literal_endpoint = 0;
2871 #endif
2872                 continue;
2873             }
2874
2875             /* range begins (ignore - as first or last char) */
2876             else if (*s == '-' && s+1 < send  && s != start) {
2877                 if (didrange) {
2878                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2879                 }
2880                 if (has_utf8
2881 #ifdef EBCDIC
2882                     && !native_range
2883 #endif
2884                     ) {
2885                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2886                     s++;
2887                     continue;
2888                 }
2889                 dorange = TRUE;
2890                 s++;
2891             }
2892             else {
2893                 didrange = FALSE;
2894 #ifdef EBCDIC
2895                 literal_endpoint = 0;
2896                 native_range = TRUE;
2897 #endif
2898             }
2899         }
2900
2901         /* if we get here, we're not doing a transliteration */
2902
2903         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2904             char *s1 = s-1;
2905             int esc = 0;
2906             while (s1 >= start && *s1-- == '\\')
2907                 esc = !esc;
2908             if (!esc)
2909                 in_charclass = TRUE;
2910         }
2911
2912         else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
2913             char *s1 = s-1;
2914             int esc = 0;
2915             while (s1 >= start && *s1-- == '\\')
2916                 esc = !esc;
2917             if (!esc)
2918                 in_charclass = FALSE;
2919         }
2920
2921         /* skip for regexp comments /(?#comment)/, except for the last
2922          * char, which will be done separately.
2923          * Stop on (?{..}) and friends */
2924
2925         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2926             if (s[2] == '#') {
2927                 while (s+1 < send && *s != ')')
2928                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2929             }
2930             else if (!PL_lex_casemods && !in_charclass &&
2931                      (    s[2] == '{' /* This should match regcomp.c */
2932                       || (s[2] == '?' && s[3] == '{')))
2933             {
2934                 break;
2935             }
2936         }
2937
2938         /* likewise skip #-initiated comments in //x patterns */
2939         else if (*s == '#' && PL_lex_inpat &&
2940           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2941             while (s+1 < send && *s != '\n')
2942                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2943         }
2944
2945         /* no further processing of single-quoted regex */
2946         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2947             goto default_action;
2948
2949         /* check for embedded arrays
2950            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2951            */
2952         else if (*s == '@' && s[1]) {
2953             if (isALNUM_lazy_if(s+1,UTF))
2954                 break;
2955             if (strchr(":'{$", s[1]))
2956                 break;
2957             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2958                 break; /* in regexp, neither @+ nor @- are interpolated */
2959         }
2960
2961         /* check for embedded scalars.  only stop if we're sure it's a
2962            variable.
2963         */
2964         else if (*s == '$') {
2965             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2966                 break;
2967             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2968                 if (s[1] == '\\') {
2969                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2970                                    "Possible unintended interpolation of $\\ in regex");
2971                 }
2972                 break;          /* in regexp, $ might be tail anchor */
2973             }
2974         }
2975
2976         /* End of else if chain - OP_TRANS rejoin rest */
2977
2978         /* backslashes */
2979         if (*s == '\\' && s+1 < send) {
2980             char* e;    /* Can be used for ending '}', etc. */
2981
2982             s++;
2983
2984             /* warn on \1 - \9 in substitution replacements, but note that \11
2985              * is an octal; and \19 is \1 followed by '9' */
2986             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2987                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2988             {
2989                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2990                 *--s = '$';
2991                 break;
2992             }
2993
2994             /* string-change backslash escapes */
2995             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
2996                 --s;
2997                 break;
2998             }
2999             /* In a pattern, process \N, but skip any other backslash escapes.
3000              * This is because we don't want to translate an escape sequence
3001              * into a meta symbol and have the regex compiler use the meta
3002              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3003              * in spite of this, we do have to process \N here while the proper
3004              * charnames handler is in scope.  See bugs #56444 and #62056.
3005              * There is a complication because \N in a pattern may also stand
3006              * for 'match a non-nl', and not mean a charname, in which case its
3007              * processing should be deferred to the regex compiler.  To be a
3008              * charname it must be followed immediately by a '{', and not look
3009              * like \N followed by a curly quantifier, i.e., not something like
3010              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3011              * quantifier */
3012             else if (PL_lex_inpat
3013                     && (*s != 'N'
3014                         || s[1] != '{'
3015                         || regcurly(s + 1)))
3016             {
3017                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3018                 goto default_action;
3019             }
3020
3021             switch (*s) {
3022
3023             /* quoted - in transliterations */
3024             case '-':
3025                 if (PL_lex_inwhat == OP_TRANS) {
3026                     *d++ = *s++;
3027                     continue;
3028                 }
3029                 /* FALL THROUGH */
3030             default:
3031                 {
3032                     if ((isALNUMC(*s)))
3033                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3034                                        "Unrecognized escape \\%c passed through",
3035                                        *s);
3036                     /* default action is to copy the quoted character */
3037                     goto default_action;
3038                 }
3039
3040             /* eg. \132 indicates the octal constant 0132 */
3041             case '0': case '1': case '2': case '3':
3042             case '4': case '5': case '6': case '7':
3043                 {
3044                     I32 flags = 0;
3045                     STRLEN len = 3;
3046                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3047                     s += len;
3048                 }
3049                 goto NUM_ESCAPE_INSERT;
3050
3051             /* eg. \o{24} indicates the octal constant \024 */
3052             case 'o':
3053                 {
3054                     STRLEN len;
3055                     const char* error;
3056
3057                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3058                     s += len;
3059                     if (! valid) {
3060                         yyerror(error);
3061                         continue;
3062                     }
3063                     goto NUM_ESCAPE_INSERT;
3064                 }
3065
3066             /* eg. \x24 indicates the hex constant 0x24 */
3067             case 'x':
3068                 {
3069                     STRLEN len;
3070                     const char* error;
3071
3072                     bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3073                     s += len;
3074                     if (! valid) {
3075                         yyerror(error);
3076                         continue;
3077                     }
3078                 }
3079
3080               NUM_ESCAPE_INSERT:
3081                 /* Insert oct or hex escaped character.  There will always be
3082                  * enough room in sv since such escapes will be longer than any
3083                  * UTF-8 sequence they can end up as, except if they force us
3084                  * to recode the rest of the string into utf8 */
3085
3086                 /* Here uv is the ordinal of the next character being added in
3087                  * unicode (converted from native). */
3088                 if (!UNI_IS_INVARIANT(uv)) {
3089                     if (!has_utf8 && uv > 255) {
3090                         /* Might need to recode whatever we have accumulated so
3091                          * far if it contains any chars variant in utf8 or
3092                          * utf-ebcdic. */
3093
3094                         SvCUR_set(sv, d - SvPVX_const(sv));
3095                         SvPOK_on(sv);
3096                         *d = '\0';
3097                         /* See Note on sizing above.  */
3098                         sv_utf8_upgrade_flags_grow(sv,
3099                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3100                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3101                         d = SvPVX(sv) + SvCUR(sv);
3102                         has_utf8 = TRUE;
3103                     }
3104
3105                     if (has_utf8) {
3106                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3107                         if (PL_lex_inwhat == OP_TRANS &&
3108                             PL_sublex_info.sub_op) {
3109                             PL_sublex_info.sub_op->op_private |=
3110                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3111                                              : OPpTRANS_TO_UTF);
3112                         }
3113 #ifdef EBCDIC
3114                         if (uv > 255 && !dorange)
3115                             native_range = FALSE;
3116 #endif
3117                     }
3118                     else {
3119                         *d++ = (char)uv;
3120                     }
3121                 }
3122                 else {
3123                     *d++ = (char) uv;
3124                 }
3125                 continue;
3126
3127             case 'N':
3128                 /* In a non-pattern \N must be a named character, like \N{LATIN
3129                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3130                  * mean to match a non-newline.  For non-patterns, named
3131                  * characters are converted to their string equivalents. In
3132                  * patterns, named characters are not converted to their
3133                  * ultimate forms for the same reasons that other escapes
3134                  * aren't.  Instead, they are converted to the \N{U+...} form
3135                  * to get the value from the charnames that is in effect right
3136                  * now, while preserving the fact that it was a named character
3137                  * so that the regex compiler knows this */
3138
3139                 /* This section of code doesn't generally use the
3140                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3141                  * a close examination of this macro and determined it is a
3142                  * no-op except on utfebcdic variant characters.  Every
3143                  * character generated by this that would normally need to be
3144                  * enclosed by this macro is invariant, so the macro is not
3145                  * needed, and would complicate use of copy().  XXX There are
3146                  * other parts of this file where the macro is used
3147                  * inconsistently, but are saved by it being a no-op */
3148
3149                 /* The structure of this section of code (besides checking for
3150                  * errors and upgrading to utf8) is:
3151                  *  Further disambiguate between the two meanings of \N, and if
3152                  *      not a charname, go process it elsewhere
3153                  *  If of form \N{U+...}, pass it through if a pattern;
3154                  *      otherwise convert to utf8
3155                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3156                  *  pattern; otherwise convert to utf8 */
3157
3158                 /* Here, s points to the 'N'; the test below is guaranteed to
3159                  * succeed if we are being called on a pattern as we already
3160                  * know from a test above that the next character is a '{'.
3161                  * On a non-pattern \N must mean 'named sequence, which
3162                  * requires braces */
3163                 s++;
3164                 if (*s != '{') {
3165                     yyerror("Missing braces on \\N{}");
3166                     continue;
3167                 }
3168                 s++;
3169
3170                 /* If there is no matching '}', it is an error. */
3171                 if (! (e = strchr(s, '}'))) {
3172                     if (! PL_lex_inpat) {
3173                         yyerror("Missing right brace on \\N{}");
3174                     } else {
3175                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3176                     }
3177                     continue;
3178                 }
3179
3180                 /* Here it looks like a named character */
3181
3182                 if (PL_lex_inpat) {
3183
3184                     /* XXX This block is temporary code.  \N{} implies that the
3185                      * pattern is to have Unicode semantics, and therefore
3186                      * currently has to be encoded in utf8.  By putting it in
3187                      * utf8 now, we save a whole pass in the regular expression
3188                      * compiler.  Once that code is changed so Unicode
3189                      * semantics doesn't necessarily have to be in utf8, this
3190                      * block should be removed.  However, the code that parses
3191                      * the output of this would have to be changed to not
3192                      * necessarily expect utf8 */
3193                     if (!has_utf8) {
3194                         SvCUR_set(sv, d - SvPVX_const(sv));
3195                         SvPOK_on(sv);
3196                         *d = '\0';
3197                         /* See Note on sizing above.  */
3198                         sv_utf8_upgrade_flags_grow(sv,
3199                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3200                                         /* 5 = '\N{' + cur char + NUL */
3201                                         (STRLEN)(send - s) + 5);
3202                         d = SvPVX(sv) + SvCUR(sv);
3203                         has_utf8 = TRUE;
3204                     }
3205                 }
3206
3207                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3208                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3209                                 | PERL_SCAN_DISALLOW_PREFIX;
3210                     STRLEN len;
3211
3212                     /* For \N{U+...}, the '...' is a unicode value even on
3213                      * EBCDIC machines */
3214                     s += 2;         /* Skip to next char after the 'U+' */
3215                     len = e - s;
3216                     uv = grok_hex(s, &len, &flags, NULL);
3217                     if (len == 0 || len != (STRLEN)(e - s)) {
3218                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3219                         s = e + 1;
3220                         continue;
3221                     }
3222
3223                     if (PL_lex_inpat) {
3224
3225                         /* On non-EBCDIC platforms, pass through to the regex
3226                          * compiler unchanged.  The reason we evaluated the
3227                          * number above is to make sure there wasn't a syntax
3228                          * error.  But on EBCDIC we convert to native so
3229                          * downstream code can continue to assume it's native
3230                          */
3231                         s -= 5;     /* Include the '\N{U+' */
3232 #ifdef EBCDIC
3233                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3234                                                                and the \0 */
3235                                     "\\N{U+%X}",
3236                                     (unsigned int) UNI_TO_NATIVE(uv));
3237 #else
3238                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3239                         d += e - s + 1;
3240 #endif
3241                     }
3242                     else {  /* Not a pattern: convert the hex to string */
3243
3244                          /* If destination is not in utf8, unconditionally
3245                           * recode it to be so.  This is because \N{} implies
3246                           * Unicode semantics, and scalars have to be in utf8
3247                           * to guarantee those semantics */
3248                         if (! has_utf8) {
3249                             SvCUR_set(sv, d - SvPVX_const(sv));
3250                             SvPOK_on(sv);
3251                             *d = '\0';
3252                             /* See Note on sizing above.  */
3253                             sv_utf8_upgrade_flags_grow(
3254                                         sv,
3255                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3256                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3257                             d = SvPVX(sv) + SvCUR(sv);
3258                             has_utf8 = TRUE;
3259                         }
3260
3261                         /* Add the string to the output */
3262                         if (UNI_IS_INVARIANT(uv)) {
3263                             *d++ = (char) uv;
3264                         }
3265                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3266                     }
3267                 }
3268                 else { /* Here is \N{NAME} but not \N{U+...}. */
3269
3270                     SV *res;            /* result from charnames */
3271                     const char *str;    /* the string in 'res' */
3272                     STRLEN len;         /* its length */
3273
3274                     /* Get the value for NAME */
3275                     res = newSVpvn(s, e - s);
3276                     res = new_constant( NULL, 0, "charnames",
3277                                         /* includes all of: \N{...} */
3278                                         res, NULL, s - 3, e - s + 4 );
3279
3280                     /* Most likely res will be in utf8 already since the
3281                      * standard charnames uses pack U, but a custom translator
3282                      * can leave it otherwise, so make sure.  XXX This can be
3283                      * revisited to not have charnames use utf8 for characters
3284                      * that don't need it when regexes don't have to be in utf8
3285                      * for Unicode semantics.  If doing so, remember EBCDIC */
3286                     sv_utf8_upgrade(res);
3287                     str = SvPV_const(res, len);
3288
3289                     /* Don't accept malformed input */
3290                     if (! is_utf8_string((U8 *) str, len)) {
3291                         yyerror("Malformed UTF-8 returned by \\N");
3292                     }
3293                     else if (PL_lex_inpat) {
3294
3295                         if (! len) { /* The name resolved to an empty string */
3296                             Copy("\\N{}", d, 4, char);
3297                             d += 4;
3298                         }
3299                         else {
3300                             /* In order to not lose information for the regex
3301                             * compiler, pass the result in the specially made
3302                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3303                             * the code points in hex of each character
3304                             * returned by charnames */
3305
3306                             const char *str_end = str + len;
3307                             STRLEN char_length;     /* cur char's byte length */
3308                             STRLEN output_length;   /* and the number of bytes
3309                                                        after this is translated
3310                                                        into hex digits */
3311                             const STRLEN off = d - SvPVX_const(sv);
3312
3313                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3314                              * max('U+', '.'); and 1 for NUL */
3315                             char hex_string[2 * UTF8_MAXBYTES + 5];
3316
3317                             /* Get the first character of the result. */
3318                             U32 uv = utf8n_to_uvuni((U8 *) str,
3319                                                     len,
3320                                                     &char_length,
3321                                                     UTF8_ALLOW_ANYUV);
3322
3323                             /* The call to is_utf8_string() above hopefully
3324                              * guarantees that there won't be an error.  But
3325                              * it's easy here to make sure.  The function just
3326                              * above warns and returns 0 if invalid utf8, but
3327                              * it can also return 0 if the input is validly a
3328                              * NUL. Disambiguate */
3329                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3330                                 uv = UNICODE_REPLACEMENT;
3331                             }
3332
3333                             /* Convert first code point to hex, including the
3334                              * boiler plate before it.  For all these, we
3335                              * convert to native format so that downstream code
3336                              * can continue to assume the input is native */
3337                             output_length =
3338                                 my_snprintf(hex_string, sizeof(hex_string),
3339                                             "\\N{U+%X",
3340                                             (unsigned int) UNI_TO_NATIVE(uv));
3341
3342                             /* Make sure there is enough space to hold it */
3343                             d = off + SvGROW(sv, off
3344                                                  + output_length
3345                                                  + (STRLEN)(send - e)
3346                                                  + 2);  /* '}' + NUL */
3347                             /* And output it */
3348                             Copy(hex_string, d, output_length, char);
3349                             d += output_length;
3350
3351                             /* For each subsequent character, append dot and
3352                              * its ordinal in hex */
3353                             while ((str += char_length) < str_end) {
3354                                 const STRLEN off = d - SvPVX_const(sv);
3355                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3356                                                         str_end - str,
3357                                                         &char_length,
3358                                                         UTF8_ALLOW_ANYUV);
3359                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3360                                     uv = UNICODE_REPLACEMENT;
3361                                 }
3362
3363                                 output_length =
3364                                     my_snprintf(hex_string, sizeof(hex_string),
3365                                             ".%X",
3366                                             (unsigned int) UNI_TO_NATIVE(uv));
3367
3368                                 d = off + SvGROW(sv, off
3369                                                      + output_length
3370                                                      + (STRLEN)(send - e)
3371                                                      + 2);      /* '}' +  NUL */
3372                                 Copy(hex_string, d, output_length, char);
3373                                 d += output_length;
3374                             }
3375
3376                             *d++ = '}'; /* Done.  Add the trailing brace */
3377                         }
3378                     }
3379                     else { /* Here, not in a pattern.  Convert the name to a
3380                             * string. */
3381
3382                          /* If destination is not in utf8, unconditionally
3383                           * recode it to be so.  This is because \N{} implies
3384                           * Unicode semantics, and scalars have to be in utf8
3385                           * to guarantee those semantics */
3386                         if (! has_utf8) {
3387                             SvCUR_set(sv, d - SvPVX_const(sv));
3388                             SvPOK_on(sv);
3389                             *d = '\0';
3390                             /* See Note on sizing above.  */
3391                             sv_utf8_upgrade_flags_grow(sv,
3392                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3393                                                 len + (STRLEN)(send - s) + 1);
3394                             d = SvPVX(sv) + SvCUR(sv);
3395                             has_utf8 = TRUE;
3396                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3397
3398                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3399                              * set correctly here). */
3400                             const STRLEN off = d - SvPVX_const(sv);
3401                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3402                         }
3403                         Copy(str, d, len, char);
3404                         d += len;
3405                     }
3406                     SvREFCNT_dec(res);
3407
3408                     /* Deprecate non-approved name syntax */
3409                     if (ckWARN_d(WARN_DEPRECATED)) {
3410                         bool problematic = FALSE;
3411                         char* i = s;
3412
3413                         /* For non-ut8 input, look to see that the first
3414                          * character is an alpha, then loop through the rest
3415                          * checking that each is a continuation */
3416                         if (! this_utf8) {
3417                             if (! isALPHAU(*i)) problematic = TRUE;
3418                             else for (i = s + 1; i < e; i++) {
3419                                 if (isCHARNAME_CONT(*i)) continue;
3420                                 problematic = TRUE;
3421                                 break;
3422                             }
3423                         }
3424                         else {
3425                             /* Similarly for utf8.  For invariants can check
3426                              * directly.  We accept anything above the latin1
3427                              * range because it is immaterial to Perl if it is
3428                              * correct or not, and is expensive to check.  But
3429                              * it is fairly easy in the latin1 range to convert
3430                              * the variants into a single character and check
3431                              * those */
3432                             if (UTF8_IS_INVARIANT(*i)) {
3433                                 if (! isALPHAU(*i)) problematic = TRUE;
3434                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3435                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3436                                                                             *(i+1)))))
3437                                 {
3438                                     problematic = TRUE;
3439                                 }
3440                             }
3441                             if (! problematic) for (i = s + UTF8SKIP(s);
3442                                                     i < e;
3443                                                     i+= UTF8SKIP(i))
3444                             {
3445                                 if (UTF8_IS_INVARIANT(*i)) {
3446                                     if (isCHARNAME_CONT(*i)) continue;
3447                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3448                                     continue;
3449                                 } else if (isCHARNAME_CONT(
3450                                             UNI_TO_NATIVE(
3451                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3452                                 {
3453                                     continue;
3454                                 }
3455                                 problematic = TRUE;
3456                                 break;
3457                             }
3458                         }
3459                         if (problematic) {
3460                             /* The e-i passed to the final %.*s makes sure that
3461                              * should the trailing NUL be missing that this
3462                              * print won't run off the end of the string */
3463                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3464                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3465                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3466                         }
3467                     }
3468                 } /* End \N{NAME} */
3469 #ifdef EBCDIC
3470                 if (!dorange)
3471                     native_range = FALSE; /* \N{} is defined to be Unicode */
3472 #endif
3473                 s = e + 1;  /* Point to just after the '}' */
3474                 continue;
3475
3476             /* \c is a control character */
3477             case 'c':
3478                 s++;
3479                 if (s < send) {
3480                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3481                 }
3482                 else {
3483                     yyerror("Missing control char name in \\c");
3484                 }
3485                 continue;
3486
3487             /* printf-style backslashes, formfeeds, newlines, etc */
3488             case 'b':
3489                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3490                 break;
3491             case 'n':
3492                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3493                 break;
3494             case 'r':
3495                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3496                 break;
3497             case 'f':
3498                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3499                 break;
3500             case 't':
3501                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3502                 break;
3503             case 'e':
3504                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3505                 break;
3506             case 'a':
3507                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3508                 break;
3509             } /* end switch */
3510
3511             s++;
3512             continue;
3513         } /* end if (backslash) */
3514 #ifdef EBCDIC
3515         else
3516             literal_endpoint++;
3517 #endif
3518
3519     default_action:
3520         /* If we started with encoded form, or already know we want it,
3521            then encode the next character */
3522         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3523             STRLEN len  = 1;
3524
3525
3526             /* One might think that it is wasted effort in the case of the
3527              * source being utf8 (this_utf8 == TRUE) to take the next character
3528              * in the source, convert it to an unsigned value, and then convert
3529              * it back again.  But the source has not been validated here.  The
3530              * routine that does the conversion checks for errors like
3531              * malformed utf8 */
3532
3533             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3534             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3535             if (!has_utf8) {
3536                 SvCUR_set(sv, d - SvPVX_const(sv));
3537                 SvPOK_on(sv);
3538                 *d = '\0';
3539                 /* See Note on sizing above.  */
3540                 sv_utf8_upgrade_flags_grow(sv,
3541                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3542                                         need + (STRLEN)(send - s) + 1);
3543                 d = SvPVX(sv) + SvCUR(sv);
3544                 has_utf8 = TRUE;
3545             } else if (need > len) {
3546                 /* encoded value larger than old, may need extra space (NOTE:
3547                  * SvCUR() is not set correctly here).   See Note on sizing
3548                  * above.  */
3549                 const STRLEN off = d - SvPVX_const(sv);
3550                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3551             }
3552             s += len;
3553
3554             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3555 #ifdef EBCDIC
3556             if (uv > 255 && !dorange)
3557                 native_range = FALSE;
3558 #endif
3559         }
3560         else {
3561             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3562         }
3563     } /* while loop to process each character */
3564
3565     /* terminate the string and set up the sv */
3566     *d = '\0';
3567     SvCUR_set(sv, d - SvPVX_const(sv));
3568     if (SvCUR(sv) >= SvLEN(sv))
3569         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3570                    " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3571
3572     SvPOK_on(sv);
3573     if (PL_encoding && !has_utf8) {
3574         sv_recode_to_utf8(sv, PL_encoding);
3575         if (SvUTF8(sv))
3576             has_utf8 = TRUE;
3577     }
3578     if (has_utf8) {
3579         SvUTF8_on(sv);
3580         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3581             PL_sublex_info.sub_op->op_private |=
3582                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3583         }
3584     }
3585
3586     /* shrink the sv if we allocated more than we used */
3587     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3588         SvPV_shrink_to_cur(sv);
3589     }
3590
3591     /* return the substring (via pl_yylval) only if we parsed anything */
3592     if (s > PL_bufptr) {
3593         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3594             const char *const key = PL_lex_inpat ? "qr" : "q";
3595             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3596             const char *type;
3597             STRLEN typelen;
3598
3599             if (PL_lex_inwhat == OP_TRANS) {
3600                 type = "tr";
3601                 typelen = 2;
3602             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3603                 type = "s";
3604                 typelen = 1;
3605             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3606                 type = "q";
3607                 typelen = 1;
3608             } else  {
3609                 type = "qq";
3610                 typelen = 2;
3611             }
3612
3613             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3614                                 type, typelen);
3615         }
3616         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3617     } else
3618         SvREFCNT_dec(sv);
3619     return s;
3620 }
3621
3622 /* S_intuit_more
3623  * Returns TRUE if there's more to the expression (e.g., a subscript),
3624  * FALSE otherwise.
3625  *
3626  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3627  *
3628  * ->[ and ->{ return TRUE
3629  * { and [ outside a pattern are always subscripts, so return TRUE
3630  * if we're outside a pattern and it's not { or [, then return FALSE
3631  * if we're in a pattern and the first char is a {
3632  *   {4,5} (any digits around the comma) returns FALSE
3633  * if we're in a pattern and the first char is a [
3634  *   [] returns FALSE
3635  *   [SOMETHING] has a funky algorithm to decide whether it's a
3636  *      character class or not.  It has to deal with things like
3637  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3638  * anything else returns TRUE
3639  */
3640
3641 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3642
3643 STATIC int
3644 S_intuit_more(pTHX_ register char *s)
3645 {
3646     dVAR;
3647
3648     PERL_ARGS_ASSERT_INTUIT_MORE;
3649
3650     if (PL_lex_brackets)
3651         return TRUE;
3652     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3653         return TRUE;
3654     if (*s != '{' && *s != '[')
3655         return FALSE;
3656     if (!PL_lex_inpat)
3657         return TRUE;
3658
3659     /* In a pattern, so maybe we have {n,m}. */
3660     if (*s == '{') {
3661         if (regcurly(s)) {
3662             return FALSE;
3663         }
3664         return TRUE;
3665     }
3666
3667     /* On the other hand, maybe we have a character class */
3668
3669     s++;
3670     if (*s == ']' || *s == '^')
3671         return FALSE;
3672     else {
3673         /* this is terrifying, and it works */
3674         int weight = 2;         /* let's weigh the evidence */
3675         char seen[256];
3676         unsigned char un_char = 255, last_un_char;
3677         const char * const send = strchr(s,']');
3678         char tmpbuf[sizeof PL_tokenbuf * 4];
3679
3680         if (!send)              /* has to be an expression */
3681             return TRUE;
3682
3683         Zero(seen,256,char);
3684         if (*s == '$')
3685             weight -= 3;
3686         else if (isDIGIT(*s)) {
3687             if (s[1] != ']') {
3688                 if (isDIGIT(s[1]) && s[2] == ']')
3689                     weight -= 10;
3690             }
3691             else
3692                 weight -= 100;
3693         }
3694         for (; s < send; s++) {
3695             last_un_char = un_char;
3696             un_char = (unsigned char)*s;
3697             switch (*s) {
3698             case '@':
3699             case '&':
3700             case '$':
3701                 weight -= seen[un_char] * 10;
3702                 if (isALNUM_lazy_if(s+1,UTF)) {
3703                     int len;
3704                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3705                     len = (int)strlen(tmpbuf);
3706                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3707                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3708                         weight -= 100;
3709                     else
3710                         weight -= 10;
3711                 }
3712                 else if (*s == '$' && s[1] &&
3713                   strchr("[#!%*<>()-=",s[1])) {
3714                     if (/*{*/ strchr("])} =",s[2]))
3715                         weight -= 10;
3716                     else
3717                         weight -= 1;
3718                 }
3719                 break;
3720             case '\\':
3721                 un_char = 254;
3722                 if (s[1]) {
3723                     if (strchr("wds]",s[1]))
3724                         weight += 100;
3725                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3726                         weight += 1;
3727                     else if (strchr("rnftbxcav",s[1]))
3728                         weight += 40;
3729                     else if (isDIGIT(s[1])) {
3730                         weight += 40;
3731                         while (s[1] && isDIGIT(s[1]))
3732                             s++;
3733                     }
3734                 }
3735                 else
3736                     weight += 100;
3737                 break;
3738             case '-':
3739                 if (s[1] == '\\')
3740                     weight += 50;
3741                 if (strchr("aA01! ",last_un_char))
3742                     weight += 30;
3743                 if (strchr("zZ79~",s[1]))
3744                     weight += 30;
3745                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3746                     weight -= 5;        /* cope with negative subscript */
3747                 break;
3748             default:
3749                 if (!isALNUM(last_un_char)
3750                     && !(last_un_char == '$' || last_un_char == '@'
3751                          || last_un_char == '&')
3752                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3753                     char *d = tmpbuf;
3754                     while (isALPHA(*s))
3755                         *d++ = *s++;
3756                     *d = '\0';
3757                     if (keyword(tmpbuf, d - tmpbuf, 0))
3758                         weight -= 150;
3759                 }
3760                 if (un_char == last_un_char + 1)
3761                     weight += 5;
3762                 weight -= seen[un_char];
3763                 break;
3764             }
3765             seen[un_char]++;
3766         }
3767         if (weight >= 0)        /* probably a character class */
3768             return FALSE;
3769     }
3770
3771     return TRUE;
3772 }
3773
3774 /*
3775  * S_intuit_method
3776  *
3777  * Does all the checking to disambiguate
3778  *   foo bar
3779  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3780  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3781  *
3782  * First argument is the stuff after the first token, e.g. "bar".
3783  *
3784  * Not a method if foo is a filehandle.
3785  * Not a method if foo is a subroutine prototyped to take a filehandle.
3786  * Not a method if it's really "Foo $bar"
3787  * Method if it's "foo $bar"
3788  * Not a method if it's really "print foo $bar"
3789  * Method if it's really "foo package::" (interpreted as package->foo)
3790  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3791  * Not a method if bar is a filehandle or package, but is quoted with
3792  *   =>
3793  */
3794
3795 STATIC int
3796 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3797 {
3798     dVAR;
3799     char *s = start + (*start == '$');
3800     char tmpbuf[sizeof PL_tokenbuf];
3801     STRLEN len;
3802     GV* indirgv;
3803 #ifdef PERL_MAD
3804     int soff;
3805 #endif
3806
3807     PERL_ARGS_ASSERT_INTUIT_METHOD;
3808
3809     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3810             return 0;
3811     if (cv && SvPOK(cv)) {
3812                 const char *proto = CvPROTO(cv);
3813                 if (proto) {
3814                     if (*proto == ';')
3815                         proto++;
3816                     if (*proto == '*')
3817                         return 0;
3818                 }
3819     }
3820     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3821     /* start is the beginning of the possible filehandle/object,
3822      * and s is the end of it
3823      * tmpbuf is a copy of it
3824      */
3825
3826     if (*start == '$') {
3827         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3828                 isUPPER(*PL_tokenbuf))
3829             return 0;
3830 #ifdef PERL_MAD
3831         len = start - SvPVX(PL_linestr);
3832 #endif
3833         s = PEEKSPACE(s);
3834 #ifdef PERL_MAD
3835         start = SvPVX(PL_linestr) + len;
3836 #endif
3837         PL_bufptr = start;
3838         PL_expect = XREF;
3839         return *s == '(' ? FUNCMETH : METHOD;
3840     }
3841     if (!keyword(tmpbuf, len, 0)) {
3842         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3843             len -= 2;
3844             tmpbuf[len] = '\0';
3845 #ifdef PERL_MAD
3846             soff = s - SvPVX(PL_linestr);
3847 #endif
3848             goto bare_package;
3849         }
3850         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3851         if (indirgv && GvCVu(indirgv))
3852             return 0;
3853         /* filehandle or package name makes it a method */
3854         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3855 #ifdef PERL_MAD
3856             soff = s - SvPVX(PL_linestr);
3857 #endif
3858             s = PEEKSPACE(s);
3859             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3860                 return 0;       /* no assumptions -- "=>" quotes bareword */
3861       bare_package:
3862             start_force(PL_curforce);
3863             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3864                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3865             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3866             if (PL_madskills)
3867                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3868                                                             ( UTF ? SVf_UTF8 : 0 )));
3869             PL_expect = XTERM;
3870             force_next(WORD);
3871             PL_bufptr = s;
3872 #ifdef PERL_MAD
3873             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3874 #endif
3875             return *s == '(' ? FUNCMETH : METHOD;
3876         }
3877     }
3878     return 0;
3879 }
3880
3881 /* Encoded script support. filter_add() effectively inserts a
3882  * 'pre-processing' function into the current source input stream.
3883  * Note that the filter function only applies to the current source file
3884  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3885  *
3886  * The datasv parameter (which may be NULL) can be used to pass
3887  * private data to this instance of the filter. The filter function
3888  * can recover the SV using the FILTER_DATA macro and use it to
3889  * store private buffers and state information.
3890  *
3891  * The supplied datasv parameter is upgraded to a PVIO type
3892  * and the IoDIRP/IoANY field is used to store the function pointer,
3893  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3894  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3895  * private use must be set using malloc'd pointers.
3896  */
3897
3898 SV *
3899 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3900 {
3901     dVAR;
3902     if (!funcp)
3903         return NULL;
3904
3905     if (!PL_parser)
3906         return NULL;
3907
3908     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3909         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3910
3911     if (!PL_rsfp_filters)
3912         PL_rsfp_filters = newAV();
3913     if (!datasv)
3914         datasv = newSV(0);
3915     SvUPGRADE(datasv, SVt_PVIO);
3916     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3917     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3918     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3919                           FPTR2DPTR(void *, IoANY(datasv)),
3920                           SvPV_nolen(datasv)));
3921     av_unshift(PL_rsfp_filters, 1);
3922     av_store(PL_rsfp_filters, 0, datasv) ;
3923     if (
3924         !PL_parser->filtered
3925      && PL_parser->lex_flags & LEX_EVALBYTES
3926      && PL_bufptr < PL_bufend
3927     ) {
3928         const char *s = PL_bufptr;
3929         while (s < PL_bufend) {
3930             if (*s == '\n') {
3931                 SV *linestr = PL_parser->linestr;
3932                 char *buf = SvPVX(linestr);
3933                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3934                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3935                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3936                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3937                 STRLEN const last_uni_pos =
3938                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3939                 STRLEN const last_lop_pos =
3940                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3941                 av_push(PL_rsfp_filters, linestr);
3942                 PL_parser->linestr =
3943                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3944                 buf = SvPVX(PL_parser->linestr);
3945                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3946                 PL_parser->bufptr = buf + bufptr_pos;
3947                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3948                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3949                 PL_parser->linestart = buf + linestart_pos;
3950                 if (PL_parser->last_uni)
3951                     PL_parser->last_uni = buf + last_uni_pos;
3952                 if (PL_parser->last_lop)
3953                     PL_parser->last_lop = buf + last_lop_pos;
3954                 SvLEN(linestr) = SvCUR(linestr);
3955                 SvCUR(linestr) = s-SvPVX(linestr);
3956                 PL_parser->filtered = 1;
3957                 break;
3958             }
3959             s++;
3960         }
3961     }
3962     return(datasv);
3963 }
3964
3965
3966 /* Delete most recently added instance of this filter function. */
3967 void
3968 Perl_filter_del(pTHX_ filter_t funcp)
3969 {
3970     dVAR;
3971     SV *datasv;
3972
3973     PERL_ARGS_ASSERT_FILTER_DEL;
3974
3975 #ifdef DEBUGGING
3976     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3977                           FPTR2DPTR(void*, funcp)));
3978 #endif
3979     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3980         return;
3981     /* if filter is on top of stack (usual case) just pop it off */
3982     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3983     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3984         sv_free(av_pop(PL_rsfp_filters));
3985
3986         return;
3987     }
3988     /* we need to search for the correct entry and clear it     */
3989     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3990 }
3991
3992
3993 /* Invoke the idxth filter function for the current rsfp.        */
3994 /* maxlen 0 = read one text line */
3995 I32
3996 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3997 {
3998     dVAR;
3999     filter_t funcp;
4000     SV *datasv = NULL;
4001     /* This API is bad. It should have been using unsigned int for maxlen.
4002        Not sure if we want to change the API, but if not we should sanity
4003        check the value here.  */
4004     unsigned int correct_length
4005         = maxlen < 0 ?
4006 #ifdef PERL_MICRO
4007         0x7FFFFFFF
4008 #else
4009         INT_MAX
4010 #endif
4011         : maxlen;
4012
4013     PERL_ARGS_ASSERT_FILTER_READ;
4014
4015     if (!PL_parser || !PL_rsfp_filters)
4016         return -1;
4017     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4018         /* Provide a default input filter to make life easy.    */
4019         /* Note that we append to the line. This is handy.      */
4020         DEBUG_P(PerlIO_printf(Perl_debug_log,
4021                               "filter_read %d: from rsfp\n", idx));
4022         if (correct_length) {
4023             /* Want a block */
4024             int len ;
4025             const int old_len = SvCUR(buf_sv);
4026
4027             /* ensure buf_sv is large enough */
4028             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4029             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4030                                    correct_length)) <= 0) {
4031                 if (PerlIO_error(PL_rsfp))
4032                     return -1;          /* error */
4033                 else
4034                     return 0 ;          /* end of file */
4035             }
4036             SvCUR_set(buf_sv, old_len + len) ;
4037             SvPVX(buf_sv)[old_len + len] = '\0';
4038         } else {
4039             /* Want a line */
4040             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4041                 if (PerlIO_error(PL_rsfp))
4042                     return -1;          /* error */
4043                 else
4044                     return 0 ;          /* end of file */
4045             }
4046         }
4047         return SvCUR(buf_sv);
4048     }
4049     /* Skip this filter slot if filter has been deleted */
4050     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4051         DEBUG_P(PerlIO_printf(Perl_debug_log,
4052                               "filter_read %d: skipped (filter deleted)\n",
4053                               idx));
4054         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4055     }
4056     if (SvTYPE(datasv) != SVt_PVIO) {
4057         if (correct_length) {
4058             /* Want a block */
4059             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4060             if (!remainder) return 0; /* eof */
4061             if (correct_length > remainder) correct_length = remainder;
4062             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4063             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4064         } else {
4065             /* Want a line */
4066             const char *s = SvEND(datasv);
4067             const char *send = SvPVX(datasv) + SvLEN(datasv);
4068             while (s < send) {
4069                 if (*s == '\n') {
4070                     s++;
4071                     break;
4072                 }
4073                 s++;
4074             }
4075             if (s == send) return 0; /* eof */
4076             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4077             SvCUR_set(datasv, s-SvPVX(datasv));
4078         }
4079         return SvCUR(buf_sv);
4080     }
4081     /* Get function pointer hidden within datasv        */
4082     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4083     DEBUG_P(PerlIO_printf(Perl_debug_log,
4084                           "filter_read %d: via function %p (%s)\n",
4085                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4086     /* Call function. The function is expected to       */
4087     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4088     /* Return: <0:error, =0:eof, >0:not eof             */
4089     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4090 }
4091
4092 STATIC char *
4093 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4094 {
4095     dVAR;
4096
4097     PERL_ARGS_ASSERT_FILTER_GETS;
4098
4099 #ifdef PERL_CR_FILTER
4100     if (!PL_rsfp_filters) {
4101         filter_add(S_cr_textfilter,NULL);
4102     }
4103 #endif
4104     if (PL_rsfp_filters) {
4105         if (!append)
4106             SvCUR_set(sv, 0);   /* start with empty line        */
4107         if (FILTER_READ(0, sv, 0) > 0)
4108             return ( SvPVX(sv) ) ;
4109         else
4110             return NULL ;
4111     }
4112     else
4113         return (sv_gets(sv, PL_rsfp, append));
4114 }
4115
4116 STATIC HV *
4117 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4118 {
4119     dVAR;
4120     GV *gv;
4121
4122     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4123
4124     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4125         return PL_curstash;
4126
4127     if (len > 2 &&
4128         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4129         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4130     {
4131         return GvHV(gv);                        /* Foo:: */
4132     }
4133
4134     /* use constant CLASS => 'MyClass' */
4135     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4136     if (gv && GvCV(gv)) {
4137         SV * const sv = cv_const_sv(GvCV(gv));
4138         if (sv)
4139             pkgname = SvPV_const(sv, len);
4140     }
4141
4142     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4143 }
4144
4145 /*
4146  * S_readpipe_override
4147  * Check whether readpipe() is overridden, and generates the appropriate
4148  * optree, provided sublex_start() is called afterwards.
4149  */
4150 STATIC void
4151 S_readpipe_override(pTHX)
4152 {
4153     GV **gvp;
4154     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4155     pl_yylval.ival = OP_BACKTICK;
4156     if ((gv_readpipe
4157                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4158             ||
4159             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4160              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4161              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4162     {
4163         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4164             op_append_elem(OP_LIST,
4165                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4166                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4167     }
4168 }
4169
4170 #ifdef PERL_MAD
4171  /*
4172  * Perl_madlex
4173  * The intent of this yylex wrapper is to minimize the changes to the
4174  * tokener when we aren't interested in collecting madprops.  It remains
4175  * to be seen how successful this strategy will be...
4176  */
4177
4178 int
4179 Perl_madlex(pTHX)
4180 {
4181     int optype;
4182     char *s = PL_bufptr;
4183
4184     /* make sure PL_thiswhite is initialized */
4185     PL_thiswhite = 0;
4186     PL_thismad = 0;
4187
4188     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4189     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4190         return S_pending_ident(aTHX);
4191
4192     /* previous token ate up our whitespace? */
4193     if (!PL_lasttoke && PL_nextwhite) {
4194         PL_thiswhite = PL_nextwhite;
4195         PL_nextwhite = 0;
4196     }
4197
4198     /* isolate the token, and figure out where it is without whitespace */
4199     PL_realtokenstart = -1;
4200     PL_thistoken = 0;
4201     optype = yylex();
4202     s = PL_bufptr;
4203     assert(PL_curforce < 0);
4204
4205     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4206         if (!PL_thistoken) {
4207             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4208                 PL_thistoken = newSVpvs("");
4209             else {
4210                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4211                 PL_thistoken = newSVpvn(tstart, s - tstart);
4212             }
4213         }
4214         if (PL_thismad) /* install head */
4215             CURMAD('X', PL_thistoken);
4216     }
4217
4218     /* last whitespace of a sublex? */
4219     if (optype == ')' && PL_endwhite) {
4220         CURMAD('X', PL_endwhite);
4221     }
4222
4223     if (!PL_thismad) {
4224
4225         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4226         if (!PL_thiswhite && !PL_endwhite && !optype) {
4227             sv_free(PL_thistoken);
4228             PL_thistoken = 0;
4229             return 0;
4230         }
4231
4232         /* put off final whitespace till peg */
4233         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4234             PL_nextwhite = PL_thiswhite;
4235             PL_thiswhite = 0;
4236         }
4237         else if (PL_thisopen) {
4238             CURMAD('q', PL_thisopen);
4239             if (PL_thistoken)
4240                 sv_free(PL_thistoken);
4241             PL_thistoken = 0;
4242         }
4243         else {
4244             /* Store actual token text as madprop X */
4245             CURMAD('X', PL_thistoken);
4246         }
4247
4248         if (PL_thiswhite) {
4249             /* add preceding whitespace as madprop _ */
4250             CURMAD('_', PL_thiswhite);
4251         }
4252
4253         if (PL_thisstuff) {
4254             /* add quoted material as madprop = */
4255             CURMAD('=', PL_thisstuff);
4256         }
4257
4258         if (PL_thisclose) {
4259             /* add terminating quote as madprop Q */
4260             CURMAD('Q', PL_thisclose);
4261         }
4262     }
4263
4264     /* special processing based on optype */
4265
4266     switch (optype) {
4267
4268     /* opval doesn't need a TOKEN since it can already store mp */
4269     case WORD:
4270     case METHOD:
4271     case FUNCMETH:
4272     case THING:
4273     case PMFUNC:
4274     case PRIVATEREF:
4275     case FUNC0SUB:
4276     case UNIOPSUB:
4277     case LSTOPSUB:
4278     case LABEL:
4279         if (pl_yylval.opval)
4280             append_madprops(PL_thismad, pl_yylval.opval, 0);
4281         PL_thismad = 0;
4282         return optype;
4283
4284     /* fake EOF */
4285     case 0:
4286         optype = PEG;
4287         if (PL_endwhite) {
4288             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4289             PL_endwhite = 0;
4290         }
4291         break;
4292
4293     case ']':
4294     case '}':
4295         if (PL_faketokens)
4296             break;
4297         /* remember any fake bracket that lexer is about to discard */
4298         if (PL_lex_brackets == 1 &&
4299             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4300         {
4301             s = PL_bufptr;
4302             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4303                 s++;
4304             if (*s == '}') {
4305                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4306                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4307                 PL_thiswhite = 0;
4308                 PL_bufptr = s - 1;
4309                 break;  /* don't bother looking for trailing comment */
4310             }
4311             else
4312                 s = PL_bufptr;
4313         }
4314         if (optype == ']')
4315             break;
4316         /* FALLTHROUGH */
4317
4318     /* attach a trailing comment to its statement instead of next token */
4319     case ';':
4320         if (PL_faketokens)
4321             break;
4322         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4323             s = PL_bufptr;
4324             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4325                 s++;
4326             if (*s == '\n' || *s == '#') {
4327                 while (s < PL_bufend && *s != '\n')
4328                     s++;
4329                 if (s < PL_bufend)
4330                     s++;
4331                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4332                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4333                 PL_thiswhite = 0;
4334                 PL_bufptr = s;
4335             }
4336         }
4337         break;
4338
4339     /* ival */
4340     default:
4341         break;
4342
4343     }
4344
4345     /* Create new token struct.  Note: opvals return early above. */
4346     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4347     PL_thismad = 0;
4348     return optype;
4349 }
4350 #endif
4351
4352 STATIC char *
4353 S_tokenize_use(pTHX_ int is_use, char *s) {
4354     dVAR;
4355
4356     PERL_ARGS_ASSERT_TOKENIZE_USE;
4357
4358     if (PL_expect != XSTATE)
4359         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4360                     is_use ? "use" : "no"));
4361     PL_expect = XTERM;
4362     s = SKIPSPACE1(s);
4363     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4364         s = force_version(s, TRUE);
4365         if (*s == ';' || *s == '}'
4366                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4367             start_force(PL_curforce);
4368             NEXTVAL_NEXTTOKE.opval = NULL;
4369             force_next(WORD);
4370         }
4371         else if (*s == 'v') {
4372             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4373             s = force_version(s, FALSE);
4374         }
4375     }
4376     else {
4377         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4378         s = force_version(s, FALSE);
4379     }
4380     pl_yylval.ival = is_use;
4381     return s;
4382 }
4383 #ifdef DEBUGGING
4384     static const char* const exp_name[] =
4385         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4386           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4387         };
4388 #endif
4389
4390 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4391 STATIC bool
4392 S_word_takes_any_delimeter(char *p, STRLEN len)
4393 {
4394     return (len == 1 && strchr("msyq", p[0])) ||
4395            (len == 2 && (
4396             (p[0] == 't' && p[1] == 'r') ||
4397             (p[0] == 'q' && strchr("qwxr", p[1]))));
4398 }
4399
4400 /*
4401   yylex
4402
4403   Works out what to call the token just pulled out of the input
4404   stream.  The yacc parser takes care of taking the ops we return and
4405   stitching them into a tree.
4406
4407   Returns:
4408     PRIVATEREF
4409
4410   Structure:
4411       if read an identifier
4412           if we're in a my declaration
4413               croak if they tried to say my($foo::bar)
4414               build the ops for a my() declaration
4415           if it's an access to a my() variable
4416               are we in a sort block?
4417                   croak if my($a); $a <=> $b
4418               build ops for access to a my() variable
4419           if in a dq string, and they've said @foo and we can't find @foo
4420               croak
4421           build ops for a bareword
4422       if we already built the token before, use it.
4423 */
4424
4425
4426 #ifdef __SC__
4427 #pragma segment Perl_yylex
4428 #endif
4429 int
4430 Perl_yylex(pTHX)
4431 {
4432     dVAR;
4433     char *s = PL_bufptr;
4434     char *d;
4435     STRLEN len;
4436     bool bof = FALSE;
4437     U8 formbrack = 0;
4438     U32 fake_eof = 0;
4439
4440     /* orig_keyword, gvp, and gv are initialized here because
4441      * jump to the label just_a_word_zero can bypass their
4442      * initialization later. */
4443     I32 orig_keyword = 0;
4444     GV *gv = NULL;
4445     GV **gvp = NULL;
4446
4447     DEBUG_T( {
4448         SV* tmp = newSVpvs("");
4449         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4450             (IV)CopLINE(PL_curcop),
4451             lex_state_names[PL_lex_state],
4452             exp_name[PL_expect],
4453             pv_display(tmp, s, strlen(s), 0, 60));
4454         SvREFCNT_dec(tmp);
4455     } );
4456     /* check if there's an identifier for us to look at */
4457     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4458         return REPORT(S_pending_ident(aTHX));
4459
4460     /* no identifier pending identification */
4461
4462     switch (PL_lex_state) {
4463 #ifdef COMMENTARY
4464     case LEX_NORMAL:            /* Some compilers will produce faster */
4465     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4466         break;
4467 #endif
4468
4469     /* when we've already built the next token, just pull it out of the queue */
4470     case LEX_KNOWNEXT:
4471 #ifdef PERL_MAD
4472         PL_lasttoke--;
4473         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4474         if (PL_madskills) {
4475             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4476             PL_nexttoke[PL_lasttoke].next_mad = 0;
4477             if (PL_thismad && PL_thismad->mad_key == '_') {
4478                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4479                 PL_thismad->mad_val = 0;
4480                 mad_free(PL_thismad);
4481                 PL_thismad = 0;
4482             }
4483         }
4484         if (!PL_lasttoke) {
4485             PL_lex_state = PL_lex_defer;
4486             PL_expect = PL_lex_expect;
4487             PL_lex_defer = LEX_NORMAL;
4488             if (!PL_nexttoke[PL_lasttoke].next_type)
4489                 return yylex();
4490         }
4491 #else
4492         PL_nexttoke--;
4493         pl_yylval = PL_nextval[PL_nexttoke];
4494         if (!PL_nexttoke) {
4495             PL_lex_state = PL_lex_defer;
4496             PL_expect = PL_lex_expect;
4497             PL_lex_defer = LEX_NORMAL;
4498         }
4499 #endif
4500         {
4501             I32 next_type;
4502 #ifdef PERL_MAD
4503             next_type = PL_nexttoke[PL_lasttoke].next_type;
4504 #else
4505             next_type = PL_nexttype[PL_nexttoke];
4506 #endif
4507             if (next_type & (7<<24)) {
4508                 if (next_type & (1<<24)) {
4509                     if (PL_lex_brackets > 100)
4510                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4511                     PL_lex_brackstack[PL_lex_brackets++] =
4512                         (char) ((next_type >> 16) & 0xff);
4513                 }
4514                 if (next_type & (2<<24))
4515                     PL_lex_allbrackets++;
4516                 if (next_type & (4<<24))
4517                     PL_lex_allbrackets--;
4518                 next_type &= 0xffff;
4519             }
4520             if (S_is_opval_token(next_type) && pl_yylval.opval)
4521                 pl_yylval.opval->op_savefree = 0; /* release */
4522             return REPORT(next_type);
4523         }
4524
4525     /* interpolated case modifiers like \L \U, including \Q and \E.
4526        when we get here, PL_bufptr is at the \
4527     */
4528     case LEX_INTERPCASEMOD:
4529 #ifdef DEBUGGING
4530         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4531             Perl_croak(aTHX_
4532                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4533                        PL_bufptr, PL_bufend, *PL_bufptr);
4534 #endif
4535         /* handle \E or end of string */
4536         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4537             /* if at a \E */
4538             if (PL_lex_casemods) {
4539                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4540                 PL_lex_casestack[PL_lex_casemods] = '\0';
4541
4542                 if (PL_bufptr != PL_bufend
4543                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4544                         || oldmod == 'F')) {
4545                     PL_bufptr += 2;
4546                     PL_lex_state = LEX_INTERPCONCAT;
4547 #ifdef PERL_MAD
4548                     if (PL_madskills)
4549                         PL_thistoken = newSVpvs("\\E");
4550 #endif
4551                 }
4552                 PL_lex_allbrackets--;
4553                 return REPORT(')');
4554             }
4555             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4556                /* Got an unpaired \E */
4557                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4558                         "Useless use of \\E");
4559             }
4560 #ifdef PERL_MAD
4561             while (PL_bufptr != PL_bufend &&
4562               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4563                 if (!PL_thiswhite)
4564                     PL_thiswhite = newSVpvs("");
4565                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4566                 PL_bufptr += 2;
4567             }
4568 #else
4569             if (PL_bufptr != PL_bufend)
4570                 PL_bufptr += 2;
4571 #endif
4572             PL_lex_state = LEX_INTERPCONCAT;
4573             return yylex();
4574         }
4575         else {
4576             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4577               "### Saw case modifier\n"); });
4578             s = PL_bufptr + 1;
4579             if (s[1] == '\\' && s[2] == 'E') {
4580 #ifdef PERL_MAD
4581                 if (!PL_thiswhite)
4582                     PL_thiswhite = newSVpvs("");
4583                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4584 #endif
4585                 PL_bufptr = s + 3;
4586                 PL_lex_state = LEX_INTERPCONCAT;
4587                 return yylex();
4588             }
4589             else {
4590                 I32 tmp;
4591                 if (!PL_madskills) /* when just compiling don't need correct */
4592                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4593                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4594                 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4595                     (strchr(PL_lex_casestack, 'L')
4596                         || strchr(PL_lex_casestack, 'U')
4597                         || strchr(PL_lex_casestack, 'F'))) {
4598                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4599                     PL_lex_allbrackets--;
4600                     return REPORT(')');
4601                 }
4602                 if (PL_lex_casemods > 10)
4603                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4604                 PL_lex_casestack[PL_lex_casemods++] = *s;
4605                 PL_lex_casestack[PL_lex_casemods] = '\0';
4606                 PL_lex_state = LEX_INTERPCONCAT;
4607                 start_force(PL_curforce);
4608                 NEXTVAL_NEXTTOKE.ival = 0;
4609                 force_next((2<<24)|'(');
4610                 start_force(PL_curforce);
4611                 if (*s == 'l')
4612                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4613                 else if (*s == 'u')
4614                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4615                 else if (*s == 'L')
4616                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4617                 else if (*s == 'U')
4618                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4619                 else if (*s == 'Q')
4620                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4621                 else if (*s == 'F')
4622                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4623                 else
4624                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4625                 if (PL_madskills) {
4626                     SV* const tmpsv = newSVpvs("\\ ");
4627                     /* replace the space with the character we want to escape
4628                      */
4629                     SvPVX(tmpsv)[1] = *s;
4630                     curmad('_', tmpsv);
4631                 }
4632                 PL_bufptr = s + 1;
4633             }
4634             force_next(FUNC);
4635             if (PL_lex_starts) {
4636                 s = PL_bufptr;
4637                 PL_lex_starts = 0;
4638 #ifdef PERL_MAD
4639                 if (PL_madskills) {
4640                     if (PL_thistoken)
4641                         sv_free(PL_thistoken);
4642                     PL_thistoken = newSVpvs("");
4643                 }
4644 #endif
4645                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4646                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4647                     OPERATOR(',');
4648                 else
4649                     Aop(OP_CONCAT);
4650             }
4651             else
4652                 return yylex();
4653         }
4654
4655     case LEX_INTERPPUSH:
4656         return REPORT(sublex_push());
4657
4658     case LEX_INTERPSTART:
4659         if (PL_bufptr == PL_bufend)
4660             return REPORT(sublex_done());
4661         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4662               "### Interpolated variable\n"); });
4663         PL_expect = XTERM;
4664         PL_lex_dojoin = (*PL_bufptr == '@');
4665         PL_lex_state = LEX_INTERPNORMAL;
4666         if (PL_lex_dojoin) {
4667             start_force(PL_curforce);
4668             NEXTVAL_NEXTTOKE.ival = 0;
4669             force_next(',');
4670             start_force(PL_curforce);
4671             force_ident("\"", '$');
4672             start_force(PL_curforce);
4673             NEXTVAL_NEXTTOKE.ival = 0;
4674             force_next('$');
4675             start_force(PL_curforce);
4676             NEXTVAL_NEXTTOKE.ival = 0;
4677             force_next((2<<24)|'(');
4678             start_force(PL_curforce);
4679             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4680             force_next(FUNC);
4681         }
4682         /* Convert (?{...}) and friends to 'do {...}' */
4683         if (PL_lex_inpat && *PL_bufptr == '(') {
4684             PL_sublex_info.re_eval_start = PL_bufptr;
4685             PL_bufptr += 2;
4686             if (*PL_bufptr != '{')
4687                 PL_bufptr++;
4688             start_force(PL_curforce);
4689             /* XXX probably need a CURMAD(something) here */
4690             PL_expect = XTERMBLOCK;
4691             force_next(DO);
4692         }
4693
4694         if (PL_lex_starts++) {
4695             s = PL_bufptr;
4696 #ifdef PERL_MAD
4697             if (PL_madskills) {
4698                 if (PL_thistoken)
4699                     sv_free(PL_thistoken);
4700                 PL_thistoken = newSVpvs("");
4701             }
4702 #endif
4703             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4704             if (!PL_lex_casemods && PL_lex_inpat)
4705                 OPERATOR(',');
4706             else
4707                 Aop(OP_CONCAT);
4708         }
4709         return yylex();
4710
4711     case LEX_INTERPENDMAYBE:
4712         if (intuit_more(PL_bufptr)) {
4713             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4714             break;
4715         }
4716         /* FALL THROUGH */
4717
4718     case LEX_INTERPEND:
4719         if (PL_lex_dojoin) {
4720             PL_lex_dojoin = FALSE;
4721             PL_lex_state = LEX_INTERPCONCAT;
4722 #ifdef PERL_MAD
4723             if (PL_madskills) {
4724                 if (PL_thistoken)
4725                     sv_free(PL_thistoken);
4726                 PL_thistoken = newSVpvs("");
4727             }
4728 #endif
4729             PL_lex_allbrackets--;
4730             return REPORT(')');
4731         }
4732         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4733             && SvEVALED(PL_lex_repl))
4734         {
4735             if (PL_bufptr != PL_bufend)
4736                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4737             PL_lex_repl = NULL;
4738         }
4739         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4740            re_eval_str.  If the here-doc body’s length equals the previous
4741            value of re_eval_start, re_eval_start will now be null.  So
4742            check re_eval_str as well. */
4743         if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
4744             SV *sv;
4745             if (*PL_bufptr != ')')
4746                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4747             PL_bufptr++;
4748             /* having compiled a (?{..}) expression, return the original
4749              * text too, as a const */
4750             if (PL_sublex_info.re_eval_str) {
4751                 sv = PL_sublex_info.re_eval_str;
4752                 PL_sublex_info.re_eval_str = NULL;
4753                 SvCUR_set(sv, PL_bufptr - PL_sublex_info.re_eval_start);
4754                 SvPV_shrink_to_cur(sv);
4755             }
4756             else sv = newSVpvn(PL_sublex_info.re_eval_start,
4757                                PL_bufptr - PL_sublex_info.re_eval_start);
4758             start_force(PL_curforce);
4759             /* XXX probably need a CURMAD(something) here */
4760             NEXTVAL_NEXTTOKE.opval =
4761                     (OP*)newSVOP(OP_CONST, 0,
4762                                  sv);
4763             force_next(THING);
4764             PL_sublex_info.re_eval_start = NULL;
4765             PL_expect = XTERM;
4766             return REPORT(',');
4767         }
4768
4769         /* FALLTHROUGH */
4770     case LEX_INTERPCONCAT:
4771 #ifdef DEBUGGING
4772         if (PL_lex_brackets)
4773             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4774                        (long) PL_lex_brackets);
4775 #endif
4776         if (PL_bufptr == PL_bufend)
4777             return REPORT(sublex_done());
4778
4779         /* m'foo' still needs to be parsed for possible (?{...}) */
4780         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4781             SV *sv = newSVsv(PL_linestr);
4782             sv = tokeq(sv);
4783             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4784             s = PL_bufend;
4785         }
4786         else {
4787             s = scan_const(PL_bufptr);
4788             if (*s == '\\')
4789                 PL_lex_state = LEX_INTERPCASEMOD;
4790             else
4791                 PL_lex_state = LEX_INTERPSTART;
4792         }
4793
4794         if (s != PL_bufptr) {
4795             start_force(PL_curforce);
4796             if (PL_madskills) {
4797                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4798             }
4799             NEXTVAL_NEXTTOKE = pl_yylval;
4800             PL_expect = XTERM;
4801             force_next(THING);
4802             if (PL_lex_starts++) {
4803 #ifdef PERL_MAD
4804                 if (PL_madskills) {
4805                     if (PL_thistoken)
4806                         sv_free(PL_thistoken);
4807                     PL_thistoken = newSVpvs("");
4808                 }
4809 #endif
4810                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4811                 if (!PL_lex_casemods && PL_lex_inpat)
4812                     OPERATOR(',');
4813                 else
4814                     Aop(OP_CONCAT);
4815             }
4816             else {
4817                 PL_bufptr = s;
4818                 return yylex();
4819             }
4820         }
4821
4822         return yylex();
4823     case LEX_FORMLINE:
4824         s = scan_formline(PL_bufptr);
4825         if (!PL_lex_formbrack)
4826         {
4827             formbrack = 1;
4828             goto rightbracket;
4829         }
4830         PL_bufptr = s;
4831         return yylex();
4832     }
4833
4834     s = PL_bufptr;
4835     PL_oldoldbufptr = PL_oldbufptr;
4836     PL_oldbufptr = s;
4837
4838   retry:
4839 #ifdef PERL_MAD
4840     if (PL_thistoken) {
4841         sv_free(PL_thistoken);
4842         PL_thistoken = 0;
4843     }
4844     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4845 #endif
4846     switch (*s) {
4847     default:
4848         if (isIDFIRST_lazy_if(s,UTF))
4849             goto keylookup;
4850         {
4851         SV *dsv = newSVpvs_flags("", SVs_TEMP);
4852         const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4853                                                     UTF8SKIP(s),
4854                                                     SVs_TEMP | SVf_UTF8),
4855                                             10, UNI_DISPLAY_ISPRINT))
4856                             : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4857         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4858         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4859             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4860         } else {
4861             d = PL_linestart;
4862         }
4863         *s = '\0';
4864         sv_setpv(dsv, d);
4865         if (UTF)
4866             SvUTF8_on(dsv);
4867         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
4868     }
4869     case 4:
4870     case 26:
4871         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4872     case 0:
4873 #ifdef PERL_MAD
4874         if (PL_madskills)
4875             PL_faketokens = 0;
4876 #endif
4877         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4878             PL_last_uni = 0;
4879             PL_last_lop = 0;
4880             if (PL_lex_brackets &&
4881                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4882                 yyerror((const char *)
4883                         (PL_lex_formbrack
4884                          ? "Format not terminated"
4885                          : "Missing right curly or square bracket"));
4886             }
4887             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4888                         "### Tokener got EOF\n");
4889             } );
4890             TOKEN(0);
4891         }
4892         if (s++ < PL_bufend)
4893             goto retry;                 /* ignore stray nulls */
4894         PL_last_uni = 0;
4895         PL_last_lop = 0;
4896         if (!PL_in_eval && !PL_preambled) {
4897             PL_preambled = TRUE;
4898 #ifdef PERL_MAD
4899             if (PL_madskills)
4900                 PL_faketokens = 1;
4901 #endif
4902             if (PL_perldb) {
4903                 /* Generate a string of Perl code to load the debugger.
4904                  * If PERL5DB is set, it will return the contents of that,
4905                  * otherwise a compile-time require of perl5db.pl.  */
4906
4907                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4908
4909                 if (pdb) {
4910                     sv_setpv(PL_linestr, pdb);
4911                     sv_catpvs(PL_linestr,";");
4912                 } else {
4913                     SETERRNO(0,SS_NORMAL);
4914                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4915                 }
4916             } else
4917                 sv_setpvs(PL_linestr,"");
4918             if (PL_preambleav) {
4919                 SV **svp = AvARRAY(PL_preambleav);
4920                 SV **const end = svp + AvFILLp(PL_preambleav);
4921                 while(svp <= end) {
4922                     sv_catsv(PL_linestr, *svp);
4923                     ++svp;
4924                     sv_catpvs(PL_linestr, ";");
4925                 }
4926                 sv_free(MUTABLE_SV(PL_preambleav));
4927                 PL_preambleav = NULL;
4928             }
4929             if (PL_minus_E)
4930                 sv_catpvs(PL_linestr,
4931                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4932             if (PL_minus_n || PL_minus_p) {
4933                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4934                 if (PL_minus_l)
4935                     sv_catpvs(PL_linestr,"chomp;");
4936                 if (PL_minus_a) {
4937                     if (PL_minus_F) {
4938                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4939                              || *PL_splitstr == '"')
4940                               && strchr(PL_splitstr + 1, *PL_splitstr))
4941                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4942                         else {
4943                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4944                                bytes can be used as quoting characters.  :-) */
4945                             const char *splits = PL_splitstr;
4946                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4947                             do {
4948                                 /* Need to \ \s  */
4949                                 if (*splits == '\\')
4950                                     sv_catpvn(PL_linestr, splits, 1);
4951                                 sv_catpvn(PL_linestr, splits, 1);
4952                             } while (*splits++);
4953                             /* This loop will embed the trailing NUL of
4954                                PL_linestr as the last thing it does before
4955                                terminating.  */
4956                             sv_catpvs(PL_linestr, ");");
4957                         }
4958                     }
4959                     else
4960                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4961                 }
4962             }
4963             sv_catpvs(PL_linestr, "\n");
4964             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4965             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4966             PL_last_lop = PL_last_uni = NULL;
4967             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4968                 update_debugger_info(PL_linestr, NULL, 0);
4969             goto retry;
4970         }
4971         do {
4972             fake_eof = 0;
4973             bof = PL_rsfp ? TRUE : FALSE;
4974             if (0) {
4975               fake_eof:
4976                 fake_eof = LEX_FAKE_EOF;
4977             }
4978             PL_bufptr = PL_bufend;
4979             CopLINE_inc(PL_curcop);
4980             if (!lex_next_chunk(fake_eof)) {
4981                 CopLINE_dec(PL_curcop);
4982                 s = PL_bufptr;
4983                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4984             }
4985             CopLINE_dec(PL_curcop);
4986 #ifdef PERL_MAD
4987             if (!PL_rsfp)
4988                 PL_realtokenstart = -1;
4989 #endif
4990             s = PL_bufptr;
4991             /* If it looks like the start of a BOM or raw UTF-16,
4992              * check if it in fact is. */
4993             if (bof && PL_rsfp &&
4994                      (*s == 0 ||
4995                       *(U8*)s == 0xEF ||
4996                       *(U8*)s >= 0xFE ||
4997                       s[1] == 0)) {
4998                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4999                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5000 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5001                 /* offset may include swallowed CR */
5002                 if (!bof)
5003                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5004 #endif
5005                 if (bof) {
5006                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5007                     s = swallow_bom((U8*)s);
5008                 }
5009             }
5010             if (PL_parser->in_pod) {
5011                 /* Incest with pod. */
5012 #ifdef PERL_MAD
5013                 if (PL_madskills)
5014                     sv_catsv(PL_thiswhite, PL_linestr);
5015 #endif
5016                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5017                     sv_setpvs(PL_linestr, "");
5018                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5019                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5020                     PL_last_lop = PL_last_uni = NULL;
5021                     PL_parser->in_pod = 0;
5022                 }
5023             }
5024             if (PL_rsfp || PL_parser->filtered)
5025                 incline(s);
5026         } while (PL_parser->in_pod);
5027         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5028         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5029         PL_last_lop = PL_last_uni = NULL;
5030         if (CopLINE(PL_curcop) == 1) {
5031             while (s < PL_bufend && isSPACE(*s))
5032                 s++;
5033             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5034                 s++;
5035 #ifdef PERL_MAD
5036             if (PL_madskills)
5037                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5038 #endif
5039             d = NULL;
5040             if (!PL_in_eval) {
5041                 if (*s == '#' && *(s+1) == '!')
5042                     d = s + 2;
5043 #ifdef ALTERNATE_SHEBANG
5044                 else {
5045                     static char const as[] = ALTERNATE_SHEBANG;
5046                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5047                         d = s + (sizeof(as) - 1);
5048                 }
5049 #endif /* ALTERNATE_SHEBANG */
5050             }
5051             if (d) {
5052                 char *ipath;
5053                 char *ipathend;
5054
5055                 while (isSPACE(*d))
5056                     d++;
5057                 ipath = d;
5058                 while (*d && !isSPACE(*d))
5059                     d++;
5060                 ipathend = d;
5061
5062 #ifdef ARG_ZERO_IS_SCRIPT
5063                 if (ipathend > ipath) {
5064                     /*
5065                      * HP-UX (at least) sets argv[0] to the script name,
5066                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5067                      * at least, set argv[0] to the basename of the Perl
5068                      * interpreter. So, having found "#!", we'll set it right.
5069                      */
5070                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5071                                                     SVt_PV)); /* $^X */
5072                     assert(SvPOK(x) || SvGMAGICAL(x));
5073                     if (sv_eq(x, CopFILESV(PL_curcop))) {
5074                         sv_setpvn(x, ipath, ipathend - ipath);
5075                         SvSETMAGIC(x);
5076                     }
5077                     else {
5078                         STRLEN blen;
5079                         STRLEN llen;
5080                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5081                         const char * const lstart = SvPV_const(x,llen);
5082                         if (llen < blen) {
5083                             bstart += blen - llen;
5084                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5085                                 sv_setpvn(x, ipath, ipathend - ipath);
5086                                 SvSETMAGIC(x);
5087                             }
5088                         }
5089                     }
5090                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5091                 }
5092 #endif /* ARG_ZERO_IS_SCRIPT */
5093
5094                 /*
5095                  * Look for options.
5096                  */
5097                 d = instr(s,"perl -");
5098                 if (!d) {
5099                     d = instr(s,"perl");
5100 #if defined(DOSISH)
5101                     /* avoid getting into infinite loops when shebang
5102                      * line contains "Perl" rather than "perl" */
5103                     if (!d) {
5104                         for (d = ipathend-4; d >= ipath; --d) {
5105                             if ((*d == 'p' || *d == 'P')
5106                                 && !ibcmp(d, "perl", 4))
5107                             {
5108                                 break;
5109                             }
5110                         }
5111                         if (d < ipath)
5112                             d = NULL;
5113                     }
5114 #endif
5115                 }
5116 #ifdef ALTERNATE_SHEBANG
5117                 /*
5118                  * If the ALTERNATE_SHEBANG on this system starts with a
5119                  * character that can be part of a Perl expression, then if
5120                  * we see it but not "perl", we're probably looking at the
5121                  * start of Perl code, not a request to hand off to some
5122                  * other interpreter.  Similarly, if "perl" is there, but
5123                  * not in the first 'word' of the line, we assume the line
5124                  * contains the start of the Perl program.
5125                  */
5126                 if (d && *s != '#') {
5127                     const char *c = ipath;
5128                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5129                         c++;
5130                     if (c < d)
5131                         d = NULL;       /* "perl" not in first word; ignore */
5132                     else
5133                         *s = '#';       /* Don't try to parse shebang line */
5134                 }
5135 #endif /* ALTERNATE_SHEBANG */
5136                 if (!d &&
5137                     *s == '#' &&
5138                     ipathend > ipath &&
5139                     !PL_minus_c &&
5140                     !instr(s,"indir") &&
5141                     instr(PL_origargv[0],"perl"))
5142                 {
5143                     dVAR;
5144                     char **newargv;
5145
5146                     *ipathend = '\0';
5147                     s = ipathend + 1;
5148                     while (s < PL_bufend && isSPACE(*s))
5149                         s++;
5150                     if (s < PL_bufend) {
5151                         Newx(newargv,PL_origargc+3,char*);
5152                         newargv[1] = s;
5153                         while (s < PL_bufend && !isSPACE(*s))
5154                             s++;
5155                         *s = '\0';
5156                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5157                     }
5158                     else
5159                         newargv = PL_origargv;
5160                     newargv[0] = ipath;
5161                     PERL_FPU_PRE_EXEC
5162                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5163                     PERL_FPU_POST_EXEC
5164                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5165                 }
5166                 if (d) {
5167                     while (*d && !isSPACE(*d))
5168                         d++;
5169                     while (SPACE_OR_TAB(*d))
5170                         d++;
5171
5172                     if (*d++ == '-') {
5173                         const bool switches_done = PL_doswitches;
5174                         const U32 oldpdb = PL_perldb;
5175                         const bool oldn = PL_minus_n;
5176                         const bool oldp = PL_minus_p;
5177                         const char *d1 = d;
5178
5179                         do {
5180                             bool baduni = FALSE;
5181                             if (*d1 == 'C') {
5182                                 const char *d2 = d1 + 1;
5183                                 if (parse_unicode_opts((const char **)&d2)
5184                                     != PL_unicode)
5185                                     baduni = TRUE;
5186                             }
5187                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5188                                 const char * const m = d1;
5189                                 while (*d1 && !isSPACE(*d1))
5190                                     d1++;
5191                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5192                                       (int)(d1 - m), m);
5193                             }
5194                             d1 = moreswitches(d1);
5195                         } while (d1);
5196                         if (PL_doswitches && !switches_done) {
5197                             int argc = PL_origargc;
5198                             char **argv = PL_origargv;
5199                             do {
5200                                 argc--,argv++;
5201                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5202                             init_argv_symbols(argc,argv);
5203                         }
5204                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5205                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5206                               /* if we have already added "LINE: while (<>) {",
5207                                  we must not do it again */
5208                         {
5209                             sv_setpvs(PL_linestr, "");
5210                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5211                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5212                             PL_last_lop = PL_last_uni = NULL;
5213                             PL_preambled = FALSE;
5214                             if (PERLDB_LINE || PERLDB_SAVESRC)
5215                                 (void)gv_fetchfile(PL_origfilename);
5216                             goto retry;
5217                         }
5218                     }
5219                 }
5220             }
5221         }
5222         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5223             PL_lex_state = LEX_FORMLINE;
5224             start_force(PL_curforce);
5225             NEXTVAL_NEXTTOKE.ival = 0;
5226             force_next(FORMRBRACK);
5227             TOKEN(';');
5228         }
5229         goto retry;
5230     case '\r':
5231 #ifdef PERL_STRICT_CR
5232         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5233         Perl_croak(aTHX_
5234       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5235 #endif
5236     case ' ': case '\t': case '\f': case 013:
5237 #ifdef PERL_MAD
5238         PL_realtokenstart = -1;
5239         if (!PL_thiswhite)
5240             PL_thiswhite = newSVpvs("");
5241         sv_catpvn(PL_thiswhite, s, 1);
5242 #endif
5243         s++;
5244         goto retry;
5245     case '#':
5246     case '\n':
5247 #ifdef PERL_MAD
5248         PL_realtokenstart = -1;
5249         if (PL_madskills)
5250             PL_faketokens = 0;
5251 #endif
5252         if (PL_lex_state != LEX_NORMAL ||
5253              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5254             if (*s == '#' && s == PL_linestart && PL_in_eval
5255              && !PL_rsfp && !PL_parser->filtered) {
5256                 /* handle eval qq[#line 1 "foo"\n ...] */
5257                 CopLINE_dec(PL_curcop);
5258                 incline(s);
5259             }
5260             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5261                 s = SKIPSPACE0(s);
5262                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5263                     incline(s);
5264             }
5265             else {
5266                 d = s;
5267                 while (d < PL_bufend && *d != '\n')
5268                     d++;
5269                 if (d < PL_bufend)
5270                     d++;
5271                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5272                     Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5273                                d, PL_bufend);
5274 #ifdef PERL_MAD
5275                 if (PL_madskills)
5276                     PL_thiswhite = newSVpvn(s, d - s);
5277 #endif
5278                 s = d;
5279                 incline(s);
5280             }
5281             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5282                 PL_lex_state = LEX_FORMLINE;
5283                 start_force(PL_curforce);
5284                 NEXTVAL_NEXTTOKE.ival = 0;
5285                 force_next(FORMRBRACK);
5286                 TOKEN(';');
5287             }
5288         }
5289         else {
5290 #ifdef PERL_MAD
5291             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5292                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5293                     PL_faketokens = 0;
5294                     s = SKIPSPACE0(s);
5295                     TOKEN(PEG); /* make sure any #! line is accessible */
5296                 }
5297                 s = SKIPSPACE0(s);
5298             }
5299             else {
5300 /*              if (PL_madskills && PL_lex_formbrack) { */
5301                     d = s;
5302                     while (d < PL_bufend && *d != '\n')
5303                         d++;
5304                     if (d < PL_bufend)
5305                         d++;
5306                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5307                       Perl_croak(aTHX_ "panic: input overflow");
5308                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5309                         if (!PL_thiswhite)
5310                             PL_thiswhite = newSVpvs("");
5311                         if (CopLINE(PL_curcop) == 1) {
5312                             sv_setpvs(PL_thiswhite, "");
5313                             PL_faketokens = 0;
5314                         }
5315                         sv_catpvn(PL_thiswhite, s, d - s);
5316                     }
5317                     s = d;
5318 /*              }
5319                 *s = '\0';
5320                 PL_bufend = s; */
5321             }
5322 #else
5323             *s = '\0';
5324             PL_bufend = s;
5325 #endif
5326         }
5327         goto retry;
5328     case '-':
5329         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5330             I32 ftst = 0;
5331             char tmp;
5332
5333             s++;
5334             PL_bufptr = s;
5335             tmp = *s++;
5336
5337             while (s < PL_bufend && SPACE_OR_TAB(*s))
5338                 s++;
5339
5340             if (strnEQ(s,"=>",2)) {
5341                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5342                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5343                 OPERATOR('-');          /* unary minus */
5344             }
5345             PL_last_uni = PL_oldbufptr;
5346             switch (tmp) {
5347             case 'r': ftst = OP_FTEREAD;        break;
5348             case 'w': ftst = OP_FTEWRITE;       break;
5349             case 'x': ftst = OP_FTEEXEC;        break;
5350             case 'o': ftst = OP_FTEOWNED;       break;
5351             case 'R': ftst = OP_FTRREAD;        break;
5352             case 'W': ftst = OP_FTRWRITE;       break;
5353             case 'X': ftst = OP_FTREXEC;        break;
5354             case 'O': ftst = OP_FTROWNED;       break;
5355             case 'e': ftst = OP_FTIS;           break;
5356             case 'z': ftst = OP_FTZERO;         break;
5357             case 's': ftst = OP_FTSIZE;         break;
5358             case 'f': ftst = OP_FTFILE;         break;
5359             case 'd': ftst = OP_FTDIR;          break;
5360             case 'l': ftst = OP_FTLINK;         break;
5361             case 'p': ftst = OP_FTPIPE;         break;
5362             case 'S': ftst = OP_FTSOCK;         break;
5363             case 'u': ftst = OP_FTSUID;         break;
5364             case 'g': ftst = OP_FTSGID;         break;
5365             case 'k': ftst = OP_FTSVTX;         break;
5366             case 'b': ftst = OP_FTBLK;          break;
5367             case 'c': ftst = OP_FTCHR;          break;
5368             case 't': ftst = OP_FTTTY;          break;
5369             case 'T': ftst = OP_FTTEXT;         break;
5370             case 'B': ftst = OP_FTBINARY;       break;
5371             case 'M': case 'A': case 'C':
5372                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5373                 switch (tmp) {
5374                 case 'M': ftst = OP_FTMTIME;    break;
5375                 case 'A': ftst = OP_FTATIME;    break;
5376                 case 'C': ftst = OP_FTCTIME;    break;
5377                 default:                        break;
5378                 }
5379                 break;
5380             default:
5381                 break;
5382             }
5383             if (ftst) {
5384                 PL_last_lop_op = (OPCODE)ftst;
5385                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5386                         "### Saw file test %c\n", (int)tmp);
5387                 } );
5388                 FTST(ftst);
5389             }
5390             else {
5391                 /* Assume it was a minus followed by a one-letter named
5392                  * subroutine call (or a -bareword), then. */
5393                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5394                         "### '-%c' looked like a file test but was not\n",
5395                         (int) tmp);
5396                 } );
5397                 s = --PL_bufptr;
5398             }
5399         }
5400         {
5401             const char tmp = *s++;
5402             if (*s == tmp) {
5403                 s++;
5404                 if (PL_expect == XOPERATOR)
5405                     TERM(POSTDEC);
5406                 else
5407                     OPERATOR(PREDEC);
5408             }
5409             else if (*s == '>') {
5410                 s++;
5411                 s = SKIPSPACE1(s);
5412                 if (isIDFIRST_lazy_if(s,UTF)) {
5413                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5414                     TOKEN(ARROW);
5415                 }
5416                 else if (*s == '$')
5417                     OPERATOR(ARROW);
5418                 else
5419                     TERM(ARROW);
5420             }
5421             if (PL_expect == XOPERATOR) {
5422                 if (*s == '=' && !PL_lex_allbrackets &&
5423                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5424                     s--;
5425                     TOKEN(0);
5426                 }
5427                 Aop(OP_SUBTRACT);
5428             }
5429             else {
5430                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5431                     check_uni();
5432                 OPERATOR('-');          /* unary minus */
5433             }
5434         }
5435
5436     case '+':
5437         {
5438             const char tmp = *s++;
5439             if (*s == tmp) {
5440                 s++;
5441                 if (PL_expect == XOPERATOR)
5442                     TERM(POSTINC);
5443                 else
5444                     OPERATOR(PREINC);
5445             }
5446             if (PL_expect == XOPERATOR) {
5447                 if (*s == '=' && !PL_lex_allbrackets &&
5448                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5449                     s--;
5450                     TOKEN(0);
5451                 }
5452                 Aop(OP_ADD);
5453             }
5454             else {
5455                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5456                     check_uni();
5457                 OPERATOR('+');
5458             }
5459         }
5460
5461     case '*':
5462         if (PL_expect != XOPERATOR) {
5463             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5464             PL_expect = XOPERATOR;
5465             force_ident(PL_tokenbuf, '*');
5466             if (!*PL_tokenbuf)
5467                 PREREF('*');
5468             TERM('*');
5469         }
5470         s++;
5471         if (*s == '*') {
5472             s++;
5473             if (*s == '=' && !PL_lex_allbrackets &&
5474                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5475                 s -= 2;
5476                 TOKEN(0);
5477             }
5478             PWop(OP_POW);
5479         }
5480         if (*s == '=' && !PL_lex_allbrackets &&
5481                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5482             s--;
5483             TOKEN(0);
5484         }
5485         Mop(OP_MULTIPLY);
5486
5487     case '%':
5488         if (PL_expect == XOPERATOR) {
5489             if (s[1] == '=' && !PL_lex_allbrackets &&
5490                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5491                 TOKEN(0);
5492             ++s;
5493             Mop(OP_MODULO);
5494         }
5495         PL_tokenbuf[0] = '%';
5496         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5497                 sizeof PL_tokenbuf - 1, FALSE);
5498         if (!PL_tokenbuf[1]) {
5499             PREREF('%');
5500         }
5501         PL_pending_ident = '%';
5502         TERM('%');
5503
5504     case '^':
5505         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5506                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5507             TOKEN(0);
5508         s++;
5509         BOop(OP_BIT_XOR);
5510     case '[':
5511         if (PL_lex_brackets > 100)
5512             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5513         PL_lex_brackstack[PL_lex_brackets++] = 0;
5514         PL_lex_allbrackets++;
5515         {
5516             const char tmp = *s++;
5517             OPERATOR(tmp);
5518         }
5519     case '~':
5520         if (s[1] == '~'
5521             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5522         {
5523             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5524                 TOKEN(0);
5525             s += 2;
5526             Eop(OP_SMARTMATCH);
5527         }
5528         s++;
5529         OPERATOR('~');
5530     case ',':
5531         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5532             TOKEN(0);
5533         s++;
5534         OPERATOR(',');
5535     case ':':
5536         if (s[1] == ':') {
5537             len = 0;
5538             goto just_a_word_zero_gv;
5539         }
5540         s++;
5541         switch (PL_expect) {
5542             OP *attrs;
5543 #ifdef PERL_MAD
5544             I32 stuffstart;
5545 #endif
5546         case XOPERATOR:
5547             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5548                 break;
5549             PL_bufptr = s;      /* update in case we back off */
5550             if (*s == '=') {
5551                 Perl_croak(aTHX_
5552                            "Use of := for an empty attribute list is not allowed");
5553             }
5554             goto grabattrs;
5555         case XATTRBLOCK:
5556             PL_expect = XBLOCK;
5557             goto grabattrs;
5558         case XATTRTERM:
5559             PL_expect = XTERMBLOCK;
5560          grabattrs:
5561 #ifdef PERL_MAD
5562             stuffstart = s - SvPVX(PL_linestr) - 1;
5563 #endif
5564             s = PEEKSPACE(s);
5565             attrs = NULL;
5566             while (isIDFIRST_lazy_if(s,UTF)) {
5567                 I32 tmp;
5568                 SV *sv;
5569                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5570                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5571                     if (tmp < 0) tmp = -tmp;
5572                     switch (tmp) {
5573                     case KEY_or:
5574                     case KEY_and:
5575                     case KEY_for:
5576                     case KEY_foreach:
5577                     case KEY_unless:
5578                     case KEY_if:
5579                     case KEY_while:
5580                     case KEY_until:
5581                         goto got_attrs;
5582                     default:
5583                         break;
5584                     }
5585                 }
5586                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5587                 if (*d == '(') {
5588                     d = scan_str(d,TRUE,TRUE,FALSE);
5589                     if (!d) {
5590                         /* MUST advance bufptr here to avoid bogus
5591                            "at end of line" context messages from yyerror().
5592                          */
5593                         PL_bufptr = s + len;
5594                         yyerror("Unterminated attribute parameter in attribute list");
5595                         if (attrs)
5596                             op_free(attrs);
5597                         sv_free(sv);
5598                         return REPORT(0);       /* EOF indicator */
5599                     }
5600                 }
5601                 if (PL_lex_stuff) {
5602                     sv_catsv(sv, PL_lex_stuff);
5603                     attrs = op_append_elem(OP_LIST, attrs,
5604                                         newSVOP(OP_CONST, 0, sv));
5605                     SvREFCNT_dec(PL_lex_stuff);
5606                     PL_lex_stuff = NULL;
5607                 }
5608                 else {
5609                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5610                         sv_free(sv);
5611                         if (PL_in_my == KEY_our) {
5612                             deprecate(":unique");
5613                         }
5614                         else
5615                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5616                     }
5617
5618                     /* NOTE: any CV attrs applied here need to be part of
5619                        the CVf_BUILTIN_ATTRS define in cv.h! */
5620                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5621                         sv_free(sv);
5622                         CvLVALUE_on(PL_compcv);
5623                     }
5624                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5625                         sv_free(sv);
5626                         deprecate(":locked");
5627                     }
5628                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5629                         sv_free(sv);
5630                         CvMETHOD_on(PL_compcv);
5631                     }
5632                     /* After we've set the flags, it could be argued that
5633                        we don't need to do the attributes.pm-based setting
5634                        process, and shouldn't bother appending recognized
5635                        flags.  To experiment with that, uncomment the
5636                        following "else".  (Note that's already been
5637                        uncommented.  That keeps the above-applied built-in
5638                        attributes from being intercepted (and possibly
5639                        rejected) by a package's attribute routines, but is
5640                        justified by the performance win for the common case
5641                        of applying only built-in attributes.) */
5642                     else
5643                         attrs = op_append_elem(OP_LIST, attrs,
5644                                             newSVOP(OP_CONST, 0,
5645                                                     sv));
5646                 }
5647                 s = PEEKSPACE(d);
5648                 if (*s == ':' && s[1] != ':')
5649                     s = PEEKSPACE(s+1);
5650                 else if (s == d)
5651                     break;      /* require real whitespace or :'s */
5652                 /* XXX losing whitespace on sequential attributes here */
5653             }
5654             {
5655                 const char tmp
5656                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5657                 if (*s != ';' && *s != '}' && *s != tmp
5658                     && (tmp != '=' || *s != ')')) {
5659                     const char q = ((*s == '\'') ? '"' : '\'');
5660                     /* If here for an expression, and parsed no attrs, back
5661                        off. */
5662                     if (tmp == '=' && !attrs) {
5663                         s = PL_bufptr;
5664                         break;
5665                     }
5666                     /* MUST advance bufptr here to avoid bogus "at end of line"
5667                        context messages from yyerror().
5668                     */
5669                     PL_bufptr = s;
5670                     yyerror( (const char *)
5671                              (*s
5672                               ? Perl_form(aTHX_ "Invalid separator character "
5673                                           "%c%c%c in attribute list", q, *s, q)
5674                               : "Unterminated attribute list" ) );
5675                     if (attrs)
5676                         op_free(attrs);
5677                     OPERATOR(':');
5678                 }
5679             }
5680         got_attrs:
5681             if (attrs) {
5682                 start_force(PL_curforce);
5683                 NEXTVAL_NEXTTOKE.opval = attrs;
5684                 CURMAD('_', PL_nextwhite);
5685                 force_next(THING);
5686             }
5687 #ifdef PERL_MAD
5688             if (PL_madskills) {
5689                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5690                                      (s - SvPVX(PL_linestr)) - stuffstart);
5691             }
5692 #endif
5693             TOKEN(COLONATTR);
5694         }
5695         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5696             s--;
5697             TOKEN(0);
5698         }
5699         PL_lex_allbrackets--;
5700         OPERATOR(':');
5701     case '(':
5702         s++;
5703         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5704             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5705         else
5706             PL_expect = XTERM;
5707         s = SKIPSPACE1(s);
5708         PL_lex_allbrackets++;
5709         TOKEN('(');
5710     case ';':
5711         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5712             TOKEN(0);
5713         CLINE;
5714         s++;
5715         OPERATOR(';');
5716     case ')':
5717         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5718             TOKEN(0);
5719         s++;
5720         PL_lex_allbrackets--;
5721         s = SKIPSPACE1(s);
5722         if (*s == '{')
5723             PREBLOCK(')');
5724         TERM(')');
5725     case ']':
5726         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5727             TOKEN(0);
5728         s++;
5729         if (PL_lex_brackets <= 0)
5730             yyerror("Unmatched right square bracket");
5731         else
5732             --PL_lex_brackets;
5733         PL_lex_allbrackets--;
5734         if (PL_lex_state == LEX_INTERPNORMAL) {
5735             if (PL_lex_brackets == 0) {
5736                 if (*s == '-' && s[1] == '>')
5737                     PL_lex_state = LEX_INTERPENDMAYBE;
5738                 else if (*s != '[' && *s != '{')
5739                     PL_lex_state = LEX_INTERPEND;
5740             }
5741         }
5742         TERM(']');
5743     case '{':
5744         s++;
5745       leftbracket:
5746         if (PL_lex_brackets > 100) {
5747             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5748         }
5749         switch (PL_expect) {
5750         case XTERM:
5751             if (PL_oldoldbufptr == PL_last_lop)
5752                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5753             else
5754                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5755             PL_lex_allbrackets++;
5756             OPERATOR(HASHBRACK);
5757         case XOPERATOR:
5758             while (s < PL_bufend && SPACE_OR_TAB(*s))
5759                 s++;
5760             d = s;
5761             PL_tokenbuf[0] = '\0';
5762             if (d < PL_bufend && *d == '-') {
5763                 PL_tokenbuf[0] = '-';
5764                 d++;
5765                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5766                     d++;
5767             }
5768             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5769                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5770                               FALSE, &len);
5771                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5772                     d++;
5773                 if (*d == '}') {
5774                     const char minus = (PL_tokenbuf[0] == '-');
5775                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5776                     if (minus)
5777                         force_next('-');
5778                 }
5779             }
5780             /* FALL THROUGH */
5781         case XATTRBLOCK:
5782         case XBLOCK:
5783             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5784             PL_lex_allbrackets++;
5785             PL_expect = XSTATE;
5786             break;
5787         case XATTRTERM:
5788         case XTERMBLOCK:
5789             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5790             PL_lex_allbrackets++;
5791             PL_expect = XSTATE;
5792             break;
5793         default: {
5794                 const char *t;
5795                 if (PL_oldoldbufptr == PL_last_lop)
5796                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5797                 else
5798                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5799                 PL_lex_allbrackets++;
5800                 s = SKIPSPACE1(s);
5801                 if (*s == '}') {
5802                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5803                         PL_expect = XTERM;
5804                         /* This hack is to get the ${} in the message. */
5805                         PL_bufptr = s+1;
5806                         yyerror("syntax error");
5807                         break;
5808                     }
5809                     OPERATOR(HASHBRACK);
5810                 }
5811                 /* This hack serves to disambiguate a pair of curlies
5812                  * as being a block or an anon hash.  Normally, expectation
5813                  * determines that, but in cases where we're not in a
5814                  * position to expect anything in particular (like inside
5815                  * eval"") we have to resolve the ambiguity.  This code
5816                  * covers the case where the first term in the curlies is a
5817                  * quoted string.  Most other cases need to be explicitly
5818                  * disambiguated by prepending a "+" before the opening
5819                  * curly in order to force resolution as an anon hash.
5820                  *
5821                  * XXX should probably propagate the outer expectation
5822                  * into eval"" to rely less on this hack, but that could
5823                  * potentially break current behavior of eval"".
5824                  * GSAR 97-07-21
5825                  */
5826                 t = s;
5827                 if (*s == '\'' || *s == '"' || *s == '`') {
5828                     /* common case: get past first string, handling escapes */
5829                     for (t++; t < PL_bufend && *t != *s;)
5830                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5831                             t++;
5832                     t++;
5833                 }
5834                 else if (*s == 'q') {
5835                     if (++t < PL_bufend
5836                         && (!isALNUM(*t)
5837                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5838                                 && !isALNUM(*t))))
5839                     {
5840                         /* skip q//-like construct */
5841                         const char *tmps;
5842                         char open, close, term;
5843                         I32 brackets = 1;
5844
5845                         while (t < PL_bufend && isSPACE(*t))
5846                             t++;
5847                         /* check for q => */
5848                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5849                             OPERATOR(HASHBRACK);
5850                         }
5851                         term = *t;
5852                         open = term;
5853                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5854                             term = tmps[5];
5855                         close = term;
5856                         if (open == close)
5857                             for (t++; t < PL_bufend; t++) {
5858                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5859                                     t++;
5860                                 else if (*t == open)
5861                                     break;
5862                             }
5863                         else {
5864                             for (t++; t < PL_bufend; t++) {
5865                                 if (*t == '\\' && t+1 < PL_bufend)
5866                                     t++;
5867                                 else if (*t == close && --brackets <= 0)
5868                                     break;
5869                                 else if (*t == open)
5870                                     brackets++;
5871                             }
5872                         }
5873                         t++;
5874                     }
5875                     else
5876                         /* skip plain q word */
5877                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5878                              t += UTF8SKIP(t);
5879                 }
5880                 else if (isALNUM_lazy_if(t,UTF)) {
5881                     t += UTF8SKIP(t);
5882                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5883                          t += UTF8SKIP(t);
5884                 }
5885                 while (t < PL_bufend && isSPACE(*t))
5886                     t++;
5887                 /* if comma follows first term, call it an anon hash */
5888                 /* XXX it could be a comma expression with loop modifiers */
5889                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5890                                    || (*t == '=' && t[1] == '>')))
5891                     OPERATOR(HASHBRACK);
5892                 if (PL_expect == XREF)
5893                     PL_expect = XTERM;
5894                 else {
5895                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5896                     PL_expect = XSTATE;
5897                 }
5898             }
5899             break;
5900         }
5901         pl_yylval.ival = CopLINE(PL_curcop);
5902         if (isSPACE(*s) || *s == '#')
5903             PL_copline = NOLINE;   /* invalidate current command line number */
5904         TOKEN(formbrack ? '=' : '{');
5905     case '}':
5906         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5907             TOKEN(0);
5908       rightbracket:
5909         s++;
5910         if (PL_lex_brackets <= 0)
5911             yyerror("Unmatched right curly bracket");
5912         else
5913             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5914         PL_lex_allbrackets--;
5915         if (PL_lex_state == LEX_INTERPNORMAL) {
5916             if (PL_lex_brackets == 0) {
5917                 if (PL_expect & XFAKEBRACK) {
5918                     PL_expect &= XENUMMASK;
5919                     PL_lex_state = LEX_INTERPEND;
5920                     PL_bufptr = s;
5921 #if 0
5922                     if (PL_madskills) {
5923                         if (!PL_thiswhite)
5924                             PL_thiswhite = newSVpvs("");
5925                         sv_catpvs(PL_thiswhite,"}");
5926                     }
5927 #endif
5928                     return yylex();     /* ignore fake brackets */
5929                 }
5930                 if (*s == '-' && s[1] == '>')
5931                     PL_lex_state = LEX_INTERPENDMAYBE;
5932                 else if (*s != '[' && *s != '{')
5933                     PL_lex_state = LEX_INTERPEND;
5934             }
5935         }
5936         if (PL_expect & XFAKEBRACK) {
5937             PL_expect &= XENUMMASK;
5938             PL_bufptr = s;
5939             return yylex();             /* ignore fake brackets */
5940         }
5941         start_force(PL_curforce);
5942         if (PL_madskills) {
5943             curmad('X', newSVpvn(s-1,1));
5944             CURMAD('_', PL_thiswhite);
5945         }
5946         force_next(formbrack ? '.' : '}');
5947         if (formbrack) LEAVE;
5948 #ifdef PERL_MAD
5949         if (!PL_thistoken)
5950             PL_thistoken = newSVpvs("");
5951 #endif
5952         if (formbrack == 2) { /* means . where arguments were expected */
5953             start_force(PL_curforce);
5954             force_next(';');
5955             TOKEN(FORMRBRACK);
5956         }
5957         TOKEN(';');
5958     case '&':
5959         s++;
5960         if (*s++ == '&') {
5961             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5962                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5963                 s -= 2;
5964                 TOKEN(0);
5965             }
5966             AOPERATOR(ANDAND);
5967         }
5968         s--;
5969         if (PL_expect == XOPERATOR) {
5970             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5971                 && isIDFIRST_lazy_if(s,UTF))
5972             {
5973                 CopLINE_dec(PL_curcop);
5974                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5975                 CopLINE_inc(PL_curcop);
5976             }
5977             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5978                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5979                 s--;
5980                 TOKEN(0);
5981             }
5982             BAop(OP_BIT_AND);
5983         }
5984
5985         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5986         if (*PL_tokenbuf) {
5987             PL_expect = XOPERATOR;
5988             force_ident(PL_tokenbuf, '&');
5989         }
5990         else
5991             PREREF('&');
5992         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5993         TERM('&');
5994
5995     case '|':
5996         s++;
5997         if (*s++ == '|') {
5998             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5999                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6000                 s -= 2;
6001                 TOKEN(0);
6002             }
6003             AOPERATOR(OROR);
6004         }
6005         s--;
6006         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6007                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6008             s--;
6009             TOKEN(0);
6010         }
6011         BOop(OP_BIT_OR);
6012     case '=':
6013         s++;
6014         {
6015             const char tmp = *s++;
6016             if (tmp == '=') {
6017                 if (!PL_lex_allbrackets &&
6018                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6019                     s -= 2;
6020                     TOKEN(0);
6021                 }
6022                 Eop(OP_EQ);
6023             }
6024             if (tmp == '>') {
6025                 if (!PL_lex_allbrackets &&
6026                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6027                     s -= 2;
6028                     TOKEN(0);
6029                 }
6030                 OPERATOR(',');
6031             }
6032             if (tmp == '~')
6033                 PMop(OP_MATCH);
6034             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6035                 && strchr("+-*/%.^&|<",tmp))
6036                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6037                             "Reversed %c= operator",(int)tmp);
6038             s--;
6039             if (PL_expect == XSTATE && isALPHA(tmp) &&
6040                 (s == PL_linestart+1 || s[-2] == '\n') )
6041                 {
6042                     if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6043                         || PL_lex_state != LEX_NORMAL) {
6044                         d = PL_bufend;
6045                         while (s < d) {
6046                             if (*s++ == '\n') {
6047                                 incline(s);
6048                                 if (strnEQ(s,"=cut",4)) {
6049                                     s = strchr(s,'\n');
6050                                     if (s)
6051                                         s++;
6052                                     else
6053                                         s = d;
6054                                     incline(s);
6055                                     goto retry;
6056                                 }
6057                             }
6058                         }
6059                         goto retry;
6060                     }
6061 #ifdef PERL_MAD
6062                     if (PL_madskills) {
6063                         if (!PL_thiswhite)
6064                             PL_thiswhite = newSVpvs("");
6065                         sv_catpvn(PL_thiswhite, PL_linestart,
6066                                   PL_bufend - PL_linestart);
6067                     }
6068 #endif
6069                     s = PL_bufend;
6070                     PL_parser->in_pod = 1;
6071                     goto retry;
6072                 }
6073         }
6074         if (PL_expect == XBLOCK) {
6075             const char *t = s;
6076 #ifdef PERL_STRICT_CR
6077             while (SPACE_OR_TAB(*t))
6078 #else
6079             while (SPACE_OR_TAB(*t) || *t == '\r')
6080 #endif
6081                 t++;
6082             if (*t == '\n' || *t == '#') {
6083                 formbrack = 1;
6084                 ENTER;
6085                 SAVEI8(PL_parser->form_lex_state);
6086                 SAVEI32(PL_lex_formbrack);
6087                 PL_parser->form_lex_state = PL_lex_state;
6088                 PL_lex_formbrack = PL_lex_brackets + 1;
6089                 goto leftbracket;
6090             }
6091         }
6092         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6093             s--;
6094             TOKEN(0);
6095         }
6096         pl_yylval.ival = 0;
6097         OPERATOR(ASSIGNOP);
6098     case '!':
6099         s++;
6100         {
6101             const char tmp = *s++;
6102             if (tmp == '=') {
6103                 /* was this !=~ where !~ was meant?
6104                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6105
6106                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6107                     const char *t = s+1;
6108
6109                     while (t < PL_bufend && isSPACE(*t))
6110                         ++t;
6111
6112                     if (*t == '/' || *t == '?' ||
6113                         ((*t == 'm' || *t == 's' || *t == 'y')
6114                          && !isALNUM(t[1])) ||
6115                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6116                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6117                                     "!=~ should be !~");
6118                 }
6119                 if (!PL_lex_allbrackets &&
6120                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6121                     s -= 2;
6122                     TOKEN(0);
6123                 }
6124                 Eop(OP_NE);
6125             }
6126             if (tmp == '~')
6127                 PMop(OP_NOT);
6128         }
6129         s--;
6130         OPERATOR('!');
6131     case '<':
6132         if (PL_expect != XOPERATOR) {
6133             if (s[1] != '<' && !strchr(s,'>'))
6134                 check_uni();
6135             if (s[1] == '<')
6136                 s = scan_heredoc(s);
6137             else
6138                 s = scan_inputsymbol(s);
6139             TERM(sublex_start());
6140         }
6141         s++;
6142         {
6143             char tmp = *s++;
6144             if (tmp == '<') {
6145                 if (*s == '=' && !PL_lex_allbrackets &&
6146                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6147                     s -= 2;
6148                     TOKEN(0);
6149                 }
6150                 SHop(OP_LEFT_SHIFT);
6151             }
6152             if (tmp == '=') {
6153                 tmp = *s++;
6154                 if (tmp == '>') {
6155                     if (!PL_lex_allbrackets &&
6156                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6157                         s -= 3;
6158                         TOKEN(0);
6159                     }
6160                     Eop(OP_NCMP);
6161                 }
6162                 s--;
6163                 if (!PL_lex_allbrackets &&
6164                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6165                     s -= 2;
6166                     TOKEN(0);
6167                 }
6168                 Rop(OP_LE);
6169             }
6170         }
6171         s--;
6172         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6173             s--;
6174             TOKEN(0);
6175         }
6176         Rop(OP_LT);
6177     case '>':
6178         s++;
6179         {
6180             const char tmp = *s++;
6181             if (tmp == '>') {
6182                 if (*s == '=' && !PL_lex_allbrackets &&
6183                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6184                     s -= 2;
6185                     TOKEN(0);
6186                 }
6187                 SHop(OP_RIGHT_SHIFT);
6188             }
6189             else if (tmp == '=') {
6190                 if (!PL_lex_allbrackets &&
6191                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6192                     s -= 2;
6193                     TOKEN(0);
6194                 }
6195                 Rop(OP_GE);
6196             }
6197         }
6198         s--;
6199         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6200             s--;
6201             TOKEN(0);
6202         }
6203         Rop(OP_GT);
6204
6205     case '$':
6206         CLINE;
6207
6208         if (PL_expect == XOPERATOR) {
6209             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6210                 return deprecate_commaless_var_list();
6211             }
6212         }
6213
6214         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6215             PL_tokenbuf[0] = '@';
6216             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6217                            sizeof PL_tokenbuf - 1, FALSE);
6218             if (PL_expect == XOPERATOR)
6219                 no_op("Array length", s);
6220             if (!PL_tokenbuf[1])
6221                 PREREF(DOLSHARP);
6222             PL_expect = XOPERATOR;
6223             PL_pending_ident = '#';
6224             TOKEN(DOLSHARP);
6225         }
6226
6227         PL_tokenbuf[0] = '$';
6228         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6229                        sizeof PL_tokenbuf - 1, FALSE);
6230         if (PL_expect == XOPERATOR)
6231             no_op("Scalar", s);
6232         if (!PL_tokenbuf[1]) {
6233             if (s == PL_bufend)
6234                 yyerror("Final $ should be \\$ or $name");
6235             PREREF('$');
6236         }
6237
6238         d = s;
6239         {
6240             const char tmp = *s;
6241             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6242                 s = SKIPSPACE1(s);
6243
6244             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6245                 && intuit_more(s)) {
6246                 if (*s == '[') {
6247                     PL_tokenbuf[0] = '@';
6248                     if (ckWARN(WARN_SYNTAX)) {
6249                         char *t = s+1;
6250
6251                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6252                             t++;
6253                         if (*t++ == ',') {
6254                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6255                             while (t < PL_bufend && *t != ']')
6256                                 t++;
6257                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6258                                         "Multidimensional syntax %.*s not supported",
6259                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6260                         }
6261                     }
6262                 }
6263                 else if (*s == '{') {
6264                     char *t;
6265                     PL_tokenbuf[0] = '%';
6266                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6267                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6268                         {
6269                             char tmpbuf[sizeof PL_tokenbuf];
6270                             do {
6271                                 t++;
6272                             } while (isSPACE(*t));
6273                             if (isIDFIRST_lazy_if(t,UTF)) {
6274                                 STRLEN len;
6275                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6276                                               &len);
6277                                 while (isSPACE(*t))
6278                                     t++;
6279                                 if (*t == ';'
6280                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6281                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6282                                                 "You need to quote \"%"SVf"\"",
6283                                                   SVfARG(newSVpvn_flags(tmpbuf, len,
6284                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6285                             }
6286                         }
6287                 }
6288             }
6289
6290             PL_expect = XOPERATOR;
6291             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6292                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6293                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6294                     PL_expect = XOPERATOR;
6295                 else if (strchr("$@\"'`q", *s))
6296                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6297                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6298                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6299                 else if (isIDFIRST_lazy_if(s,UTF)) {
6300                     char tmpbuf[sizeof PL_tokenbuf];
6301                     int t2;
6302                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6303                     if ((t2 = keyword(tmpbuf, len, 0))) {
6304                         /* binary operators exclude handle interpretations */
6305                         switch (t2) {
6306                         case -KEY_x:
6307                         case -KEY_eq:
6308                         case -KEY_ne:
6309                         case -KEY_gt:
6310                         case -KEY_lt:
6311                         case -KEY_ge:
6312                         case -KEY_le:
6313                         case -KEY_cmp:
6314                             break;
6315                         default:
6316                             PL_expect = XTERM;  /* e.g. print $fh length() */
6317                             break;
6318                         }
6319                     }
6320                     else {
6321                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6322                     }
6323                 }
6324                 else if (isDIGIT(*s))
6325                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6326                 else if (*s == '.' && isDIGIT(s[1]))
6327                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6328                 else if ((*s == '?' || *s == '-' || *s == '+')
6329                          && !isSPACE(s[1]) && s[1] != '=')
6330                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6331                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6332                          && s[1] != '/')
6333                     PL_expect = XTERM;          /* e.g. print $fh /.../
6334                                                    XXX except DORDOR operator
6335                                                 */
6336                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6337                          && s[2] != '=')
6338                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6339             }
6340         }
6341         PL_pending_ident = '$';
6342         TOKEN('$');
6343
6344     case '@':
6345         if (PL_expect == XOPERATOR)
6346             no_op("Array", s);
6347         PL_tokenbuf[0] = '@';
6348         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6349         if (!PL_tokenbuf[1]) {
6350             PREREF('@');
6351         }
6352         if (PL_lex_state == LEX_NORMAL)
6353             s = SKIPSPACE1(s);
6354         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6355             if (*s == '{')
6356                 PL_tokenbuf[0] = '%';
6357
6358             /* Warn about @ where they meant $. */
6359             if (*s == '[' || *s == '{') {
6360                 if (ckWARN(WARN_SYNTAX)) {
6361                     const char *t = s + 1;
6362                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6363                         t += UTF ? UTF8SKIP(t) : 1;
6364                     if (*t == '}' || *t == ']') {
6365                         t++;
6366                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6367        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6368                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6369                             "Scalar value %"SVf" better written as $%"SVf,
6370                             SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6371                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6372                             SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6373                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6374                     }
6375                 }
6376             }
6377         }
6378         PL_pending_ident = '@';
6379         TERM('@');
6380
6381      case '/':                  /* may be division, defined-or, or pattern */
6382         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6383             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6384                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6385                 TOKEN(0);
6386             s += 2;
6387             AOPERATOR(DORDOR);
6388         }
6389      case '?':                  /* may either be conditional or pattern */
6390         if (PL_expect == XOPERATOR) {
6391              char tmp = *s++;
6392              if(tmp == '?') {
6393                 if (!PL_lex_allbrackets &&
6394                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6395                     s--;
6396                     TOKEN(0);
6397                 }
6398                 PL_lex_allbrackets++;
6399                 OPERATOR('?');
6400              }
6401              else {
6402                  tmp = *s++;
6403                  if(tmp == '/') {
6404                      /* A // operator. */
6405                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6406                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6407                                             LEX_FAKEEOF_LOGIC)) {
6408                         s -= 2;
6409                         TOKEN(0);
6410                     }
6411                     AOPERATOR(DORDOR);
6412                  }
6413                  else {
6414                      s--;
6415                      if (*s == '=' && !PL_lex_allbrackets &&
6416                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6417                          s--;
6418                          TOKEN(0);
6419                      }
6420                      Mop(OP_DIVIDE);
6421                  }
6422              }
6423          }
6424          else {
6425              /* Disable warning on "study /blah/" */
6426              if (PL_oldoldbufptr == PL_last_uni
6427               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6428                   || memNE(PL_last_uni, "study", 5)
6429                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6430               ))
6431                  check_uni();
6432              if (*s == '?')
6433                  deprecate("?PATTERN? without explicit operator");
6434              s = scan_pat(s,OP_MATCH);
6435              TERM(sublex_start());
6436          }
6437
6438     case '.':
6439         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6440 #ifdef PERL_STRICT_CR
6441             && s[1] == '\n'
6442 #else
6443             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6444 #endif
6445             && (s == PL_linestart || s[-1] == '\n') )
6446         {
6447             PL_expect = XSTATE;
6448             formbrack = 2; /* dot seen where arguments expected */
6449             goto rightbracket;
6450         }
6451         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6452             s += 3;
6453             OPERATOR(YADAYADA);
6454         }
6455         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6456             char tmp = *s++;
6457             if (*s == tmp) {
6458                 if (!PL_lex_allbrackets &&
6459                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6460                     s--;
6461                     TOKEN(0);
6462                 }
6463                 s++;
6464                 if (*s == tmp) {
6465                     s++;
6466                     pl_yylval.ival = OPf_SPECIAL;
6467                 }
6468                 else
6469                     pl_yylval.ival = 0;
6470                 OPERATOR(DOTDOT);
6471             }
6472             if (*s == '=' && !PL_lex_allbrackets &&
6473                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6474                 s--;
6475                 TOKEN(0);
6476             }
6477             Aop(OP_CONCAT);
6478         }
6479         /* FALL THROUGH */
6480     case '0': case '1': case '2': case '3': case '4':
6481     case '5': case '6': case '7': case '8': case '9':
6482         s = scan_num(s, &pl_yylval);
6483         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6484         if (PL_expect == XOPERATOR)
6485             no_op("Number",s);
6486         TERM(THING);
6487
6488     case '\'':
6489         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6490         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6491         if (PL_expect == XOPERATOR) {
6492             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6493                 return deprecate_commaless_var_list();
6494             }
6495             else
6496                 no_op("String",s);
6497         }
6498         if (!s)
6499             missingterm(NULL);
6500         pl_yylval.ival = OP_CONST;
6501         TERM(sublex_start());
6502
6503     case '"':
6504         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6505         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6506         if (PL_expect == XOPERATOR) {
6507             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6508                 return deprecate_commaless_var_list();
6509             }
6510             else
6511                 no_op("String",s);
6512         }
6513         if (!s)
6514             missingterm(NULL);
6515         pl_yylval.ival = OP_CONST;
6516         /* FIXME. I think that this can be const if char *d is replaced by
6517            more localised variables.  */
6518         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6519             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6520                 pl_yylval.ival = OP_STRINGIFY;
6521                 break;
6522             }
6523         }
6524         TERM(sublex_start());
6525
6526     case '`':
6527         s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6528         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6529         if (PL_expect == XOPERATOR)
6530             no_op("Backticks",s);
6531         if (!s)
6532             missingterm(NULL);
6533         readpipe_override();
6534         TERM(sublex_start());
6535
6536     case '\\':
6537         s++;
6538         if (PL_lex_inwhat && isDIGIT(*s))
6539             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6540                            *s, *s);
6541         if (PL_expect == XOPERATOR)
6542             no_op("Backslash",s);
6543         OPERATOR(REFGEN);
6544
6545     case 'v':
6546         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6547             char *start = s + 2;
6548             while (isDIGIT(*start) || *start == '_')
6549                 start++;
6550             if (*start == '.' && isDIGIT(start[1])) {
6551                 s = scan_num(s, &pl_yylval);
6552                 TERM(THING);
6553             }
6554             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6555             else if (!isALPHA(*start) && (PL_expect == XTERM
6556                         || PL_expect == XREF || PL_expect == XSTATE
6557                         || PL_expect == XTERMORDORDOR)) {
6558                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6559                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6560                 if (!gv) {
6561                     s = scan_num(s, &pl_yylval);
6562                     TERM(THING);
6563                 }
6564             }
6565         }
6566         goto keylookup;
6567     case 'x':
6568         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6569             s++;
6570             Mop(OP_REPEAT);
6571         }
6572         goto keylookup;
6573
6574     case '_':
6575     case 'a': case 'A':
6576     case 'b': case 'B':
6577     case 'c': case 'C':
6578     case 'd': case 'D':
6579     case 'e': case 'E':
6580     case 'f': case 'F':
6581     case 'g': case 'G':
6582     case 'h': case 'H':
6583     case 'i': case 'I':
6584     case 'j': case 'J':
6585     case 'k': case 'K':
6586     case 'l': case 'L':
6587     case 'm': case 'M':
6588     case 'n': case 'N':
6589     case 'o': case 'O':
6590     case 'p': case 'P':
6591     case 'q': case 'Q':
6592     case 'r': case 'R':
6593     case 's': case 'S':
6594     case 't': case 'T':
6595     case 'u': case 'U':
6596               case 'V':
6597     case 'w': case 'W':
6598               case 'X':
6599     case 'y': case 'Y':
6600     case 'z': case 'Z':
6601
6602       keylookup: {
6603         bool anydelim;
6604         I32 tmp;
6605
6606         orig_keyword = 0;
6607         gv = NULL;
6608         gvp = NULL;
6609
6610         PL_bufptr = s;
6611         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6612
6613         /* Some keywords can be followed by any delimiter, including ':' */
6614         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6615
6616         /* x::* is just a word, unless x is "CORE" */
6617         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6618             goto just_a_word;
6619
6620         d = s;
6621         while (d < PL_bufend && isSPACE(*d))
6622                 d++;    /* no comments skipped here, or s### is misparsed */
6623
6624         /* Is this a word before a => operator? */
6625         if (*d == '=' && d[1] == '>') {
6626             CLINE;
6627             pl_yylval.opval
6628                 = (OP*)newSVOP(OP_CONST, 0,
6629                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6630             pl_yylval.opval->op_private = OPpCONST_BARE;
6631             TERM(WORD);
6632         }
6633
6634         /* Check for plugged-in keyword */
6635         {
6636             OP *o;
6637             int result;
6638             char *saved_bufptr = PL_bufptr;
6639             PL_bufptr = s;
6640             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6641             s = PL_bufptr;
6642             if (result == KEYWORD_PLUGIN_DECLINE) {
6643                 /* not a plugged-in keyword */
6644                 PL_bufptr = saved_bufptr;
6645             } else if (result == KEYWORD_PLUGIN_STMT) {
6646                 pl_yylval.opval = o;
6647                 CLINE;
6648                 PL_expect = XSTATE;
6649                 return REPORT(PLUGSTMT);
6650             } else if (result == KEYWORD_PLUGIN_EXPR) {
6651                 pl_yylval.opval = o;
6652                 CLINE;
6653                 PL_expect = XOPERATOR;
6654                 return REPORT(PLUGEXPR);
6655             } else {
6656                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6657                                         PL_tokenbuf);
6658             }
6659         }
6660
6661         /* Check for built-in keyword */
6662         tmp = keyword(PL_tokenbuf, len, 0);
6663
6664         /* Is this a label? */
6665         if (!anydelim && PL_expect == XSTATE
6666               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6667             s = d + 1;
6668             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6669                                             newSVpvn_flags(PL_tokenbuf,
6670                                                         len, UTF ? SVf_UTF8 : 0));
6671             CLINE;
6672             TOKEN(LABEL);
6673         }
6674
6675         if (tmp < 0) {                  /* second-class keyword? */
6676             GV *ogv = NULL;     /* override (winner) */
6677             GV *hgv = NULL;     /* hidden (loser) */
6678             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6679                 CV *cv;
6680                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6681                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6682                     (cv = GvCVu(gv)))
6683                 {
6684                     if (GvIMPORTED_CV(gv))
6685                         ogv = gv;
6686                     else if (! CvMETHOD(cv))
6687                         hgv = gv;
6688                 }
6689                 if (!ogv &&
6690                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6691                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
6692                     (gv = *gvp) && isGV_with_GP(gv) &&
6693                     GvCVu(gv) && GvIMPORTED_CV(gv))
6694                 {
6695                     ogv = gv;
6696                 }
6697             }
6698             if (ogv) {
6699                 orig_keyword = tmp;
6700                 tmp = 0;                /* overridden by import or by GLOBAL */
6701             }
6702             else if (gv && !gvp
6703                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6704                      && GvCVu(gv))
6705             {
6706                 tmp = 0;                /* any sub overrides "weak" keyword */
6707             }
6708             else {                      /* no override */
6709                 tmp = -tmp;
6710                 if (tmp == KEY_dump) {
6711                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6712                                    "dump() better written as CORE::dump()");
6713                 }
6714                 gv = NULL;
6715                 gvp = 0;
6716                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6717                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6718                                    "Ambiguous call resolved as CORE::%s(), "
6719                                    "qualify as such or use &",
6720                                    GvENAME(hgv));
6721             }
6722         }
6723
6724       reserved_word:
6725         switch (tmp) {
6726
6727         default:                        /* not a keyword */
6728             /* Trade off - by using this evil construction we can pull the
6729                variable gv into the block labelled keylookup. If not, then
6730                we have to give it function scope so that the goto from the
6731                earlier ':' case doesn't bypass the initialisation.  */
6732             if (0) {
6733             just_a_word_zero_gv:
6734                 gv = NULL;
6735                 gvp = NULL;
6736                 orig_keyword = 0;
6737             }
6738           just_a_word: {
6739                 SV *sv;
6740                 int pkgname = 0;
6741                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6742                 OP *rv2cv_op;
6743                 CV *cv;
6744 #ifdef PERL_MAD
6745                 SV *nextPL_nextwhite = 0;
6746 #endif
6747
6748
6749                 /* Get the rest if it looks like a package qualifier */
6750
6751                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6752                     STRLEN morelen;
6753                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6754                                   TRUE, &morelen);
6755                     if (!morelen)
6756                         Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6757                                         SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6758                                             (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6759                                 *s == '\'' ? "'" : "::");
6760                     len += morelen;
6761                     pkgname = 1;
6762                 }
6763
6764                 if (PL_expect == XOPERATOR) {
6765                     if (PL_bufptr == PL_linestart) {
6766                         CopLINE_dec(PL_curcop);
6767                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6768                         CopLINE_inc(PL_curcop);
6769                     }
6770                     else
6771                         no_op("Bareword",s);
6772                 }
6773
6774                 /* Look for a subroutine with this name in current package,
6775                    unless name is "Foo::", in which case Foo is a bareword
6776                    (and a package name). */
6777
6778                 if (len > 2 && !PL_madskills &&
6779                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6780                 {
6781                     if (ckWARN(WARN_BAREWORD)
6782                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6783                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6784                             "Bareword \"%"SVf"\" refers to nonexistent package",
6785                              SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6786                                         (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6787                     len -= 2;
6788                     PL_tokenbuf[len] = '\0';
6789                     gv = NULL;
6790                     gvp = 0;
6791                 }
6792                 else {
6793                     if (!gv) {
6794                         /* Mustn't actually add anything to a symbol table.
6795                            But also don't want to "initialise" any placeholder
6796                            constants that might already be there into full
6797                            blown PVGVs with attached PVCV.  */
6798                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6799                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6800                                                SVt_PVCV);
6801                     }
6802                     len = 0;
6803                 }
6804
6805                 /* if we saw a global override before, get the right name */
6806
6807                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6808                     len ? len : strlen(PL_tokenbuf));
6809                 if (gvp) {
6810                     SV * const tmp_sv = sv;
6811                     sv = newSVpvs("CORE::GLOBAL::");
6812                     sv_catsv(sv, tmp_sv);
6813                     SvREFCNT_dec(tmp_sv);
6814                 }
6815
6816 #ifdef PERL_MAD
6817                 if (PL_madskills && !PL_thistoken) {
6818                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6819                     PL_thistoken = newSVpvn(start,s - start);
6820                     PL_realtokenstart = s - SvPVX(PL_linestr);
6821                 }
6822 #endif
6823
6824                 /* Presume this is going to be a bareword of some sort. */
6825                 CLINE;
6826                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6827                 pl_yylval.opval->op_private = OPpCONST_BARE;
6828
6829                 /* And if "Foo::", then that's what it certainly is. */
6830                 if (len)
6831                     goto safe_bareword;
6832
6833                 {
6834                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6835                     const_op->op_private = OPpCONST_BARE;
6836                     rv2cv_op = newCVREF(0, const_op);
6837                 }
6838                 cv = rv2cv_op_cv(rv2cv_op, 0);
6839
6840                 /* See if it's the indirect object for a list operator. */
6841
6842                 if (PL_oldoldbufptr &&
6843                     PL_oldoldbufptr < PL_bufptr &&
6844                     (PL_oldoldbufptr == PL_last_lop
6845                      || PL_oldoldbufptr == PL_last_uni) &&
6846                     /* NO SKIPSPACE BEFORE HERE! */
6847                     (PL_expect == XREF ||
6848                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6849                 {
6850                     bool immediate_paren = *s == '(';
6851
6852                     /* (Now we can afford to cross potential line boundary.) */
6853                     s = SKIPSPACE2(s,nextPL_nextwhite);
6854 #ifdef PERL_MAD
6855                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6856 #endif
6857
6858                     /* Two barewords in a row may indicate method call. */
6859
6860                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6861                         (tmp = intuit_method(s, gv, cv))) {
6862                         op_free(rv2cv_op);
6863                         if (tmp == METHOD && !PL_lex_allbrackets &&
6864                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6865                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6866                         return REPORT(tmp);
6867                     }
6868
6869                     /* If not a declared subroutine, it's an indirect object. */
6870                     /* (But it's an indir obj regardless for sort.) */
6871                     /* Also, if "_" follows a filetest operator, it's a bareword */
6872
6873                     if (
6874                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6875                          (!cv &&
6876                         (PL_last_lop_op != OP_MAPSTART &&
6877                          PL_last_lop_op != OP_GREPSTART))))
6878                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6879                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6880                        )
6881                     {
6882                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6883                         goto bareword;
6884                     }
6885                 }
6886
6887                 PL_expect = XOPERATOR;
6888 #ifdef PERL_MAD
6889                 if (isSPACE(*s))
6890                     s = SKIPSPACE2(s,nextPL_nextwhite);
6891                 PL_nextwhite = nextPL_nextwhite;
6892 #else
6893                 s = skipspace(s);
6894 #endif
6895
6896                 /* Is this a word before a => operator? */
6897                 if (*s == '=' && s[1] == '>' && !pkgname) {
6898                     op_free(rv2cv_op);
6899                     CLINE;
6900                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6901                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6902                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6903                     TERM(WORD);
6904                 }
6905
6906                 /* If followed by a paren, it's certainly a subroutine. */
6907                 if (*s == '(') {
6908                     CLINE;
6909                     if (cv) {
6910                         d = s + 1;
6911                         while (SPACE_OR_TAB(*d))
6912                             d++;
6913                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6914                             s = d + 1;
6915                             goto its_constant;
6916                         }
6917                     }
6918 #ifdef PERL_MAD
6919                     if (PL_madskills) {
6920                         PL_nextwhite = PL_thiswhite;
6921                         PL_thiswhite = 0;
6922                     }
6923                     start_force(PL_curforce);
6924 #endif
6925                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6926                     PL_expect = XOPERATOR;
6927 #ifdef PERL_MAD
6928                     if (PL_madskills) {
6929                         PL_nextwhite = nextPL_nextwhite;
6930                         curmad('X', PL_thistoken);
6931                         PL_thistoken = newSVpvs("");
6932                     }
6933 #endif
6934                     op_free(rv2cv_op);
6935                     force_next(WORD);
6936                     pl_yylval.ival = 0;
6937                     TOKEN('&');
6938                 }
6939
6940                 /* If followed by var or block, call it a method (unless sub) */
6941
6942                 if ((*s == '$' || *s == '{') && !cv) {
6943                     op_free(rv2cv_op);
6944                     PL_last_lop = PL_oldbufptr;
6945                     PL_last_lop_op = OP_METHOD;
6946                     if (!PL_lex_allbrackets &&
6947                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6948                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6949                     PREBLOCK(METHOD);
6950                 }
6951
6952                 /* If followed by a bareword, see if it looks like indir obj. */
6953
6954                 if (!orig_keyword
6955                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6956                         && (tmp = intuit_method(s, gv, cv))) {
6957                     op_free(rv2cv_op);
6958                     if (tmp == METHOD && !PL_lex_allbrackets &&
6959                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6960                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6961                     return REPORT(tmp);
6962                 }
6963
6964                 /* Not a method, so call it a subroutine (if defined) */
6965
6966                 if (cv) {
6967                     if (lastchar == '-') {
6968                         const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
6969                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6970                                 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
6971                                 SVfARG(tmpsv), SVfARG(tmpsv));
6972                     }
6973                     /* Check for a constant sub */
6974                     if ((sv = cv_const_sv(cv))) {
6975                   its_constant:
6976                         op_free(rv2cv_op);
6977                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6978                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6979                         pl_yylval.opval->op_private = OPpCONST_FOLDED;
6980                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6981                         TOKEN(WORD);
6982                     }
6983
6984                     op_free(pl_yylval.opval);
6985                     pl_yylval.opval = rv2cv_op;
6986                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6987                     PL_last_lop = PL_oldbufptr;
6988                     PL_last_lop_op = OP_ENTERSUB;
6989                     /* Is there a prototype? */
6990                     if (
6991 #ifdef PERL_MAD
6992                         cv &&
6993 #endif
6994                         SvPOK(cv))
6995                     {
6996                         STRLEN protolen = CvPROTOLEN(cv);
6997                         const char *proto = CvPROTO(cv);
6998                         bool optional;
6999                         if (!protolen)
7000                             TERM(FUNC0SUB);
7001                         if ((optional = *proto == ';'))
7002                           do
7003                             proto++;
7004                           while (*proto == ';');
7005                         if (
7006                             (
7007                                 (
7008                                     *proto == '$' || *proto == '_'
7009                                  || *proto == '*' || *proto == '+'
7010                                 )
7011                              && proto[1] == '\0'
7012                             )
7013                          || (
7014                              *proto == '\\' && proto[1] && proto[2] == '\0'
7015                             )
7016                         )
7017                             UNIPROTO(UNIOPSUB,optional);
7018                         if (*proto == '\\' && proto[1] == '[') {
7019                             const char *p = proto + 2;
7020                             while(*p && *p != ']')
7021                                 ++p;
7022                             if(*p == ']' && !p[1])
7023                                 UNIPROTO(UNIOPSUB,optional);
7024                         }
7025                         if (*proto == '&' && *s == '{') {
7026                             if (PL_curstash)
7027                                 sv_setpvs(PL_subname, "__ANON__");
7028                             else
7029                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7030                             if (!PL_lex_allbrackets &&
7031                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7032                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7033                             PREBLOCK(LSTOPSUB);
7034                         }
7035                     }
7036 #ifdef PERL_MAD
7037                     {
7038                         if (PL_madskills) {
7039                             PL_nextwhite = PL_thiswhite;
7040                             PL_thiswhite = 0;
7041                         }
7042                         start_force(PL_curforce);
7043                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7044                         PL_expect = XTERM;
7045                         if (PL_madskills) {
7046                             PL_nextwhite = nextPL_nextwhite;
7047                             curmad('X', PL_thistoken);
7048                             PL_thistoken = newSVpvs("");
7049                         }
7050                         force_next(WORD);
7051                         if (!PL_lex_allbrackets &&
7052                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7053                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7054                         TOKEN(NOAMP);
7055                     }
7056                 }
7057
7058                 /* Guess harder when madskills require "best effort". */
7059                 if (PL_madskills && (!gv || !GvCVu(gv))) {
7060                     int probable_sub = 0;
7061                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
7062                         probable_sub = 1;
7063                     else if (isALPHA(*s)) {
7064                         char tmpbuf[1024];
7065                         STRLEN tmplen;
7066                         d = s;
7067                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7068                         if (!keyword(tmpbuf, tmplen, 0))
7069                             probable_sub = 1;
7070                         else {
7071                             while (d < PL_bufend && isSPACE(*d))
7072                                 d++;
7073                             if (*d == '=' && d[1] == '>')
7074                                 probable_sub = 1;
7075                         }
7076                     }
7077                     if (probable_sub) {
7078                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7079                                         SVt_PVCV);
7080                         op_free(pl_yylval.opval);
7081                         pl_yylval.opval = rv2cv_op;
7082                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7083                         PL_last_lop = PL_oldbufptr;
7084                         PL_last_lop_op = OP_ENTERSUB;
7085                         PL_nextwhite = PL_thiswhite;
7086                         PL_thiswhite = 0;
7087                         start_force(PL_curforce);
7088                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7089                         PL_expect = XTERM;
7090                         PL_nextwhite = nextPL_nextwhite;
7091                         curmad('X', PL_thistoken);
7092                         PL_thistoken = newSVpvs("");
7093                         force_next(WORD);
7094                         if (!PL_lex_allbrackets &&
7095                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7096                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7097                         TOKEN(NOAMP);
7098                     }
7099 #else
7100                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7101                     PL_expect = XTERM;
7102                     force_next(WORD);
7103                     if (!PL_lex_allbrackets &&
7104                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7105                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7106                     TOKEN(NOAMP);
7107 #endif
7108                 }
7109
7110                 /* Call it a bare word */
7111
7112                 if (PL_hints & HINT_STRICT_SUBS)
7113                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7114                 else {
7115                 bareword:
7116                     /* after "print" and similar functions (corresponding to
7117                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7118                      * a filehandle should be subject to "strict subs".
7119                      * Likewise for the optional indirect-object argument to system
7120                      * or exec, which can't be a bareword */
7121                     if ((PL_last_lop_op == OP_PRINT
7122                             || PL_last_lop_op == OP_PRTF
7123                             || PL_last_lop_op == OP_SAY
7124                             || PL_last_lop_op == OP_SYSTEM
7125                             || PL_last_lop_op == OP_EXEC)
7126                             && (PL_hints & HINT_STRICT_SUBS))
7127                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7128                     if (lastchar != '-') {
7129                         if (ckWARN(WARN_RESERVED)) {
7130                             d = PL_tokenbuf;
7131                             while (isLOWER(*d))
7132                                 d++;
7133                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7134                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7135                                        PL_tokenbuf);
7136                         }
7137                     }
7138                 }
7139                 op_free(rv2cv_op);
7140
7141             safe_bareword:
7142                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7143                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7144                                      "Operator or semicolon missing before %c%"SVf,
7145                                      lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7146                                                     strlen(PL_tokenbuf),
7147                                                     SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7148                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7149                                      "Ambiguous use of %c resolved as operator %c",
7150                                      lastchar, lastchar);
7151                 }
7152                 TOKEN(WORD);
7153             }
7154
7155         case KEY___FILE__:
7156             FUN0OP(
7157                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7158             );
7159
7160         case KEY___LINE__:
7161             FUN0OP(
7162                 (OP*)newSVOP(OP_CONST, 0,
7163                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7164             );
7165
7166         case KEY___PACKAGE__:
7167             FUN0OP(
7168                 (OP*)newSVOP(OP_CONST, 0,
7169                                         (PL_curstash
7170                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7171                                          : &PL_sv_undef))
7172             );
7173
7174         case KEY___DATA__:
7175         case KEY___END__: {
7176             GV *gv;
7177             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7178                 const char *pname = "main";
7179                 STRLEN plen = 4;
7180                 U32 putf8 = 0;
7181                 if (PL_tokenbuf[2] == 'D')
7182                 {
7183                     HV * const stash =
7184                         PL_curstash ? PL_curstash : PL_defstash;
7185                     pname = HvNAME_get(stash);
7186                     plen  = HvNAMELEN (stash);
7187                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7188                 }
7189                 gv = gv_fetchpvn_flags(
7190                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7191                         plen+6, GV_ADD|putf8, SVt_PVIO
7192                 );
7193                 GvMULTI_on(gv);
7194                 if (!GvIO(gv))
7195                     GvIOp(gv) = newIO();
7196                 IoIFP(GvIOp(gv)) = PL_rsfp;
7197 #if defined(HAS_FCNTL) && defined(F_SETFD)
7198                 {
7199                     const int fd = PerlIO_fileno(PL_rsfp);
7200                     fcntl(fd,F_SETFD,fd >= 3);
7201                 }
7202 #endif
7203                 /* Mark this internal pseudo-handle as clean */
7204                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7205                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7206                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7207                 else
7208                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7209 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7210                 /* if the script was opened in binmode, we need to revert
7211                  * it to text mode for compatibility; but only iff it has CRs
7212                  * XXX this is a questionable hack at best. */
7213                 if (PL_bufend-PL_bufptr > 2
7214                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7215                 {
7216                     Off_t loc = 0;
7217                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7218                         loc = PerlIO_tell(PL_rsfp);
7219                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7220                     }
7221 #ifdef NETWARE
7222                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7223 #else
7224                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7225 #endif  /* NETWARE */
7226                         if (loc > 0)
7227                             PerlIO_seek(PL_rsfp, loc, 0);
7228                     }
7229                 }
7230 #endif
7231 #ifdef PERLIO_LAYERS
7232                 if (!IN_BYTES) {
7233                     if (UTF)
7234                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7235                     else if (PL_encoding) {
7236                         SV *name;
7237                         dSP;
7238                         ENTER;
7239                         SAVETMPS;
7240                         PUSHMARK(sp);
7241                         EXTEND(SP, 1);
7242                         XPUSHs(PL_encoding);
7243                         PUTBACK;
7244                         call_method("name", G_SCALAR);
7245                         SPAGAIN;
7246                         name = POPs;
7247                         PUTBACK;
7248                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7249                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7250                                                       SVfARG(name)));
7251                         FREETMPS;
7252                         LEAVE;
7253                     }
7254                 }
7255 #endif
7256 #ifdef PERL_MAD
7257                 if (PL_madskills) {
7258                     if (PL_realtokenstart >= 0) {
7259                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7260                         if (!PL_endwhite)
7261                             PL_endwhite = newSVpvs("");
7262                         sv_catsv(PL_endwhite, PL_thiswhite);
7263                         PL_thiswhite = 0;
7264                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7265                         PL_realtokenstart = -1;
7266                     }
7267                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7268                            != NULL) ;
7269                 }
7270 #endif
7271                 PL_rsfp = NULL;
7272             }
7273             goto fake_eof;
7274         }
7275
7276         case KEY___SUB__:
7277             FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7278
7279         case KEY_AUTOLOAD:
7280         case KEY_DESTROY:
7281         case KEY_BEGIN:
7282         case KEY_UNITCHECK:
7283         case KEY_CHECK:
7284         case KEY_INIT:
7285         case KEY_END:
7286             if (PL_expect == XSTATE) {
7287                 s = PL_bufptr;
7288                 goto really_sub;
7289             }
7290             goto just_a_word;
7291
7292         case KEY_CORE:
7293             if (*s == ':' && s[1] == ':') {
7294                 STRLEN olen = len;
7295                 d = s;
7296                 s += 2;
7297                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7298                 if ((*s == ':' && s[1] == ':')
7299                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7300                 {
7301                     s = d;
7302                     len = olen;
7303                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7304                     goto just_a_word;
7305                 }
7306                 if (!tmp)
7307                     Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7308                                     SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7309                                                 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7310                 if (tmp < 0)
7311                     tmp = -tmp;
7312                 else if (tmp == KEY_require || tmp == KEY_do
7313                       || tmp == KEY_glob)
7314                     /* that's a way to remember we saw "CORE::" */
7315                     orig_keyword = tmp;
7316                 goto reserved_word;
7317             }
7318             goto just_a_word;
7319
7320         case KEY_abs:
7321             UNI(OP_ABS);
7322
7323         case KEY_alarm:
7324             UNI(OP_ALARM);
7325
7326         case KEY_accept:
7327             LOP(OP_ACCEPT,XTERM);
7328
7329         case KEY_and:
7330             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7331                 return REPORT(0);
7332             OPERATOR(ANDOP);
7333
7334         case KEY_atan2:
7335             LOP(OP_ATAN2,XTERM);
7336
7337         case KEY_bind:
7338             LOP(OP_BIND,XTERM);
7339
7340         case KEY_binmode:
7341             LOP(OP_BINMODE,XTERM);
7342
7343         case KEY_bless:
7344             LOP(OP_BLESS,XTERM);
7345
7346         case KEY_break:
7347             FUN0(OP_BREAK);
7348
7349         case KEY_chop:
7350             UNI(OP_CHOP);
7351
7352         case KEY_continue:
7353                     /* We have to disambiguate the two senses of
7354                       "continue". If the next token is a '{' then
7355                       treat it as the start of a continue block;
7356                       otherwise treat it as a control operator.
7357                      */
7358                     s = skipspace(s);
7359                     if (*s == '{')
7360             PREBLOCK(CONTINUE);
7361                     else
7362                         FUN0(OP_CONTINUE);
7363
7364         case KEY_chdir:
7365             /* may use HOME */
7366             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7367             UNI(OP_CHDIR);
7368
7369         case KEY_close:
7370             UNI(OP_CLOSE);
7371
7372         case KEY_closedir:
7373             UNI(OP_CLOSEDIR);
7374
7375         case KEY_cmp:
7376             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7377                 return REPORT(0);
7378             Eop(OP_SCMP);
7379
7380         case KEY_caller:
7381             UNI(OP_CALLER);
7382
7383         case KEY_crypt:
7384 #ifdef FCRYPT
7385             if (!PL_cryptseen) {
7386                 PL_cryptseen = TRUE;
7387                 init_des();
7388             }
7389 #endif
7390             LOP(OP_CRYPT,XTERM);
7391
7392         case KEY_chmod:
7393             LOP(OP_CHMOD,XTERM);
7394
7395         case KEY_chown:
7396             LOP(OP_CHOWN,XTERM);
7397
7398         case KEY_connect:
7399             LOP(OP_CONNECT,XTERM);
7400
7401         case KEY_chr:
7402             UNI(OP_CHR);
7403
7404         case KEY_cos:
7405             UNI(OP_COS);
7406
7407         case KEY_chroot:
7408             UNI(OP_CHROOT);
7409
7410         case KEY_default:
7411             PREBLOCK(DEFAULT);
7412
7413         case KEY_do:
7414             s = SKIPSPACE1(s);
7415             if (*s == '{')
7416                 PRETERMBLOCK(DO);
7417             if (*s != '\'') {
7418                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
7419                 if (len) {
7420                     d = SKIPSPACE1(d);
7421                     if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
7422                 }
7423             }
7424             if (orig_keyword == KEY_do) {
7425                 orig_keyword = 0;
7426                 pl_yylval.ival = 1;
7427             }
7428             else
7429                 pl_yylval.ival = 0;
7430             OPERATOR(DO);
7431
7432         case KEY_die:
7433             PL_hints |= HINT_BLOCK_SCOPE;
7434             LOP(OP_DIE,XTERM);
7435
7436         case KEY_defined:
7437             UNI(OP_DEFINED);
7438
7439         case KEY_delete:
7440             UNI(OP_DELETE);
7441
7442         case KEY_dbmopen:
7443             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7444                               STR_WITH_LEN("NDBM_File::"),
7445                               STR_WITH_LEN("DB_File::"),
7446                               STR_WITH_LEN("GDBM_File::"),
7447                               STR_WITH_LEN("SDBM_File::"),
7448                               STR_WITH_LEN("ODBM_File::"),
7449                               NULL);
7450             LOP(OP_DBMOPEN,XTERM);
7451
7452         case KEY_dbmclose:
7453             UNI(OP_DBMCLOSE);
7454
7455         case KEY_dump:
7456             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7457             LOOPX(OP_DUMP);
7458
7459         case KEY_else:
7460             PREBLOCK(ELSE);
7461
7462         case KEY_elsif:
7463             pl_yylval.ival = CopLINE(PL_curcop);
7464             OPERATOR(ELSIF);
7465
7466         case KEY_eq:
7467             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7468                 return REPORT(0);
7469             Eop(OP_SEQ);
7470
7471         case KEY_exists:
7472             UNI(OP_EXISTS);
7473
7474         case KEY_exit:
7475             if (PL_madskills)
7476                 UNI(OP_INT);
7477             UNI(OP_EXIT);
7478
7479         case KEY_eval:
7480             s = SKIPSPACE1(s);
7481             if (*s == '{') { /* block eval */
7482                 PL_expect = XTERMBLOCK;
7483                 UNIBRACK(OP_ENTERTRY);
7484             }
7485             else { /* string eval */
7486                 PL_expect = XTERM;
7487                 UNIBRACK(OP_ENTEREVAL);
7488             }
7489
7490         case KEY_evalbytes:
7491             PL_expect = XTERM;
7492             UNIBRACK(-OP_ENTEREVAL);
7493
7494         case KEY_eof:
7495             UNI(OP_EOF);
7496
7497         case KEY_exp:
7498             UNI(OP_EXP);
7499
7500         case KEY_each:
7501             UNI(OP_EACH);
7502
7503         case KEY_exec:
7504             LOP(OP_EXEC,XREF);
7505
7506         case KEY_endhostent:
7507             FUN0(OP_EHOSTENT);
7508
7509         case KEY_endnetent:
7510             FUN0(OP_ENETENT);
7511
7512         case KEY_endservent:
7513             FUN0(OP_ESERVENT);
7514
7515         case KEY_endprotoent:
7516             FUN0(OP_EPROTOENT);
7517
7518         case KEY_endpwent:
7519             FUN0(OP_EPWENT);
7520
7521         case KEY_endgrent:
7522             FUN0(OP_EGRENT);
7523
7524         case KEY_for:
7525         case KEY_foreach:
7526             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7527                 return REPORT(0);
7528             pl_yylval.ival = CopLINE(PL_curcop);
7529             s = SKIPSPACE1(s);
7530             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7531                 char *p = s;
7532 #ifdef PERL_MAD
7533                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7534 #endif
7535
7536                 if ((PL_bufend - p) >= 3 &&
7537                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7538                     p += 2;
7539                 else if ((PL_bufend - p) >= 4 &&
7540                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7541                     p += 3;
7542                 p = PEEKSPACE(p);
7543                 if (isIDFIRST_lazy_if(p,UTF)) {
7544                     p = scan_ident(p, PL_bufend,
7545                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7546                     p = PEEKSPACE(p);
7547                 }
7548                 if (*p != '$')
7549                     Perl_croak(aTHX_ "Missing $ on loop variable");
7550 #ifdef PERL_MAD
7551                 s = SvPVX(PL_linestr) + soff;
7552 #endif
7553             }
7554             OPERATOR(FOR);
7555
7556         case KEY_formline:
7557             LOP(OP_FORMLINE,XTERM);
7558
7559         case KEY_fork:
7560             FUN0(OP_FORK);
7561
7562         case KEY_fc:
7563             UNI(OP_FC);
7564
7565         case KEY_fcntl:
7566             LOP(OP_FCNTL,XTERM);
7567
7568         case KEY_fileno:
7569             UNI(OP_FILENO);
7570
7571         case KEY_flock:
7572             LOP(OP_FLOCK,XTERM);
7573
7574         case KEY_gt:
7575             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7576                 return REPORT(0);
7577             Rop(OP_SGT);
7578
7579         case KEY_ge:
7580             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7581                 return REPORT(0);
7582             Rop(OP_SGE);
7583
7584         case KEY_grep:
7585             LOP(OP_GREPSTART, XREF);
7586
7587         case KEY_goto:
7588             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7589             LOOPX(OP_GOTO);
7590
7591         case KEY_gmtime:
7592             UNI(OP_GMTIME);
7593
7594         case KEY_getc:
7595             UNIDOR(OP_GETC);
7596
7597         case KEY_getppid:
7598             FUN0(OP_GETPPID);
7599
7600         case KEY_getpgrp:
7601             UNI(OP_GETPGRP);
7602
7603         case KEY_getpriority:
7604             LOP(OP_GETPRIORITY,XTERM);
7605
7606         case KEY_getprotobyname:
7607             UNI(OP_GPBYNAME);
7608
7609         case KEY_getprotobynumber:
7610             LOP(OP_GPBYNUMBER,XTERM);
7611
7612         case KEY_getprotoent:
7613             FUN0(OP_GPROTOENT);
7614
7615         case KEY_getpwent:
7616             FUN0(OP_GPWENT);
7617
7618         case KEY_getpwnam:
7619             UNI(OP_GPWNAM);
7620
7621         case KEY_getpwuid:
7622             UNI(OP_GPWUID);
7623
7624         case KEY_getpeername:
7625             UNI(OP_GETPEERNAME);
7626
7627         case KEY_gethostbyname:
7628             UNI(OP_GHBYNAME);
7629
7630         case KEY_gethostbyaddr:
7631             LOP(OP_GHBYADDR,XTERM);
7632
7633         case KEY_gethostent:
7634             FUN0(OP_GHOSTENT);
7635
7636         case KEY_getnetbyname:
7637             UNI(OP_GNBYNAME);
7638
7639         case KEY_getnetbyaddr:
7640             LOP(OP_GNBYADDR,XTERM);
7641
7642         case KEY_getnetent:
7643             FUN0(OP_GNETENT);
7644
7645         case KEY_getservbyname:
7646             LOP(OP_GSBYNAME,XTERM);
7647
7648         case KEY_getservbyport:
7649             LOP(OP_GSBYPORT,XTERM);
7650
7651         case KEY_getservent:
7652             FUN0(OP_GSERVENT);
7653
7654         case KEY_getsockname:
7655             UNI(OP_GETSOCKNAME);
7656
7657         case KEY_getsockopt:
7658             LOP(OP_GSOCKOPT,XTERM);
7659
7660         case KEY_getgrent:
7661             FUN0(OP_GGRENT);
7662
7663         case KEY_getgrnam:
7664             UNI(OP_GGRNAM);
7665
7666         case KEY_getgrgid:
7667             UNI(OP_GGRGID);
7668
7669         case KEY_getlogin:
7670             FUN0(OP_GETLOGIN);
7671
7672         case KEY_given:
7673             pl_yylval.ival = CopLINE(PL_curcop);
7674             OPERATOR(GIVEN);
7675
7676         case KEY_glob:
7677             LOP(
7678              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7679              XTERM
7680             );
7681
7682         case KEY_hex:
7683             UNI(OP_HEX);
7684
7685         case KEY_if:
7686             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7687                 return REPORT(0);
7688             pl_yylval.ival = CopLINE(PL_curcop);
7689             OPERATOR(IF);
7690
7691         case KEY_index:
7692             LOP(OP_INDEX,XTERM);
7693
7694         case KEY_int:
7695             UNI(OP_INT);
7696
7697         case KEY_ioctl:
7698             LOP(OP_IOCTL,XTERM);
7699
7700         case KEY_join:
7701             LOP(OP_JOIN,XTERM);
7702
7703         case KEY_keys:
7704             UNI(OP_KEYS);
7705
7706         case KEY_kill:
7707             LOP(OP_KILL,XTERM);
7708
7709         case KEY_last:
7710             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7711             LOOPX(OP_LAST);
7712
7713         case KEY_lc:
7714             UNI(OP_LC);
7715
7716         case KEY_lcfirst:
7717             UNI(OP_LCFIRST);
7718
7719         case KEY_local:
7720             pl_yylval.ival = 0;
7721             OPERATOR(LOCAL);
7722
7723         case KEY_length:
7724             UNI(OP_LENGTH);
7725
7726         case KEY_lt:
7727             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7728                 return REPORT(0);
7729             Rop(OP_SLT);
7730
7731         case KEY_le:
7732             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7733                 return REPORT(0);
7734             Rop(OP_SLE);
7735
7736         case KEY_localtime:
7737             UNI(OP_LOCALTIME);
7738
7739         case KEY_log:
7740             UNI(OP_LOG);
7741
7742         case KEY_link:
7743             LOP(OP_LINK,XTERM);
7744
7745         case KEY_listen:
7746             LOP(OP_LISTEN,XTERM);
7747
7748         case KEY_lock:
7749             UNI(OP_LOCK);
7750
7751         case KEY_lstat:
7752             UNI(OP_LSTAT);
7753
7754         case KEY_m:
7755             s = scan_pat(s,OP_MATCH);
7756             TERM(sublex_start());
7757
7758         case KEY_map:
7759             LOP(OP_MAPSTART, XREF);
7760
7761         case KEY_mkdir:
7762             LOP(OP_MKDIR,XTERM);
7763
7764         case KEY_msgctl:
7765             LOP(OP_MSGCTL,XTERM);
7766
7767         case KEY_msgget:
7768             LOP(OP_MSGGET,XTERM);
7769
7770         case KEY_msgrcv:
7771             LOP(OP_MSGRCV,XTERM);
7772
7773         case KEY_msgsnd:
7774             LOP(OP_MSGSND,XTERM);
7775
7776         case KEY_our:
7777         case KEY_my:
7778         case KEY_state:
7779             PL_in_my = (U16)tmp;
7780             s = SKIPSPACE1(s);
7781             if (isIDFIRST_lazy_if(s,UTF)) {
7782 #ifdef PERL_MAD
7783                 char* start = s;
7784 #endif
7785                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7786                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7787                     goto really_sub;
7788                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7789                 if (!PL_in_my_stash) {
7790                     char tmpbuf[1024];
7791                     PL_bufptr = s;
7792                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7793                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7794                 }
7795 #ifdef PERL_MAD
7796                 if (PL_madskills) {     /* just add type to declarator token */
7797                     sv_catsv(PL_thistoken, PL_nextwhite);
7798                     PL_nextwhite = 0;
7799                     sv_catpvn(PL_thistoken, start, s - start);
7800                 }
7801 #endif
7802             }
7803             pl_yylval.ival = 1;
7804             OPERATOR(MY);
7805
7806         case KEY_next:
7807             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7808             LOOPX(OP_NEXT);
7809
7810         case KEY_ne:
7811             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7812                 return REPORT(0);
7813             Eop(OP_SNE);
7814
7815         case KEY_no:
7816             s = tokenize_use(0, s);
7817             TERM(USE);
7818
7819         case KEY_not:
7820             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7821                 FUN1(OP_NOT);
7822             else {
7823                 if (!PL_lex_allbrackets &&
7824                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7825                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7826                 OPERATOR(NOTOP);
7827             }
7828
7829         case KEY_open:
7830             s = SKIPSPACE1(s);
7831             if (isIDFIRST_lazy_if(s,UTF)) {
7832                 const char *t;
7833                 for (d = s; isALNUM_lazy_if(d,UTF);) {
7834                     d += UTF ? UTF8SKIP(d) : 1;
7835                     if (UTF) {
7836                         while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7837                             d += UTF ? UTF8SKIP(d) : 1;
7838                         }
7839                     }
7840                 }
7841                 for (t=d; isSPACE(*t);)
7842                     t++;
7843                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7844                     /* [perl #16184] */
7845                     && !(t[0] == '=' && t[1] == '>')
7846                     && !(t[0] == ':' && t[1] == ':')
7847                     && !keyword(s, d-s, 0)
7848                 ) {
7849                     SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7850                                                 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
7851                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7852                            "Precedence problem: open %"SVf" should be open(%"SVf")",
7853                             SVfARG(tmpsv), SVfARG(tmpsv));
7854                 }
7855             }
7856             LOP(OP_OPEN,XTERM);
7857
7858         case KEY_or:
7859             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7860                 return REPORT(0);
7861             pl_yylval.ival = OP_OR;
7862             OPERATOR(OROP);
7863
7864         case KEY_ord:
7865             UNI(OP_ORD);
7866
7867         case KEY_oct:
7868             UNI(OP_OCT);
7869
7870         case KEY_opendir:
7871             LOP(OP_OPEN_DIR,XTERM);
7872
7873         case KEY_print:
7874             checkcomma(s,PL_tokenbuf,"filehandle");
7875             LOP(OP_PRINT,XREF);
7876
7877         case KEY_printf:
7878             checkcomma(s,PL_tokenbuf,"filehandle");
7879             LOP(OP_PRTF,XREF);
7880
7881         case KEY_prototype:
7882             UNI(OP_PROTOTYPE);
7883
7884         case KEY_push:
7885             LOP(OP_PUSH,XTERM);
7886
7887         case KEY_pop:
7888             UNIDOR(OP_POP);
7889
7890         case KEY_pos:
7891             UNIDOR(OP_POS);
7892
7893         case KEY_pack:
7894             LOP(OP_PACK,XTERM);
7895
7896         case KEY_package:
7897             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7898             s = SKIPSPACE1(s);
7899             s = force_strict_version(s);
7900             PL_lex_expect = XBLOCK;
7901             OPERATOR(PACKAGE);
7902
7903         case KEY_pipe:
7904             LOP(OP_PIPE_OP,XTERM);
7905
7906         case KEY_q:
7907             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7908             if (!s)
7909                 missingterm(NULL);
7910             pl_yylval.ival = OP_CONST;
7911             TERM(sublex_start());
7912
7913         case KEY_quotemeta:
7914             UNI(OP_QUOTEMETA);
7915
7916         case KEY_qw: {
7917             OP *words = NULL;
7918             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7919             if (!s)
7920                 missingterm(NULL);
7921             PL_expect = XOPERATOR;
7922             if (SvCUR(PL_lex_stuff)) {
7923                 int warned_comma = !ckWARN(WARN_QW);
7924                 int warned_comment = warned_comma;
7925                 d = SvPV_force(PL_lex_stuff, len);
7926                 while (len) {
7927                     for (; isSPACE(*d) && len; --len, ++d)
7928                         /**/;
7929                     if (len) {
7930                         SV *sv;
7931                         const char *b = d;
7932                         if (!warned_comma || !warned_comment) {
7933                             for (; !isSPACE(*d) && len; --len, ++d) {
7934                                 if (!warned_comma && *d == ',') {
7935                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7936                                         "Possible attempt to separate words with commas");
7937                                     ++warned_comma;
7938                                 }
7939                                 else if (!warned_comment && *d == '#') {
7940                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7941                                         "Possible attempt to put comments in qw() list");
7942                                     ++warned_comment;
7943                                 }
7944                             }
7945                         }
7946                         else {
7947                             for (; !isSPACE(*d) && len; --len, ++d)
7948                                 /**/;
7949                         }
7950                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7951                         words = op_append_elem(OP_LIST, words,
7952                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7953                     }
7954                 }
7955             }
7956             if (!words)
7957                 words = newNULLLIST();
7958             if (PL_lex_stuff) {
7959                 SvREFCNT_dec(PL_lex_stuff);
7960                 PL_lex_stuff = NULL;
7961             }
7962             PL_expect = XOPERATOR;
7963             pl_yylval.opval = sawparens(words);
7964             TOKEN(QWLIST);
7965         }
7966
7967         case KEY_qq:
7968             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7969             if (!s)
7970                 missingterm(NULL);
7971             pl_yylval.ival = OP_STRINGIFY;
7972             if (SvIVX(PL_lex_stuff) == '\'')
7973                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7974             TERM(sublex_start());
7975
7976         case KEY_qr:
7977             s = scan_pat(s,OP_QR);
7978             TERM(sublex_start());
7979
7980         case KEY_qx:
7981             s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7982             if (!s)
7983                 missingterm(NULL);
7984             readpipe_override();
7985             TERM(sublex_start());
7986
7987         case KEY_return:
7988             OLDLOP(OP_RETURN);
7989
7990         case KEY_require:
7991             s = SKIPSPACE1(s);
7992             if (isDIGIT(*s)) {
7993                 s = force_version(s, FALSE);
7994             }
7995             else if (*s != 'v' || !isDIGIT(s[1])
7996                     || (s = force_version(s, TRUE), *s == 'v'))
7997             {
7998                 *PL_tokenbuf = '\0';
7999                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8000                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8001                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8002                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8003                 else if (*s == '<')
8004                     yyerror("<> should be quotes");
8005             }
8006             if (orig_keyword == KEY_require) {
8007                 orig_keyword = 0;
8008                 pl_yylval.ival = 1;
8009             }
8010             else
8011                 pl_yylval.ival = 0;
8012             PL_expect = XTERM;
8013             PL_bufptr = s;
8014             PL_last_uni = PL_oldbufptr;
8015             PL_last_lop_op = OP_REQUIRE;
8016             s = skipspace(s);
8017             return REPORT( (int)REQUIRE );
8018
8019         case KEY_reset:
8020             UNI(OP_RESET);
8021
8022         case KEY_redo:
8023             s = force_word(s,WORD,TRUE,FALSE,FALSE);
8024             LOOPX(OP_REDO);
8025
8026         case KEY_rename:
8027             LOP(OP_RENAME,XTERM);
8028
8029         case KEY_rand:
8030             UNI(OP_RAND);
8031
8032         case KEY_rmdir:
8033             UNI(OP_RMDIR);
8034
8035         case KEY_rindex:
8036             LOP(OP_RINDEX,XTERM);
8037
8038         case KEY_read:
8039             LOP(OP_READ,XTERM);
8040
8041         case KEY_readdir:
8042             UNI(OP_READDIR);
8043
8044         case KEY_readline:
8045             UNIDOR(OP_READLINE);
8046
8047         case KEY_readpipe:
8048             UNIDOR(OP_BACKTICK);
8049
8050         case KEY_rewinddir:
8051             UNI(OP_REWINDDIR);
8052
8053         case KEY_recv:
8054             LOP(OP_RECV,XTERM);
8055
8056         case KEY_reverse:
8057             LOP(OP_REVERSE,XTERM);
8058
8059         case KEY_readlink:
8060             UNIDOR(OP_READLINK);
8061
8062         case KEY_ref:
8063             UNI(OP_REF);
8064
8065         case KEY_s:
8066             s = scan_subst(s);
8067             if (pl_yylval.opval)
8068                 TERM(sublex_start());
8069             else
8070                 TOKEN(1);       /* force error */
8071
8072         case KEY_say:
8073             checkcomma(s,PL_tokenbuf,"filehandle");
8074             LOP(OP_SAY,XREF);
8075
8076         case KEY_chomp:
8077             UNI(OP_CHOMP);
8078
8079         case KEY_scalar:
8080             UNI(OP_SCALAR);
8081
8082         case KEY_select:
8083             LOP(OP_SELECT,XTERM);
8084
8085         case KEY_seek:
8086             LOP(OP_SEEK,XTERM);
8087
8088         case KEY_semctl:
8089             LOP(OP_SEMCTL,XTERM);
8090
8091         case KEY_semget:
8092             LOP(OP_SEMGET,XTERM);
8093
8094         case KEY_semop:
8095             LOP(OP_SEMOP,XTERM);
8096
8097         case KEY_send:
8098             LOP(OP_SEND,XTERM);
8099
8100         case KEY_setpgrp:
8101             LOP(OP_SETPGRP,XTERM);
8102
8103         case KEY_setpriority:
8104             LOP(OP_SETPRIORITY,XTERM);
8105
8106         case KEY_sethostent:
8107             UNI(OP_SHOSTENT);
8108
8109         case KEY_setnetent:
8110             UNI(OP_SNETENT);
8111
8112         case KEY_setservent:
8113             UNI(OP_SSERVENT);
8114
8115         case KEY_setprotoent:
8116             UNI(OP_SPROTOENT);
8117
8118         case KEY_setpwent:
8119             FUN0(OP_SPWENT);
8120
8121         case KEY_setgrent:
8122             FUN0(OP_SGRENT);
8123
8124         case KEY_seekdir:
8125             LOP(OP_SEEKDIR,XTERM);
8126
8127         case KEY_setsockopt:
8128             LOP(OP_SSOCKOPT,XTERM);
8129
8130         case KEY_shift:
8131             UNIDOR(OP_SHIFT);
8132
8133         case KEY_shmctl:
8134             LOP(OP_SHMCTL,XTERM);
8135
8136         case KEY_shmget:
8137             LOP(OP_SHMGET,XTERM);
8138
8139         case KEY_shmread:
8140             LOP(OP_SHMREAD,XTERM);
8141
8142         case KEY_shmwrite:
8143             LOP(OP_SHMWRITE,XTERM);
8144
8145         case KEY_shutdown:
8146             LOP(OP_SHUTDOWN,XTERM);
8147
8148         case KEY_sin:
8149             UNI(OP_SIN);
8150
8151         case KEY_sleep:
8152             UNI(OP_SLEEP);
8153
8154         case KEY_socket:
8155             LOP(OP_SOCKET,XTERM);
8156
8157         case KEY_socketpair:
8158             LOP(OP_SOCKPAIR,XTERM);
8159
8160         case KEY_sort:
8161             checkcomma(s,PL_tokenbuf,"subroutine name");
8162             s = SKIPSPACE1(s);
8163             PL_expect = XTERM;
8164             s = force_word(s,WORD,TRUE,TRUE,FALSE);
8165             LOP(OP_SORT,XREF);
8166
8167         case KEY_split:
8168             LOP(OP_SPLIT,XTERM);
8169
8170         case KEY_sprintf:
8171             LOP(OP_SPRINTF,XTERM);
8172
8173         case KEY_splice:
8174             LOP(OP_SPLICE,XTERM);
8175
8176         case KEY_sqrt:
8177             UNI(OP_SQRT);
8178
8179         case KEY_srand:
8180             UNI(OP_SRAND);
8181
8182         case KEY_stat:
8183             UNI(OP_STAT);
8184
8185         case KEY_study:
8186             UNI(OP_STUDY);
8187
8188         case KEY_substr:
8189             LOP(OP_SUBSTR,XTERM);
8190
8191         case KEY_format:
8192         case KEY_sub:
8193           really_sub:
8194             {
8195                 char tmpbuf[sizeof PL_tokenbuf];
8196                 SSize_t tboffset = 0;
8197                 expectation attrful;
8198                 bool have_name, have_proto;
8199                 const int key = tmp;
8200
8201 #ifdef PERL_MAD
8202                 SV *tmpwhite = 0;
8203
8204                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8205                 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8206                 PL_thistoken = 0;
8207
8208                 d = s;
8209                 s = SKIPSPACE2(s,tmpwhite);
8210 #else
8211                 s = skipspace(s);
8212 #endif
8213
8214                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8215                     (*s == ':' && s[1] == ':'))
8216                 {
8217 #ifdef PERL_MAD
8218                     SV *nametoke = NULL;
8219 #endif
8220
8221                     PL_expect = XBLOCK;
8222                     attrful = XATTRBLOCK;
8223                     /* remember buffer pos'n for later force_word */
8224                     tboffset = s - PL_oldbufptr;
8225                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
8226 #ifdef PERL_MAD
8227                     if (PL_madskills)
8228                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8229 #endif
8230                     if (memchr(tmpbuf, ':', len))
8231                         sv_setpvn(PL_subname, tmpbuf, len);
8232                     else {
8233                         sv_setsv(PL_subname,PL_curstname);
8234                         sv_catpvs(PL_subname,"::");
8235                         sv_catpvn(PL_subname,tmpbuf,len);
8236                     }
8237                     if (SvUTF8(PL_linestr))
8238                         SvUTF8_on(PL_subname);
8239                     have_name = TRUE;
8240
8241 #ifdef PERL_MAD
8242
8243                     start_force(0);
8244                     CURMAD('X', nametoke);
8245                     CURMAD('_', tmpwhite);
8246                     (void) force_word(PL_oldbufptr + tboffset, WORD,
8247                                       FALSE, TRUE, TRUE);
8248
8249                     s = SKIPSPACE2(d,tmpwhite);
8250 #else
8251                     s = skipspace(d);
8252 #endif
8253                 }
8254                 else {
8255                     if (key == KEY_my)
8256                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
8257                     PL_expect = XTERMBLOCK;
8258                     attrful = XATTRTERM;
8259                     sv_setpvs(PL_subname,"?");
8260                     have_name = FALSE;
8261                 }
8262
8263                 if (key == KEY_format) {
8264 #ifdef PERL_MAD
8265                     PL_thistoken = subtoken;
8266                     s = d;
8267 #else
8268                     if (have_name)
8269                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8270                                           FALSE, TRUE, TRUE);
8271 #endif
8272                     PREBLOCK(FORMAT);
8273                 }
8274
8275                 /* Look for a prototype */
8276                 if (*s == '(') {
8277                     char *p;
8278                     bool bad_proto = FALSE;
8279                     bool in_brackets = FALSE;
8280                     char greedy_proto = ' ';
8281                     bool proto_after_greedy_proto = FALSE;
8282                     bool must_be_last = FALSE;
8283                     bool underscore = FALSE;
8284                     bool seen_underscore = FALSE;
8285                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8286                     STRLEN tmplen;
8287
8288                     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8289                     if (!s)
8290                         Perl_croak(aTHX_ "Prototype not terminated");
8291                     /* strip spaces and check for bad characters */
8292                     d = SvPV(PL_lex_stuff, tmplen);
8293                     tmp = 0;
8294                     for (p = d; tmplen; tmplen--, ++p) {
8295                         if (!isSPACE(*p)) {
8296                             d[tmp++] = *p;
8297
8298                             if (warnillegalproto) {
8299                                 if (must_be_last)
8300                                     proto_after_greedy_proto = TRUE;
8301                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8302                                     bad_proto = TRUE;
8303                                 }
8304                                 else {
8305                                     if ( underscore ) {
8306                                         if ( !strchr(";@%", *p) )
8307                                             bad_proto = TRUE;
8308                                         underscore = FALSE;
8309                                     }
8310                                     if ( *p == '[' ) {
8311                                         in_brackets = TRUE;
8312                                     }
8313                                     else if ( *p == ']' ) {
8314                                         in_brackets = FALSE;
8315                                     }
8316                                     else if ( (*p == '@' || *p == '%') &&
8317                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8318                                          !in_brackets ) {
8319                                         must_be_last = TRUE;
8320                                         greedy_proto = *p;
8321                                     }
8322                                     else if ( *p == '_' ) {
8323                                         underscore = seen_underscore = TRUE;
8324                                     }
8325                                 }
8326                             }
8327                         }
8328                     }
8329                     d[tmp] = '\0';
8330                     if (proto_after_greedy_proto)
8331                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8332                                     "Prototype after '%c' for %"SVf" : %s",
8333                                     greedy_proto, SVfARG(PL_subname), d);
8334                     if (bad_proto) {
8335                         SV *dsv = newSVpvs_flags("", SVs_TEMP);
8336                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8337                                     "Illegal character %sin prototype for %"SVf" : %s",
8338                                     seen_underscore ? "after '_' " : "",
8339                                     SVfARG(PL_subname),
8340                                     SvUTF8(PL_lex_stuff)
8341                                         ? sv_uni_display(dsv,
8342                                             newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8343                                             tmp,
8344                                             UNI_DISPLAY_ISPRINT)
8345                                         : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8346                                             PERL_PV_ESCAPE_NONASCII));
8347                     }
8348                     SvCUR_set(PL_lex_stuff, tmp);
8349                     have_proto = TRUE;
8350
8351 #ifdef PERL_MAD
8352                     start_force(0);
8353                     CURMAD('q', PL_thisopen);
8354                     CURMAD('_', tmpwhite);
8355                     CURMAD('=', PL_thisstuff);
8356                     CURMAD('Q', PL_thisclose);
8357                     NEXTVAL_NEXTTOKE.opval =
8358                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8359                     PL_lex_stuff = NULL;
8360                     force_next(THING);
8361
8362                     s = SKIPSPACE2(s,tmpwhite);
8363 #else
8364                     s = skipspace(s);
8365 #endif
8366                 }
8367                 else
8368                     have_proto = FALSE;
8369
8370                 if (*s == ':' && s[1] != ':')
8371                     PL_expect = attrful;
8372                 else if (*s != '{' && key == KEY_sub) {
8373                     if (!have_name)
8374                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8375                     else if (*s != ';' && *s != '}')
8376                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8377                 }
8378
8379 #ifdef PERL_MAD
8380                 start_force(0);
8381                 if (tmpwhite) {
8382                     if (PL_madskills)
8383                         curmad('^', newSVpvs(""));
8384                     CURMAD('_', tmpwhite);
8385                 }
8386                 force_next(0);
8387
8388                 PL_thistoken = subtoken;
8389 #else
8390                 if (have_proto) {
8391                     NEXTVAL_NEXTTOKE.opval =
8392                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8393                     PL_lex_stuff = NULL;
8394                     force_next(THING);
8395                 }
8396 #endif
8397                 if (!have_name) {
8398                     if (PL_curstash)
8399                         sv_setpvs(PL_subname, "__ANON__");
8400                     else
8401                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8402                     TOKEN(ANONSUB);
8403                 }
8404 #ifndef PERL_MAD
8405                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8406                                   FALSE, TRUE, TRUE);
8407 #endif
8408                 if (key == KEY_my)
8409                     TOKEN(MYSUB);
8410                 TOKEN(SUB);
8411             }
8412
8413         case KEY_system:
8414             LOP(OP_SYSTEM,XREF);
8415
8416         case KEY_symlink:
8417             LOP(OP_SYMLINK,XTERM);
8418
8419         case KEY_syscall:
8420             LOP(OP_SYSCALL,XTERM);
8421
8422         case KEY_sysopen:
8423             LOP(OP_SYSOPEN,XTERM);
8424
8425         case KEY_sysseek:
8426             LOP(OP_SYSSEEK,XTERM);
8427
8428         case KEY_sysread:
8429             LOP(OP_SYSREAD,XTERM);
8430
8431         case KEY_syswrite:
8432             LOP(OP_SYSWRITE,XTERM);
8433
8434         case KEY_tr:
8435             s = scan_trans(s);
8436             TERM(sublex_start());
8437
8438         case KEY_tell:
8439             UNI(OP_TELL);
8440
8441         case KEY_telldir:
8442             UNI(OP_TELLDIR);
8443
8444         case KEY_tie:
8445             LOP(OP_TIE,XTERM);
8446
8447         case KEY_tied:
8448             UNI(OP_TIED);
8449
8450         case KEY_time:
8451             FUN0(OP_TIME);
8452
8453         case KEY_times:
8454             FUN0(OP_TMS);
8455
8456         case KEY_truncate:
8457             LOP(OP_TRUNCATE,XTERM);
8458
8459         case KEY_uc:
8460             UNI(OP_UC);
8461
8462         case KEY_ucfirst:
8463             UNI(OP_UCFIRST);
8464
8465         case KEY_untie:
8466             UNI(OP_UNTIE);
8467
8468         case KEY_until:
8469             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8470                 return REPORT(0);
8471             pl_yylval.ival = CopLINE(PL_curcop);
8472             OPERATOR(UNTIL);
8473
8474         case KEY_unless:
8475             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8476                 return REPORT(0);
8477             pl_yylval.ival = CopLINE(PL_curcop);
8478             OPERATOR(UNLESS);
8479
8480         case KEY_unlink:
8481             LOP(OP_UNLINK,XTERM);
8482
8483         case KEY_undef:
8484             UNIDOR(OP_UNDEF);
8485
8486         case KEY_unpack:
8487             LOP(OP_UNPACK,XTERM);
8488
8489         case KEY_utime:
8490             LOP(OP_UTIME,XTERM);
8491
8492         case KEY_umask:
8493             UNIDOR(OP_UMASK);
8494
8495         case KEY_unshift:
8496             LOP(OP_UNSHIFT,XTERM);
8497
8498         case KEY_use:
8499             s = tokenize_use(1, s);
8500             OPERATOR(USE);
8501
8502         case KEY_values:
8503             UNI(OP_VALUES);
8504
8505         case KEY_vec:
8506             LOP(OP_VEC,XTERM);
8507
8508         case KEY_when:
8509             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8510                 return REPORT(0);
8511             pl_yylval.ival = CopLINE(PL_curcop);
8512             OPERATOR(WHEN);
8513
8514         case KEY_while:
8515             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8516                 return REPORT(0);
8517             pl_yylval.ival = CopLINE(PL_curcop);
8518             OPERATOR(WHILE);
8519
8520         case KEY_warn:
8521             PL_hints |= HINT_BLOCK_SCOPE;
8522             LOP(OP_WARN,XTERM);
8523
8524         case KEY_wait:
8525             FUN0(OP_WAIT);
8526
8527         case KEY_waitpid:
8528             LOP(OP_WAITPID,XTERM);
8529
8530         case KEY_wantarray:
8531             FUN0(OP_WANTARRAY);
8532
8533         case KEY_write:
8534 #ifdef EBCDIC
8535         {
8536             char ctl_l[2];
8537             ctl_l[0] = toCTRL('L');
8538             ctl_l[1] = '\0';
8539             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8540         }
8541 #else
8542             /* Make sure $^L is defined */
8543             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8544 #endif
8545             UNI(OP_ENTERWRITE);
8546
8547         case KEY_x:
8548             if (PL_expect == XOPERATOR) {
8549                 if (*s == '=' && !PL_lex_allbrackets &&
8550                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8551                     return REPORT(0);
8552                 Mop(OP_REPEAT);
8553             }
8554             check_uni();
8555             goto just_a_word;
8556
8557         case KEY_xor:
8558             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8559                 return REPORT(0);
8560             pl_yylval.ival = OP_XOR;
8561             OPERATOR(OROP);
8562
8563         case KEY_y:
8564             s = scan_trans(s);
8565             TERM(sublex_start());
8566         }
8567     }}
8568 }
8569 #ifdef __SC__
8570 #pragma segment Main
8571 #endif
8572
8573 static int
8574 S_pending_ident(pTHX)
8575 {
8576     dVAR;
8577     PADOFFSET tmp = 0;
8578     /* pit holds the identifier we read and pending_ident is reset */
8579     char pit = PL_pending_ident;
8580     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8581     /* All routes through this function want to know if there is a colon.  */
8582     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8583     PL_pending_ident = 0;
8584
8585     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8586     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8587           "### Pending identifier '%s'\n", PL_tokenbuf); });
8588
8589     /* if we're in a my(), we can't allow dynamics here.
8590        $foo'bar has already been turned into $foo::bar, so
8591        just check for colons.
8592
8593        if it's a legal name, the OP is a PADANY.
8594     */
8595     if (PL_in_my) {
8596         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8597             if (has_colon)
8598                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8599                                   "variable %s in \"our\"",
8600                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8601             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8602         }
8603         else {
8604             if (has_colon)
8605                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8606                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8607                             UTF ? SVf_UTF8 : 0);
8608
8609             pl_yylval.opval = newOP(OP_PADANY, 0);
8610             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8611                                                         UTF ? SVf_UTF8 : 0);
8612             return PRIVATEREF;
8613         }
8614     }
8615
8616     /*
8617        build the ops for accesses to a my() variable.
8618     */
8619
8620     if (!has_colon) {
8621         if (!PL_in_my)
8622             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8623                                     UTF ? SVf_UTF8 : 0);
8624         if (tmp != NOT_IN_PAD) {
8625             /* might be an "our" variable" */
8626             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8627                 /* build ops for a bareword */
8628                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8629                 HEK * const stashname = HvNAME_HEK(stash);
8630                 SV *  const sym = newSVhek(stashname);
8631                 sv_catpvs(sym, "::");
8632                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8633                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8634                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8635                 gv_fetchsv(sym,
8636                     (PL_in_eval
8637                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8638                         : GV_ADDMULTI
8639                     ),
8640                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8641                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8642                      : SVt_PVHV));
8643                 return WORD;
8644             }
8645
8646             pl_yylval.opval = newOP(OP_PADANY, 0);
8647             pl_yylval.opval->op_targ = tmp;
8648             return PRIVATEREF;
8649         }
8650     }
8651
8652     /*
8653        Whine if they've said @foo in a doublequoted string,
8654        and @foo isn't a variable we can find in the symbol
8655        table.
8656     */
8657     if (ckWARN(WARN_AMBIGUOUS) &&
8658         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8659         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8660                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8661         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8662                 /* DO NOT warn for @- and @+ */
8663                 && !( PL_tokenbuf[2] == '\0' &&
8664                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8665            )
8666         {
8667             /* Downgraded from fatal to warning 20000522 mjd */
8668             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8669                         "Possible unintended interpolation of %"SVf" in string",
8670                         SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8671                                         SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8672         }
8673     }
8674
8675     /* build ops for a bareword */
8676     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8677                                                       tokenbuf_len - 1,
8678                                                       UTF ? SVf_UTF8 : 0 ));
8679     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8680     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8681                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8682                      | ( UTF ? SVf_UTF8 : 0 ),
8683                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8684                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8685                       : SVt_PVHV));
8686     return WORD;
8687 }
8688
8689 STATIC void
8690 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8691 {
8692     dVAR;
8693
8694     PERL_ARGS_ASSERT_CHECKCOMMA;
8695
8696     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8697         if (ckWARN(WARN_SYNTAX)) {
8698             int level = 1;
8699             const char *w;
8700             for (w = s+2; *w && level; w++) {
8701                 if (*w == '(')
8702                     ++level;
8703                 else if (*w == ')')
8704                     --level;
8705             }
8706             while (isSPACE(*w))
8707                 ++w;
8708             /* the list of chars below is for end of statements or
8709              * block / parens, boolean operators (&&, ||, //) and branch
8710              * constructs (or, and, if, until, unless, while, err, for).
8711              * Not a very solid hack... */
8712             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8713                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8714                             "%s (...) interpreted as function",name);
8715         }
8716     }
8717     while (s < PL_bufend && isSPACE(*s))
8718         s++;
8719     if (*s == '(')
8720         s++;
8721     while (s < PL_bufend && isSPACE(*s))
8722         s++;
8723     if (isIDFIRST_lazy_if(s,UTF)) {
8724         const char * const w = s;
8725         s += UTF ? UTF8SKIP(s) : 1;
8726         while (isALNUM_lazy_if(s,UTF))
8727             s += UTF ? UTF8SKIP(s) : 1;
8728         while (s < PL_bufend && isSPACE(*s))
8729             s++;
8730         if (*s == ',') {
8731             GV* gv;
8732             if (keyword(w, s - w, 0))
8733                 return;
8734
8735             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8736             if (gv && GvCVu(gv))
8737                 return;
8738             Perl_croak(aTHX_ "No comma allowed after %s", what);
8739         }
8740     }
8741 }
8742
8743 /* Either returns sv, or mortalizes sv and returns a new SV*.
8744    Best used as sv=new_constant(..., sv, ...).
8745    If s, pv are NULL, calls subroutine with one argument,
8746    and type is used with error messages only. */
8747
8748 STATIC SV *
8749 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8750                SV *sv, SV *pv, const char *type, STRLEN typelen)
8751 {
8752     dVAR; dSP;
8753     HV * table = GvHV(PL_hintgv);                /* ^H */
8754     SV *res;
8755     SV **cvp;
8756     SV *cv, *typesv;
8757     const char *why1 = "", *why2 = "", *why3 = "";
8758
8759     PERL_ARGS_ASSERT_NEW_CONSTANT;
8760
8761     /* charnames doesn't work well if there have been errors found */
8762     if (PL_error_count > 0 && strEQ(key,"charnames"))
8763         return &PL_sv_undef;
8764
8765     if (!table
8766         || ! (PL_hints & HINT_LOCALIZE_HH)
8767         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8768         || ! SvOK(*cvp))
8769     {
8770         SV *msg;
8771
8772         /* Here haven't found what we're looking for.  If it is charnames,
8773          * perhaps it needs to be loaded.  Try doing that before giving up */
8774         if (strEQ(key,"charnames")) {
8775             Perl_load_module(aTHX_
8776                             0,
8777                             newSVpvs("_charnames"),
8778                              /* version parameter; no need to specify it, as if
8779                               * we get too early a version, will fail anyway,
8780                               * not being able to find '_charnames' */
8781                             NULL,
8782                             newSVpvs(":full"),
8783                             newSVpvs(":short"),
8784                             NULL);
8785             SPAGAIN;
8786             table = GvHV(PL_hintgv);
8787             if (table
8788                 && (PL_hints & HINT_LOCALIZE_HH)
8789                 && (cvp = hv_fetch(table, key, keylen, FALSE))
8790                 && SvOK(*cvp))
8791             {
8792                 goto now_ok;
8793             }
8794         }
8795         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8796             msg = Perl_newSVpvf(aTHX_
8797                             "Constant(%s) unknown", (type ? type: "undef"));
8798         }
8799         else {
8800         why1 = "$^H{";
8801         why2 = key;
8802         why3 = "} is not defined";
8803     report:
8804         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8805                             (type ? type: "undef"), why1, why2, why3);
8806         }
8807         yyerror(SvPVX_const(msg));
8808         SvREFCNT_dec(msg);
8809         return sv;
8810     }
8811 now_ok:
8812     sv_2mortal(sv);                     /* Parent created it permanently */
8813     cv = *cvp;
8814     if (!pv && s)
8815         pv = newSVpvn_flags(s, len, SVs_TEMP);
8816     if (type && pv)
8817         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8818     else
8819         typesv = &PL_sv_undef;
8820
8821     PUSHSTACKi(PERLSI_OVERLOAD);
8822     ENTER ;
8823     SAVETMPS;
8824
8825     PUSHMARK(SP) ;
8826     EXTEND(sp, 3);
8827     if (pv)
8828         PUSHs(pv);
8829     PUSHs(sv);
8830     if (pv)
8831         PUSHs(typesv);
8832     PUTBACK;
8833     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8834
8835     SPAGAIN ;
8836
8837     /* Check the eval first */
8838     if (!PL_in_eval && SvTRUE(ERRSV)) {
8839         sv_catpvs(ERRSV, "Propagated");
8840         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8841         (void)POPs;
8842         res = SvREFCNT_inc_simple(sv);
8843     }
8844     else {
8845         res = POPs;
8846         SvREFCNT_inc_simple_void(res);
8847     }
8848
8849     PUTBACK ;
8850     FREETMPS ;
8851     LEAVE ;
8852     POPSTACK;
8853
8854     if (!SvOK(res)) {
8855         why1 = "Call to &{$^H{";
8856         why2 = key;
8857         why3 = "}} did not return a defined value";
8858         sv = res;
8859         goto report;
8860     }
8861
8862     return res;
8863 }
8864
8865 /* Returns a NUL terminated string, with the length of the string written to
8866    *slp
8867    */
8868 STATIC char *
8869 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8870 {
8871     dVAR;
8872     char *d = dest;
8873     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8874
8875     PERL_ARGS_ASSERT_SCAN_WORD;
8876
8877     for (;;) {
8878         if (d >= e)
8879             Perl_croak(aTHX_ ident_too_long);
8880         if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s)))   /* UTF handled below */
8881             *d++ = *s++;
8882         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8883             *d++ = ':';
8884             *d++ = ':';
8885             s++;
8886         }
8887         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8888             *d++ = *s++;
8889             *d++ = *s++;
8890         }
8891         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8892             char *t = s + UTF8SKIP(s);
8893             size_t len;
8894             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8895                 t += UTF8SKIP(t);
8896             len = t - s;
8897             if (d + len > e)
8898                 Perl_croak(aTHX_ ident_too_long);
8899             Copy(s, d, len, char);
8900             d += len;
8901             s = t;
8902         }
8903         else {
8904             *d = '\0';
8905             *slp = d - dest;
8906             return s;
8907         }
8908     }
8909 }
8910
8911 STATIC char *
8912 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8913 {
8914     dVAR;
8915     char *bracket = NULL;
8916     char funny = *s++;
8917     char *d = dest;
8918     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8919
8920     PERL_ARGS_ASSERT_SCAN_IDENT;
8921
8922     if (isSPACE(*s))
8923         s = PEEKSPACE(s);
8924     if (isDIGIT(*s)) {
8925         while (isDIGIT(*s)) {
8926             if (d >= e)
8927                 Perl_croak(aTHX_ ident_too_long);
8928             *d++ = *s++;
8929         }
8930     }
8931     else {
8932         for (;;) {
8933             if (d >= e)
8934                 Perl_croak(aTHX_ ident_too_long);
8935             if (isALNUM(*s))    /* UTF handled below */
8936                 *d++ = *s++;
8937             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8938                 *d++ = ':';
8939                 *d++ = ':';
8940                 s++;
8941             }
8942             else if (*s == ':' && s[1] == ':') {
8943                 *d++ = *s++;
8944                 *d++ = *s++;
8945             }
8946             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8947                 char *t = s + UTF8SKIP(s);
8948                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8949                     t += UTF8SKIP(t);
8950                 if (d + (t - s) > e)
8951                     Perl_croak(aTHX_ ident_too_long);
8952                 Copy(s, d, t - s, char);
8953                 d += t - s;
8954                 s = t;
8955             }
8956             else
8957                 break;
8958         }
8959     }
8960     *d = '\0';
8961     d = dest;
8962     if (*d) {
8963         if (PL_lex_state != LEX_NORMAL)
8964             PL_lex_state = LEX_INTERPENDMAYBE;
8965         return s;
8966     }
8967     if (*s == '$' && s[1] &&
8968         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8969     {
8970         return s;
8971     }
8972     if (*s == '{') {
8973         bracket = s;
8974         s++;
8975     }
8976     if (s < send) {
8977         if (UTF) {
8978             const STRLEN skip = UTF8SKIP(s);
8979             STRLEN i;
8980             d[skip] = '\0';
8981             for ( i = 0; i < skip; i++ )
8982                 d[i] = *s++;
8983         }
8984         else {
8985             *d = *s++;
8986             d[1] = '\0';
8987         }
8988     }
8989     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8990         *d = toCTRL(*s);
8991         s++;
8992     }
8993     else if (ck_uni && !bracket)
8994         check_uni();
8995     if (bracket) {
8996         if (isSPACE(s[-1])) {
8997             while (s < send) {
8998                 const char ch = *s++;
8999                 if (!SPACE_OR_TAB(ch)) {
9000                     *d = ch;
9001                     break;
9002                 }
9003             }
9004         }
9005         if (isIDFIRST_lazy_if(d,UTF)) {
9006             d += UTF8SKIP(d);
9007             if (UTF) {
9008                 char *end = s;
9009                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9010                     end += UTF8SKIP(end);
9011                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9012                         end += UTF8SKIP(end);
9013                 }
9014                 Copy(s, d, end - s, char);
9015                 d += end - s;
9016                 s = end;
9017             }
9018             else {
9019                 while ((isALNUM(*s) || *s == ':') && d < e)
9020                     *d++ = *s++;
9021                 if (d >= e)
9022                     Perl_croak(aTHX_ ident_too_long);
9023             }
9024             *d = '\0';
9025             while (s < send && SPACE_OR_TAB(*s))
9026                 s++;
9027             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9028                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9029                     const char * const brack =
9030                         (const char *)
9031                         ((*s == '[') ? "[...]" : "{...}");
9032    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9033                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9034                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9035                         funny, dest, brack, funny, dest, brack);
9036                 }
9037                 bracket++;
9038                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9039                 PL_lex_allbrackets++;
9040                 return s;
9041             }
9042         }
9043         /* Handle extended ${^Foo} variables
9044          * 1999-02-27 mjd-perl-patch@plover.com */
9045         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9046                  && isALNUM(*s))
9047         {
9048             d++;
9049             while (isALNUM(*s) && d < e) {
9050                 *d++ = *s++;
9051             }
9052             if (d >= e)
9053                 Perl_croak(aTHX_ ident_too_long);
9054             *d = '\0';
9055         }
9056         if (*s == '}') {
9057             s++;
9058             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9059                 PL_lex_state = LEX_INTERPEND;
9060                 PL_expect = XREF;
9061             }
9062             if (PL_lex_state == LEX_NORMAL) {
9063                 if (ckWARN(WARN_AMBIGUOUS) &&
9064                     (keyword(dest, d - dest, 0)
9065                      || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9066                 {
9067                     SV *tmp = newSVpvn_flags( dest, d - dest,
9068                                             SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9069                     if (funny == '#')
9070                         funny = '@';
9071                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9072                         "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9073                         funny, tmp, funny, tmp);
9074                 }
9075             }
9076         }
9077         else {
9078             s = bracket;                /* let the parser handle it */
9079             *dest = '\0';
9080         }
9081     }
9082     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9083         PL_lex_state = LEX_INTERPEND;
9084     return s;
9085 }
9086
9087 static bool
9088 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9089
9090     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9091      * the parse starting at 's', based on the subset that are valid in this
9092      * context input to this routine in 'valid_flags'. Advances s.  Returns
9093      * TRUE if the input should be treated as a valid flag, so the next char
9094      * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9095      * first call on the current regex.  This routine will set it to any
9096      * charset modifier found.  The caller shouldn't change it.  This way,
9097      * another charset modifier encountered in the parse can be detected as an
9098      * error, as we have decided to allow only one */
9099
9100     const char c = **s;
9101     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9102
9103     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9104         if (isALNUM_lazy_if(*s, UTF)) {
9105             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9106                        UTF ? SVf_UTF8 : 0);
9107             (*s) += charlen;
9108             /* Pretend that it worked, so will continue processing before
9109              * dieing */
9110             return TRUE;
9111         }
9112         return FALSE;
9113     }
9114
9115     switch (c) {
9116
9117         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9118         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9119         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9120         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9121         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9122         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9123         case LOCALE_PAT_MOD:
9124             if (*charset) {
9125                 goto multiple_charsets;
9126             }
9127             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9128             *charset = c;
9129             break;
9130         case UNICODE_PAT_MOD:
9131             if (*charset) {
9132                 goto multiple_charsets;
9133             }
9134             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9135             *charset = c;
9136             break;
9137         case ASCII_RESTRICT_PAT_MOD:
9138             if (! *charset) {
9139                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9140             }
9141             else {
9142
9143                 /* Error if previous modifier wasn't an 'a', but if it was, see
9144                  * if, and accept, a second occurrence (only) */
9145                 if (*charset != 'a'
9146                     || get_regex_charset(*pmfl)
9147                         != REGEX_ASCII_RESTRICTED_CHARSET)
9148                 {
9149                         goto multiple_charsets;
9150                 }
9151                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9152             }
9153             *charset = c;
9154             break;
9155         case DEPENDS_PAT_MOD:
9156             if (*charset) {
9157                 goto multiple_charsets;
9158             }
9159             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9160             *charset = c;
9161             break;
9162     }
9163
9164     (*s)++;
9165     return TRUE;
9166
9167     multiple_charsets:
9168         if (*charset != c) {
9169             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9170         }
9171         else if (c == 'a') {
9172             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9173         }
9174         else {
9175             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9176         }
9177
9178         /* Pretend that it worked, so will continue processing before dieing */
9179         (*s)++;
9180         return TRUE;
9181 }
9182
9183 STATIC char *
9184 S_scan_pat(pTHX_ char *start, I32 type)
9185 {
9186     dVAR;
9187     PMOP *pm;
9188     char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9189     const char * const valid_flags =
9190         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9191     char charset = '\0';    /* character set modifier */
9192 #ifdef PERL_MAD
9193     char *modstart;
9194 #endif
9195
9196     PERL_ARGS_ASSERT_SCAN_PAT;
9197
9198     /* this was only needed for the initial scan_str; set it to false
9199      * so that any (?{}) code blocks etc are parsed normally */
9200     PL_reg_state.re_reparsing = FALSE;
9201     if (!s) {
9202         const char * const delimiter = skipspace(start);
9203         Perl_croak(aTHX_
9204                    (const char *)
9205                    (*delimiter == '?'
9206                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9207                     : "Search pattern not terminated" ));
9208     }
9209
9210     pm = (PMOP*)newPMOP(type, 0);
9211     if (PL_multi_open == '?') {
9212         /* This is the only point in the code that sets PMf_ONCE:  */
9213         pm->op_pmflags |= PMf_ONCE;
9214
9215         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9216            allows us to restrict the list needed by reset to just the ??
9217            matches.  */
9218         assert(type != OP_TRANS);
9219         if (PL_curstash) {
9220             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9221             U32 elements;
9222             if (!mg) {
9223                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9224                                  0);
9225             }
9226             elements = mg->mg_len / sizeof(PMOP**);
9227             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9228             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9229             mg->mg_len = elements * sizeof(PMOP**);
9230             PmopSTASH_set(pm,PL_curstash);
9231         }
9232     }
9233 #ifdef PERL_MAD
9234     modstart = s;
9235 #endif
9236
9237     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9238      * anon CV. False positives like qr/[(?{]/ are harmless */
9239
9240     if (type == OP_QR) {
9241         STRLEN len;
9242         char *e, *p = SvPV(PL_lex_stuff, len);
9243         e = p + len;
9244         for (; p < e; p++) {
9245             if (p[0] == '(' && p[1] == '?'
9246                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9247             {
9248                 pm->op_pmflags |= PMf_HAS_CV;
9249                 break;
9250             }
9251         }
9252         pm->op_pmflags |= PMf_IS_QR;
9253     }
9254
9255     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9256 #ifdef PERL_MAD
9257     if (PL_madskills && modstart != s) {
9258         SV* tmptoken = newSVpvn(modstart, s - modstart);
9259         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9260     }
9261 #endif
9262     /* issue a warning if /c is specified,but /g is not */
9263     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9264     {
9265         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9266                        "Use of /c modifier is meaningless without /g" );
9267     }
9268
9269     PL_lex_op = (OP*)pm;
9270     pl_yylval.ival = OP_MATCH;
9271     return s;
9272 }
9273
9274 STATIC char *
9275 S_scan_subst(pTHX_ char *start)
9276 {
9277     dVAR;
9278     char *s;
9279     PMOP *pm;
9280     I32 first_start;
9281     I32 es = 0;
9282     char charset = '\0';    /* character set modifier */
9283 #ifdef PERL_MAD
9284     char *modstart;
9285 #endif
9286
9287     PERL_ARGS_ASSERT_SCAN_SUBST;
9288
9289     pl_yylval.ival = OP_NULL;
9290
9291     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9292
9293     if (!s)
9294         Perl_croak(aTHX_ "Substitution pattern not terminated");
9295
9296     if (s[-1] == PL_multi_open)
9297         s--;
9298 #ifdef PERL_MAD
9299     if (PL_madskills) {
9300         CURMAD('q', PL_thisopen);
9301         CURMAD('_', PL_thiswhite);
9302         CURMAD('E', PL_thisstuff);
9303         CURMAD('Q', PL_thisclose);
9304         PL_realtokenstart = s - SvPVX(PL_linestr);
9305     }
9306 #endif
9307
9308     first_start = PL_multi_start;
9309     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9310     if (!s) {
9311         if (PL_lex_stuff) {
9312             SvREFCNT_dec(PL_lex_stuff);
9313             PL_lex_stuff = NULL;
9314         }
9315         Perl_croak(aTHX_ "Substitution replacement not terminated");
9316     }
9317     PL_multi_start = first_start;       /* so whole substitution is taken together */
9318
9319     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9320
9321 #ifdef PERL_MAD
9322     if (PL_madskills) {
9323         CURMAD('z', PL_thisopen);
9324         CURMAD('R', PL_thisstuff);
9325         CURMAD('Z', PL_thisclose);
9326     }
9327     modstart = s;
9328 #endif
9329
9330     while (*s) {
9331         if (*s == EXEC_PAT_MOD) {
9332             s++;
9333             es++;
9334         }
9335         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9336         {
9337             break;
9338         }
9339     }
9340
9341 #ifdef PERL_MAD
9342     if (PL_madskills) {
9343         if (modstart != s)
9344             curmad('m', newSVpvn(modstart, s - modstart));
9345         append_madprops(PL_thismad, (OP*)pm, 0);
9346         PL_thismad = 0;
9347     }
9348 #endif
9349     if ((pm->op_pmflags & PMf_CONTINUE)) {
9350         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9351     }
9352
9353     if (es) {
9354         SV * const repl = newSVpvs("");
9355
9356         PL_multi_end = 0;
9357         pm->op_pmflags |= PMf_EVAL;
9358         while (es-- > 0) {
9359             if (es)
9360                 sv_catpvs(repl, "eval ");
9361             else
9362                 sv_catpvs(repl, "do ");
9363         }
9364         sv_catpvs(repl, "{");
9365         sv_catsv(repl, PL_sublex_info.repl);
9366         if (strchr(SvPVX(PL_sublex_info.repl), '#'))
9367             sv_catpvs(repl, "\n");
9368         sv_catpvs(repl, "}");
9369         SvEVALED_on(repl);
9370         SvREFCNT_dec(PL_sublex_info.repl);
9371         PL_sublex_info.repl = repl;
9372     }
9373
9374     PL_lex_op = (OP*)pm;
9375     pl_yylval.ival = OP_SUBST;
9376     return s;
9377 }
9378
9379 STATIC char *
9380 S_scan_trans(pTHX_ char *start)
9381 {
9382     dVAR;
9383     char* s;
9384     OP *o;
9385     U8 squash;
9386     U8 del;
9387     U8 complement;
9388     bool nondestruct = 0;
9389 #ifdef PERL_MAD
9390     char *modstart;
9391 #endif
9392
9393     PERL_ARGS_ASSERT_SCAN_TRANS;
9394
9395     pl_yylval.ival = OP_NULL;
9396
9397     s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9398     if (!s)
9399         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9400
9401     if (s[-1] == PL_multi_open)
9402         s--;
9403 #ifdef PERL_MAD
9404     if (PL_madskills) {
9405         CURMAD('q', PL_thisopen);
9406         CURMAD('_', PL_thiswhite);
9407         CURMAD('E', PL_thisstuff);
9408         CURMAD('Q', PL_thisclose);
9409         PL_realtokenstart = s - SvPVX(PL_linestr);
9410     }
9411 #endif
9412
9413     s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9414     if (!s) {
9415         if (PL_lex_stuff) {
9416             SvREFCNT_dec(PL_lex_stuff);
9417             PL_lex_stuff = NULL;
9418         }
9419         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9420     }
9421     if (PL_madskills) {
9422         CURMAD('z', PL_thisopen);
9423         CURMAD('R', PL_thisstuff);
9424         CURMAD('Z', PL_thisclose);
9425     }
9426
9427     complement = del = squash = 0;
9428 #ifdef PERL_MAD
9429     modstart = s;
9430 #endif
9431     while (1) {
9432         switch (*s) {
9433         case 'c':
9434             complement = OPpTRANS_COMPLEMENT;
9435             break;
9436         case 'd':
9437             del = OPpTRANS_DELETE;
9438             break;
9439         case 's':
9440             squash = OPpTRANS_SQUASH;
9441             break;
9442         case 'r':
9443             nondestruct = 1;
9444             break;
9445         default:
9446             goto no_more;
9447         }
9448         s++;
9449     }
9450   no_more:
9451
9452     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9453     o->op_private &= ~OPpTRANS_ALL;
9454     o->op_private |= del|squash|complement|
9455       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9456       (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF   : 0);
9457
9458     PL_lex_op = o;
9459     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9460
9461 #ifdef PERL_MAD
9462     if (PL_madskills) {
9463         if (modstart != s)
9464             curmad('m', newSVpvn(modstart, s - modstart));
9465         append_madprops(PL_thismad, o, 0);
9466         PL_thismad = 0;
9467     }
9468 #endif
9469
9470     return s;
9471 }
9472
9473 /* scan_heredoc
9474    Takes a pointer to the first < in <<FOO.
9475    Returns a pointer to the byte following <<FOO.
9476
9477    This function scans a heredoc, which involves different methods
9478    depending on whether we are in a string eval, quoted construct, etc.
9479    This is because PL_linestr could containing a single line of input, or
9480    a whole string being evalled, or the contents of the current quote-
9481    like operator.
9482
9483    The three methods are:
9484     - Steal lines from the input stream (stream)
9485     - Scan the heredoc in PL_linestr and remove it therefrom (linestr)
9486     - Peek at the PL_linestr of outer lexing scopes (peek)
9487
9488    They are used in these cases:
9489      file scope or filtered eval                        stream
9490      string eval                                        linestr
9491      multiline quoted construct                         linestr
9492      single-line quoted construct in file               stream
9493      single-line quoted construct in eval or quote      peek
9494
9495    Single-line also applies to heredocs that begin on the last line of a
9496    quote-like operator.
9497
9498    Peeking within a quote also involves falling back to the stream method,
9499    if the outer quote-like operators are all on one line (or the heredoc
9500    marker is on the last line).
9501 */
9502
9503 STATIC char *
9504 S_scan_heredoc(pTHX_ register char *s)
9505 {
9506     dVAR;
9507     SV *herewas;
9508     I32 op_type = OP_SCALAR;
9509     I32 len;
9510     SV *tmpstr;
9511     char term;
9512     const char *found_newline = 0;
9513     char *d;
9514     char *e;
9515     char *peek;
9516     const bool infile = PL_rsfp || PL_parser->filtered;
9517 #ifdef PERL_MAD
9518     I32 stuffstart = s - SvPVX(PL_linestr);
9519     char *tstart;
9520
9521     PL_realtokenstart = -1;
9522 #endif
9523
9524     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9525
9526     s += 2;
9527     d = PL_tokenbuf + 1;
9528     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9529     *PL_tokenbuf = '\n';
9530     peek = s;
9531     while (SPACE_OR_TAB(*peek))
9532         peek++;
9533     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9534         s = peek;
9535         term = *s++;
9536         s = delimcpy(d, e, s, PL_bufend, term, &len);
9537         if (s == PL_bufend)
9538             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9539         d += len;
9540         s++;
9541     }
9542     else {
9543         if (*s == '\\')
9544             /* <<\FOO is equivalent to <<'FOO' */
9545             s++, term = '\'';
9546         else
9547             term = '"';
9548         if (!isALNUM_lazy_if(s,UTF))
9549             deprecate("bare << to mean <<\"\"");
9550         for (; isALNUM_lazy_if(s,UTF); s++) {
9551             if (d < e)
9552                 *d++ = *s;
9553         }
9554     }
9555     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9556         Perl_croak(aTHX_ "Delimiter for here document is too long");
9557     *d++ = '\n';
9558     *d = '\0';
9559     len = d - PL_tokenbuf;
9560
9561 #ifdef PERL_MAD
9562     if (PL_madskills) {
9563         tstart = PL_tokenbuf + 1;
9564         PL_thisclose = newSVpvn(tstart, len - 1);
9565         tstart = SvPVX(PL_linestr) + stuffstart;
9566         PL_thisopen = newSVpvn(tstart, s - tstart);
9567         stuffstart = s - SvPVX(PL_linestr);
9568     }
9569 #endif
9570 #ifndef PERL_STRICT_CR
9571     d = strchr(s, '\r');
9572     if (d) {
9573         char * const olds = s;
9574         s = d;
9575         while (s < PL_bufend) {
9576             if (*s == '\r') {
9577                 *d++ = '\n';
9578                 if (*++s == '\n')
9579                     s++;
9580             }
9581             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9582                 *d++ = *s++;
9583                 s++;
9584             }
9585             else
9586                 *d++ = *s++;
9587         }
9588         *d = '\0';
9589         PL_bufend = d;
9590         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9591         s = olds;
9592     }
9593 #endif
9594     if ((infile && !PL_lex_inwhat)
9595      || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s))) {
9596         herewas = newSVpvn(s,PL_bufend-s);
9597     }
9598     else {
9599 #ifdef PERL_MAD
9600         herewas = newSVpvn(s-1,found_newline-s+1);
9601 #else
9602         s--;
9603         herewas = newSVpvn(s,found_newline-s);
9604 #endif
9605     }
9606 #ifdef PERL_MAD
9607     if (PL_madskills) {
9608         tstart = SvPVX(PL_linestr) + stuffstart;
9609         if (PL_thisstuff)
9610             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9611         else
9612             PL_thisstuff = newSVpvn(tstart, s - tstart);
9613     }
9614 #endif
9615     s += SvCUR(herewas);
9616
9617 #ifdef PERL_MAD
9618     stuffstart = s - SvPVX(PL_linestr);
9619
9620     if (found_newline)
9621         s--;
9622 #endif
9623
9624     tmpstr = newSV_type(SVt_PVIV);
9625     SvGROW(tmpstr, 80);
9626     if (term == '\'') {
9627         op_type = OP_CONST;
9628         SvIV_set(tmpstr, -1);
9629     }
9630     else if (term == '`') {
9631         op_type = OP_BACKTICK;
9632         SvIV_set(tmpstr, '\\');
9633     }
9634
9635     CLINE;
9636     PL_multi_start = CopLINE(PL_curcop);
9637     PL_multi_open = PL_multi_close = '<';
9638     if (PL_lex_inwhat && !found_newline) {
9639         /* Peek into the line buffer of the parent lexing scope, going up
9640            as many levels as necessary to find one with a newline after
9641            bufptr.  See the comments in sublex_push for how IVX and NVX
9642            are abused.
9643          */
9644         SV *linestr = NUM2PTR(SV *, SvNVX(PL_linestr));
9645         char *bufptr = PL_sublex_info.super_bufptr;
9646         char *bufend = SvEND(linestr);
9647         char * const olds = s - SvCUR(herewas);
9648         char * const real_olds = s;
9649         if (!bufptr) {
9650             s = real_olds;
9651             goto streaming;
9652         }
9653         while (!(s = (char *)memchr((void *)bufptr, '\n', bufend-bufptr))){
9654             if (SvIVX(linestr)) {
9655                 bufptr = INT2PTR(char *, SvIVX(linestr));
9656                 linestr = NUM2PTR(SV *, SvNVX(linestr));
9657                 bufend = SvEND(linestr);
9658             }
9659             else if (infile) {
9660                 s = real_olds;
9661                 goto streaming;
9662             }
9663             else {
9664                 s = bufend;
9665                 break;
9666             }
9667         }
9668         d = s;
9669         while (s < bufend &&
9670           (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9671             if (*s++ == '\n')
9672                 CopLINE_inc(PL_curcop);
9673         }
9674         if (s >= bufend) {
9675             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9676             missingterm(PL_tokenbuf + 1);
9677         }
9678         sv_setpvn(herewas,bufptr,d-bufptr+1);
9679         sv_setpvn(tmpstr,d+1,s-d);
9680         s += len - 1;
9681         sv_catpvn(herewas,s,bufend-s);
9682         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9683         SvCUR_set(linestr,
9684                   bufptr-SvPVX_const(linestr)
9685                    + SvCUR(herewas));
9686
9687         s = olds;
9688         goto retval;
9689     }
9690     else if (!infile || found_newline) {
9691         char * const olds = s - SvCUR(herewas);
9692         d = s;
9693         while (s < PL_bufend &&
9694           (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9695             if (*s++ == '\n')
9696                 CopLINE_inc(PL_curcop);
9697         }
9698         if (s >= PL_bufend) {
9699             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9700             missingterm(PL_tokenbuf + 1);
9701         }
9702         sv_setpvn(tmpstr,d+1,s-d);
9703 #ifdef PERL_MAD
9704         if (PL_madskills) {
9705             if (PL_thisstuff)
9706                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9707             else
9708                 PL_thisstuff = newSVpvn(d + 1, s - d);
9709             stuffstart = s - SvPVX(PL_linestr);
9710         }
9711 #endif
9712         s += len - 1;
9713         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9714
9715         /* s now points to the newline after the heredoc terminator.
9716            d points to the newline before the body of the heredoc.
9717          */
9718         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9719            check PL_sublex_info.re_eval_str. */
9720         if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
9721             /* Set aside the rest of the regexp */
9722             if (!PL_sublex_info.re_eval_str)
9723                 PL_sublex_info.re_eval_str =
9724                        newSVpvn(PL_sublex_info.re_eval_start,
9725                                 PL_bufend - PL_sublex_info.re_eval_start);
9726             PL_sublex_info.re_eval_start -= s-d;
9727         }
9728         /* Copy everything from s onwards back to d. */
9729         Move(s,d,PL_bufend-s + 1,char);
9730         SvCUR_set(PL_linestr, SvCUR(PL_linestr) - (s-d));
9731         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9732         s = olds;
9733     }
9734     else
9735         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9736   streaming:
9737     term = PL_tokenbuf[1];
9738     len--;
9739     while (s >= PL_bufend) {    /* multiple line string? */
9740 #ifdef PERL_MAD
9741         if (PL_madskills) {
9742             tstart = SvPVX(PL_linestr) + stuffstart;
9743             if (PL_thisstuff)
9744                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9745             else
9746                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9747         }
9748 #endif
9749         PL_bufptr = s;
9750         CopLINE_inc(PL_curcop);
9751         if (!lex_next_chunk(LEX_NO_TERM)
9752          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9753             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9754             missingterm(PL_tokenbuf + 1);
9755         }
9756         CopLINE_dec(PL_curcop);
9757         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9758             lex_grow_linestr(SvCUR(PL_linestr) + 2);
9759             sv_catpvs(PL_linestr, "\n\0");
9760         }
9761         s = PL_bufptr;
9762 #ifdef PERL_MAD
9763         stuffstart = s - SvPVX(PL_linestr);
9764 #endif
9765         CopLINE_inc(PL_curcop);
9766         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9767         PL_last_lop = PL_last_uni = NULL;
9768 #ifndef PERL_STRICT_CR
9769         if (PL_bufend - PL_linestart >= 2) {
9770             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9771                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9772             {
9773                 PL_bufend[-2] = '\n';
9774                 PL_bufend--;
9775                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9776             }
9777             else if (PL_bufend[-1] == '\r')
9778                 PL_bufend[-1] = '\n';
9779         }
9780         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9781             PL_bufend[-1] = '\n';
9782 #endif
9783         if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
9784             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9785             *(SvPVX(PL_linestr) + off ) = ' ';
9786             lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9787             sv_catsv(PL_linestr,herewas);
9788             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9789             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9790         }
9791         else {
9792             s = PL_bufend;
9793             sv_catsv(tmpstr,PL_linestr);
9794         }
9795     }
9796     s++;
9797 retval:
9798     PL_multi_end = CopLINE(PL_curcop);
9799     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9800         SvPV_shrink_to_cur(tmpstr);
9801     }
9802     SvREFCNT_dec(herewas);
9803     if (!IN_BYTES) {
9804         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9805             SvUTF8_on(tmpstr);
9806         else if (PL_encoding)
9807             sv_recode_to_utf8(tmpstr, PL_encoding);
9808     }
9809     PL_lex_stuff = tmpstr;
9810     pl_yylval.ival = op_type;
9811     return s;
9812 }
9813
9814 /* scan_inputsymbol
9815    takes: current position in input buffer
9816    returns: new position in input buffer
9817    side-effects: pl_yylval and lex_op are set.
9818
9819    This code handles:
9820
9821    <>           read from ARGV
9822    <FH>         read from filehandle
9823    <pkg::FH>    read from package qualified filehandle
9824    <pkg'FH>     read from package qualified filehandle
9825    <$fh>        read from filehandle in $fh
9826    <*.h>        filename glob
9827
9828 */
9829
9830 STATIC char *
9831 S_scan_inputsymbol(pTHX_ char *start)
9832 {
9833     dVAR;
9834     char *s = start;            /* current position in buffer */
9835     char *end;
9836     I32 len;
9837     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9838     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9839
9840     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9841
9842     end = strchr(s, '\n');
9843     if (!end)
9844         end = PL_bufend;
9845     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9846
9847     /* die if we didn't have space for the contents of the <>,
9848        or if it didn't end, or if we see a newline
9849     */
9850
9851     if (len >= (I32)sizeof PL_tokenbuf)
9852         Perl_croak(aTHX_ "Excessively long <> operator");
9853     if (s >= end)
9854         Perl_croak(aTHX_ "Unterminated <> operator");
9855
9856     s++;
9857
9858     /* check for <$fh>
9859        Remember, only scalar variables are interpreted as filehandles by
9860        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9861        treated as a glob() call.
9862        This code makes use of the fact that except for the $ at the front,
9863        a scalar variable and a filehandle look the same.
9864     */
9865     if (*d == '$' && d[1]) d++;
9866
9867     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9868     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9869         d += UTF ? UTF8SKIP(d) : 1;
9870
9871     /* If we've tried to read what we allow filehandles to look like, and
9872        there's still text left, then it must be a glob() and not a getline.
9873        Use scan_str to pull out the stuff between the <> and treat it
9874        as nothing more than a string.
9875     */
9876
9877     if (d - PL_tokenbuf != len) {
9878         pl_yylval.ival = OP_GLOB;
9879         s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9880         if (!s)
9881            Perl_croak(aTHX_ "Glob not terminated");
9882         return s;
9883     }
9884     else {
9885         bool readline_overriden = FALSE;
9886         GV *gv_readline;
9887         GV **gvp;
9888         /* we're in a filehandle read situation */
9889         d = PL_tokenbuf;
9890
9891         /* turn <> into <ARGV> */
9892         if (!len)
9893             Copy("ARGV",d,5,char);
9894
9895         /* Check whether readline() is overriden */
9896         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9897         if ((gv_readline
9898                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9899                 ||
9900                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9901                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9902                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9903             readline_overriden = TRUE;
9904
9905         /* if <$fh>, create the ops to turn the variable into a
9906            filehandle
9907         */
9908         if (*d == '$') {
9909             /* try to find it in the pad for this block, otherwise find
9910                add symbol table ops
9911             */
9912             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9913             if (tmp != NOT_IN_PAD) {
9914                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9915                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9916                     HEK * const stashname = HvNAME_HEK(stash);
9917                     SV * const sym = sv_2mortal(newSVhek(stashname));
9918                     sv_catpvs(sym, "::");
9919                     sv_catpv(sym, d+1);
9920                     d = SvPVX(sym);
9921                     goto intro_sym;
9922                 }
9923                 else {
9924                     OP * const o = newOP(OP_PADSV, 0);
9925                     o->op_targ = tmp;
9926                     PL_lex_op = readline_overriden
9927                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9928                                 op_append_elem(OP_LIST, o,
9929                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9930                         : (OP*)newUNOP(OP_READLINE, 0, o);
9931                 }
9932             }
9933             else {
9934                 GV *gv;
9935                 ++d;
9936 intro_sym:
9937                 gv = gv_fetchpv(d,
9938                                 (PL_in_eval
9939                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9940                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9941                                 SVt_PV);
9942                 PL_lex_op = readline_overriden
9943                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9944                             op_append_elem(OP_LIST,
9945                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9946                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9947                     : (OP*)newUNOP(OP_READLINE, 0,
9948                             newUNOP(OP_RV2SV, 0,
9949                                 newGVOP(OP_GV, 0, gv)));
9950             }
9951             if (!readline_overriden)
9952                 PL_lex_op->op_flags |= OPf_SPECIAL;
9953             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9954             pl_yylval.ival = OP_NULL;
9955         }
9956
9957         /* If it's none of the above, it must be a literal filehandle
9958            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9959         else {
9960             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9961             PL_lex_op = readline_overriden
9962                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9963                         op_append_elem(OP_LIST,
9964                             newGVOP(OP_GV, 0, gv),
9965                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9966                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9967             pl_yylval.ival = OP_NULL;
9968         }
9969     }
9970
9971     return s;
9972 }
9973
9974
9975 /* scan_str
9976    takes: start position in buffer
9977           keep_quoted preserve \ on the embedded delimiter(s)
9978           keep_delims preserve the delimiters around the string
9979           re_reparse  compiling a run-time /(?{})/:
9980                         collapse // to /,  and skip encoding src
9981    returns: position to continue reading from buffer
9982    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9983         updates the read buffer.
9984
9985    This subroutine pulls a string out of the input.  It is called for:
9986         q               single quotes           q(literal text)
9987         '               single quotes           'literal text'
9988         qq              double quotes           qq(interpolate $here please)
9989         "               double quotes           "interpolate $here please"
9990         qx              backticks               qx(/bin/ls -l)
9991         `               backticks               `/bin/ls -l`
9992         qw              quote words             @EXPORT_OK = qw( func() $spam )
9993         m//             regexp match            m/this/
9994         s///            regexp substitute       s/this/that/
9995         tr///           string transliterate    tr/this/that/
9996         y///            string transliterate    y/this/that/
9997         ($*@)           sub prototypes          sub foo ($)
9998         (stuff)         sub attr parameters     sub foo : attr(stuff)
9999         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10000
10001    In most of these cases (all but <>, patterns and transliterate)
10002    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10003    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10004    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10005    calls scan_str().
10006
10007    It skips whitespace before the string starts, and treats the first
10008    character as the delimiter.  If the delimiter is one of ([{< then
10009    the corresponding "close" character )]}> is used as the closing
10010    delimiter.  It allows quoting of delimiters, and if the string has
10011    balanced delimiters ([{<>}]) it allows nesting.
10012
10013    On success, the SV with the resulting string is put into lex_stuff or,
10014    if that is already non-NULL, into lex_repl. The second case occurs only
10015    when parsing the RHS of the special constructs s/// and tr/// (y///).
10016    For convenience, the terminating delimiter character is stuffed into
10017    SvIVX of the SV.
10018 */
10019
10020 STATIC char *
10021 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
10022 {
10023     dVAR;
10024     SV *sv;                             /* scalar value: string */
10025     const char *tmps;                   /* temp string, used for delimiter matching */
10026     char *s = start;            /* current position in the buffer */
10027     char term;                  /* terminating character */
10028     char *to;                   /* current position in the sv's data */
10029     I32 brackets = 1;                   /* bracket nesting level */
10030     bool has_utf8 = FALSE;              /* is there any utf8 content? */
10031     I32 termcode;                       /* terminating char. code */
10032     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
10033     STRLEN termlen;                     /* length of terminating string */
10034     int last_off = 0;                   /* last position for nesting bracket */
10035 #ifdef PERL_MAD
10036     int stuffstart;
10037     char *tstart;
10038 #endif
10039
10040     PERL_ARGS_ASSERT_SCAN_STR;
10041
10042     /* skip space before the delimiter */
10043     if (isSPACE(*s)) {
10044         s = PEEKSPACE(s);
10045     }
10046
10047 #ifdef PERL_MAD
10048     if (PL_realtokenstart >= 0) {
10049         stuffstart = PL_realtokenstart;
10050         PL_realtokenstart = -1;
10051     }
10052     else
10053         stuffstart = start - SvPVX(PL_linestr);
10054 #endif
10055     /* mark where we are, in case we need to report errors */
10056     CLINE;
10057
10058     /* after skipping whitespace, the next character is the terminator */
10059     term = *s;
10060     if (!UTF) {
10061         termcode = termstr[0] = term;
10062         termlen = 1;
10063     }
10064     else {
10065         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10066         Copy(s, termstr, termlen, U8);
10067         if (!UTF8_IS_INVARIANT(term))
10068             has_utf8 = TRUE;
10069     }
10070
10071     /* mark where we are */
10072     PL_multi_start = CopLINE(PL_curcop);
10073     PL_multi_open = term;
10074
10075     /* find corresponding closing delimiter */
10076     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10077         termcode = termstr[0] = term = tmps[5];
10078
10079     PL_multi_close = term;
10080
10081     /* create a new SV to hold the contents.  79 is the SV's initial length.
10082        What a random number. */
10083     sv = newSV_type(SVt_PVIV);
10084     SvGROW(sv, 80);
10085     SvIV_set(sv, termcode);
10086     (void)SvPOK_only(sv);               /* validate pointer */
10087
10088     /* move past delimiter and try to read a complete string */
10089     if (keep_delims)
10090         sv_catpvn(sv, s, termlen);
10091     s += termlen;
10092 #ifdef PERL_MAD
10093     tstart = SvPVX(PL_linestr) + stuffstart;
10094     if (!PL_thisopen && !keep_delims) {
10095         PL_thisopen = newSVpvn(tstart, s - tstart);
10096         stuffstart = s - SvPVX(PL_linestr);
10097     }
10098 #endif
10099     for (;;) {
10100         if (PL_encoding && !UTF && !re_reparse) {
10101             bool cont = TRUE;
10102
10103             while (cont) {
10104                 int offset = s - SvPVX_const(PL_linestr);
10105                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10106                                            &offset, (char*)termstr, termlen);
10107                 const char * const ns = SvPVX_const(PL_linestr) + offset;
10108                 char * const svlast = SvEND(sv) - 1;
10109
10110                 for (; s < ns; s++) {
10111                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10112                         CopLINE_inc(PL_curcop);
10113                 }
10114                 if (!found)
10115                     goto read_more_line;
10116                 else {
10117                     /* handle quoted delimiters */
10118                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10119                         const char *t;
10120                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10121                             t--;
10122                         if ((svlast-1 - t) % 2) {
10123                             if (!keep_quoted) {
10124                                 *(svlast-1) = term;
10125                                 *svlast = '\0';
10126                                 SvCUR_set(sv, SvCUR(sv) - 1);
10127                             }
10128                             continue;
10129                         }
10130                     }
10131                     if (PL_multi_open == PL_multi_close) {
10132                         cont = FALSE;
10133                     }
10134                     else {
10135                         const char *t;
10136                         char *w;
10137                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10138                             /* At here, all closes are "was quoted" one,
10139                                so we don't check PL_multi_close. */
10140                             if (*t == '\\') {
10141                                 if (!keep_quoted && *(t+1) == PL_multi_open)
10142                                     t++;
10143                                 else
10144                                     *w++ = *t++;
10145                             }
10146                             else if (*t == PL_multi_open)
10147                                 brackets++;
10148
10149                             *w = *t;
10150                         }
10151                         if (w < t) {
10152                             *w++ = term;
10153                             *w = '\0';
10154                             SvCUR_set(sv, w - SvPVX_const(sv));
10155                         }
10156                         last_off = w - SvPVX(sv);
10157                         if (--brackets <= 0)
10158                             cont = FALSE;
10159                     }
10160                 }
10161             }
10162             if (!keep_delims) {
10163                 SvCUR_set(sv, SvCUR(sv) - 1);
10164                 *SvEND(sv) = '\0';
10165             }
10166             break;
10167         }
10168
10169         /* extend sv if need be */
10170         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10171         /* set 'to' to the next character in the sv's string */
10172         to = SvPVX(sv)+SvCUR(sv);
10173
10174         /* if open delimiter is the close delimiter read unbridle */
10175         if (PL_multi_open == PL_multi_close) {
10176             for (; s < PL_bufend; s++,to++) {
10177                 /* embedded newlines increment the current line number */
10178                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10179                     CopLINE_inc(PL_curcop);
10180                 /* handle quoted delimiters */
10181                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10182                     if (!keep_quoted
10183                         && (s[1] == term
10184                             || (re_reparse && s[1] == '\\'))
10185                     )
10186                         s++;
10187                     /* any other quotes are simply copied straight through */
10188                     else
10189                         *to++ = *s++;
10190                 }
10191                 /* terminate when run out of buffer (the for() condition), or
10192                    have found the terminator */
10193                 else if (*s == term) {
10194                     if (termlen == 1)
10195                         break;
10196                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10197                         break;
10198                 }
10199                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10200                     has_utf8 = TRUE;
10201                 *to = *s;
10202             }
10203         }
10204
10205         /* if the terminator isn't the same as the start character (e.g.,
10206            matched brackets), we have to allow more in the quoting, and
10207            be prepared for nested brackets.
10208         */
10209         else {
10210             /* read until we run out of string, or we find the terminator */
10211             for (; s < PL_bufend; s++,to++) {
10212                 /* embedded newlines increment the line count */
10213                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10214                     CopLINE_inc(PL_curcop);
10215                 /* backslashes can escape the open or closing characters */
10216                 if (*s == '\\' && s+1 < PL_bufend) {
10217                     if (!keep_quoted &&
10218                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10219                         s++;
10220                     else
10221                         *to++ = *s++;
10222                 }
10223                 /* allow nested opens and closes */
10224                 else if (*s == PL_multi_close && --brackets <= 0)
10225                     break;
10226                 else if (*s == PL_multi_open)
10227                     brackets++;
10228                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10229                     has_utf8 = TRUE;
10230                 *to = *s;
10231             }
10232         }
10233         /* terminate the copied string and update the sv's end-of-string */
10234         *to = '\0';
10235         SvCUR_set(sv, to - SvPVX_const(sv));
10236
10237         /*
10238          * this next chunk reads more into the buffer if we're not done yet
10239          */
10240
10241         if (s < PL_bufend)
10242             break;              /* handle case where we are done yet :-) */
10243
10244 #ifndef PERL_STRICT_CR
10245         if (to - SvPVX_const(sv) >= 2) {
10246             if ((to[-2] == '\r' && to[-1] == '\n') ||
10247                 (to[-2] == '\n' && to[-1] == '\r'))
10248             {
10249                 to[-2] = '\n';
10250                 to--;
10251                 SvCUR_set(sv, to - SvPVX_const(sv));
10252             }
10253             else if (to[-1] == '\r')
10254                 to[-1] = '\n';
10255         }
10256         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10257             to[-1] = '\n';
10258 #endif
10259
10260      read_more_line:
10261         /* if we're out of file, or a read fails, bail and reset the current
10262            line marker so we can report where the unterminated string began
10263         */
10264 #ifdef PERL_MAD
10265         if (PL_madskills) {
10266             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10267             if (PL_thisstuff)
10268                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10269             else
10270                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10271         }
10272 #endif
10273         CopLINE_inc(PL_curcop);
10274         PL_bufptr = PL_bufend;
10275         if (!lex_next_chunk(0)) {
10276             sv_free(sv);
10277             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10278             return NULL;
10279         }
10280         s = PL_bufptr;
10281 #ifdef PERL_MAD
10282         stuffstart = 0;
10283 #endif
10284     }
10285
10286     /* at this point, we have successfully read the delimited string */
10287
10288     if (!PL_encoding || UTF || re_reparse) {
10289 #ifdef PERL_MAD
10290         if (PL_madskills) {
10291             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10292             const int len = s - tstart;
10293             if (PL_thisstuff)
10294                 sv_catpvn(PL_thisstuff, tstart, len);
10295             else
10296                 PL_thisstuff = newSVpvn(tstart, len);
10297             if (!PL_thisclose && !keep_delims)
10298                 PL_thisclose = newSVpvn(s,termlen);
10299         }
10300 #endif
10301
10302         if (keep_delims)
10303             sv_catpvn(sv, s, termlen);
10304         s += termlen;
10305     }
10306 #ifdef PERL_MAD
10307     else {
10308         if (PL_madskills) {
10309             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10310             const int len = s - tstart - termlen;
10311             if (PL_thisstuff)
10312                 sv_catpvn(PL_thisstuff, tstart, len);
10313             else
10314                 PL_thisstuff = newSVpvn(tstart, len);
10315             if (!PL_thisclose && !keep_delims)
10316                 PL_thisclose = newSVpvn(s - termlen,termlen);
10317         }
10318     }
10319 #endif
10320     if (has_utf8 || (PL_encoding && !re_reparse))
10321         SvUTF8_on(sv);
10322
10323     PL_multi_end = CopLINE(PL_curcop);
10324
10325     /* if we allocated too much space, give some back */
10326     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10327         SvLEN_set(sv, SvCUR(sv) + 1);
10328         SvPV_renew(sv, SvLEN(sv));
10329     }
10330
10331     /* decide whether this is the first or second quoted string we've read
10332        for this op
10333     */
10334
10335     if (PL_lex_stuff)
10336         PL_sublex_info.repl = sv;
10337     else
10338         PL_lex_stuff = sv;
10339     return s;
10340 }
10341
10342 /*
10343   scan_num
10344   takes: pointer to position in buffer
10345   returns: pointer to new position in buffer
10346   side-effects: builds ops for the constant in pl_yylval.op
10347
10348   Read a number in any of the formats that Perl accepts:
10349
10350   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10351   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10352   0b[01](_?[01])*
10353   0[0-7](_?[0-7])*
10354   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10355
10356   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10357   thing it reads.
10358
10359   If it reads a number without a decimal point or an exponent, it will
10360   try converting the number to an integer and see if it can do so
10361   without loss of precision.
10362 */
10363
10364 char *
10365 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10366 {
10367     dVAR;
10368     const char *s = start;      /* current position in buffer */
10369     char *d;                    /* destination in temp buffer */
10370     char *e;                    /* end of temp buffer */
10371     NV nv;                              /* number read, as a double */
10372     SV *sv = NULL;                      /* place to put the converted number */
10373     bool floatit;                       /* boolean: int or float? */
10374     const char *lastub = NULL;          /* position of last underbar */
10375     static char const number_too_long[] = "Number too long";
10376
10377     PERL_ARGS_ASSERT_SCAN_NUM;
10378
10379     /* We use the first character to decide what type of number this is */
10380
10381     switch (*s) {
10382     default:
10383         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10384
10385     /* if it starts with a 0, it could be an octal number, a decimal in
10386        0.13 disguise, or a hexadecimal number, or a binary number. */
10387     case '0':
10388         {
10389           /* variables:
10390              u          holds the "number so far"
10391              shift      the power of 2 of the base
10392                         (hex == 4, octal == 3, binary == 1)
10393              overflowed was the number more than we can hold?
10394
10395              Shift is used when we add a digit.  It also serves as an "are
10396              we in octal/hex/binary?" indicator to disallow hex characters
10397              when in octal mode.
10398            */
10399             NV n = 0.0;
10400             UV u = 0;
10401             I32 shift;
10402             bool overflowed = FALSE;
10403             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10404             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10405             static const char* const bases[5] =
10406               { "", "binary", "", "octal", "hexadecimal" };
10407             static const char* const Bases[5] =
10408               { "", "Binary", "", "Octal", "Hexadecimal" };
10409             static const char* const maxima[5] =
10410               { "",
10411                 "0b11111111111111111111111111111111",
10412                 "",
10413                 "037777777777",
10414                 "0xffffffff" };
10415             const char *base, *Base, *max;
10416
10417             /* check for hex */
10418             if (s[1] == 'x' || s[1] == 'X') {
10419                 shift = 4;
10420                 s += 2;
10421                 just_zero = FALSE;
10422             } else if (s[1] == 'b' || s[1] == 'B') {
10423                 shift = 1;
10424                 s += 2;
10425                 just_zero = FALSE;
10426             }
10427             /* check for a decimal in disguise */
10428             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10429                 goto decimal;
10430             /* so it must be octal */
10431             else {
10432                 shift = 3;
10433                 s++;
10434             }
10435
10436             if (*s == '_') {
10437                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10438                                "Misplaced _ in number");
10439                lastub = s++;
10440             }
10441
10442             base = bases[shift];
10443             Base = Bases[shift];
10444             max  = maxima[shift];
10445
10446             /* read the rest of the number */
10447             for (;;) {
10448                 /* x is used in the overflow test,
10449                    b is the digit we're adding on. */
10450                 UV x, b;
10451
10452                 switch (*s) {
10453
10454                 /* if we don't mention it, we're done */
10455                 default:
10456                     goto out;
10457
10458                 /* _ are ignored -- but warned about if consecutive */
10459                 case '_':
10460                     if (lastub && s == lastub + 1)
10461                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10462                                        "Misplaced _ in number");
10463                     lastub = s++;
10464                     break;
10465
10466                 /* 8 and 9 are not octal */
10467                 case '8': case '9':
10468                     if (shift == 3)
10469                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10470                     /* FALL THROUGH */
10471
10472                 /* octal digits */
10473                 case '2': case '3': case '4':
10474                 case '5': case '6': case '7':
10475                     if (shift == 1)
10476                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10477                     /* FALL THROUGH */
10478
10479                 case '0': case '1':
10480                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10481                     goto digit;
10482
10483                 /* hex digits */
10484                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10485                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10486                     /* make sure they said 0x */
10487                     if (shift != 4)
10488                         goto out;
10489                     b = (*s++ & 7) + 9;
10490
10491                     /* Prepare to put the digit we have onto the end
10492                        of the number so far.  We check for overflows.
10493                     */
10494
10495                   digit:
10496                     just_zero = FALSE;
10497                     if (!overflowed) {
10498                         x = u << shift; /* make room for the digit */
10499
10500                         if ((x >> shift) != u
10501                             && !(PL_hints & HINT_NEW_BINARY)) {
10502                             overflowed = TRUE;
10503                             n = (NV) u;
10504                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10505                                              "Integer overflow in %s number",
10506                                              base);
10507                         } else
10508                             u = x | b;          /* add the digit to the end */
10509                     }
10510                     if (overflowed) {
10511                         n *= nvshift[shift];
10512                         /* If an NV has not enough bits in its
10513                          * mantissa to represent an UV this summing of
10514                          * small low-order numbers is a waste of time
10515                          * (because the NV cannot preserve the
10516                          * low-order bits anyway): we could just
10517                          * remember when did we overflow and in the
10518                          * end just multiply n by the right
10519                          * amount. */
10520                         n += (NV) b;
10521                     }
10522                     break;
10523                 }
10524             }
10525
10526           /* if we get here, we had success: make a scalar value from
10527              the number.
10528           */
10529           out:
10530
10531             /* final misplaced underbar check */
10532             if (s[-1] == '_') {
10533                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10534             }
10535
10536             if (overflowed) {
10537                 if (n > 4294967295.0)
10538                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10539                                    "%s number > %s non-portable",
10540                                    Base, max);
10541                 sv = newSVnv(n);
10542             }
10543             else {
10544 #if UVSIZE > 4
10545                 if (u > 0xffffffff)
10546                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10547                                    "%s number > %s non-portable",
10548                                    Base, max);
10549 #endif
10550                 sv = newSVuv(u);
10551             }
10552             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10553                 sv = new_constant(start, s - start, "integer",
10554                                   sv, NULL, NULL, 0);
10555             else if (PL_hints & HINT_NEW_BINARY)
10556                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10557         }
10558         break;
10559
10560     /*
10561       handle decimal numbers.
10562       we're also sent here when we read a 0 as the first digit
10563     */
10564     case '1': case '2': case '3': case '4': case '5':
10565     case '6': case '7': case '8': case '9': case '.':
10566       decimal:
10567         d = PL_tokenbuf;
10568         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10569         floatit = FALSE;
10570
10571         /* read next group of digits and _ and copy into d */
10572         while (isDIGIT(*s) || *s == '_') {
10573             /* skip underscores, checking for misplaced ones
10574                if -w is on
10575             */
10576             if (*s == '_') {
10577                 if (lastub && s == lastub + 1)
10578                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10579                                    "Misplaced _ in number");
10580                 lastub = s++;
10581             }
10582             else {
10583                 /* check for end of fixed-length buffer */
10584                 if (d >= e)
10585                     Perl_croak(aTHX_ number_too_long);
10586                 /* if we're ok, copy the character */
10587                 *d++ = *s++;
10588             }
10589         }
10590
10591         /* final misplaced underbar check */
10592         if (lastub && s == lastub + 1) {
10593             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10594         }
10595
10596         /* read a decimal portion if there is one.  avoid
10597            3..5 being interpreted as the number 3. followed
10598            by .5
10599         */
10600         if (*s == '.' && s[1] != '.') {
10601             floatit = TRUE;
10602             *d++ = *s++;
10603
10604             if (*s == '_') {
10605                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10606                                "Misplaced _ in number");
10607                 lastub = s;
10608             }
10609
10610             /* copy, ignoring underbars, until we run out of digits.
10611             */
10612             for (; isDIGIT(*s) || *s == '_'; s++) {
10613                 /* fixed length buffer check */
10614                 if (d >= e)
10615                     Perl_croak(aTHX_ number_too_long);
10616                 if (*s == '_') {
10617                    if (lastub && s == lastub + 1)
10618                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10619                                       "Misplaced _ in number");
10620                    lastub = s;
10621                 }
10622                 else
10623                     *d++ = *s;
10624             }
10625             /* fractional part ending in underbar? */
10626             if (s[-1] == '_') {
10627                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10628                                "Misplaced _ in number");
10629             }
10630             if (*s == '.' && isDIGIT(s[1])) {
10631                 /* oops, it's really a v-string, but without the "v" */
10632                 s = start;
10633                 goto vstring;
10634             }
10635         }
10636
10637         /* read exponent part, if present */
10638         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10639             floatit = TRUE;
10640             s++;
10641
10642             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10643             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10644
10645             /* stray preinitial _ */
10646             if (*s == '_') {
10647                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10648                                "Misplaced _ in number");
10649                 lastub = s++;
10650             }
10651
10652             /* allow positive or negative exponent */
10653             if (*s == '+' || *s == '-')
10654                 *d++ = *s++;
10655
10656             /* stray initial _ */
10657             if (*s == '_') {
10658                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10659                                "Misplaced _ in number");
10660                 lastub = s++;
10661             }
10662
10663             /* read digits of exponent */
10664             while (isDIGIT(*s) || *s == '_') {
10665                 if (isDIGIT(*s)) {
10666                     if (d >= e)
10667                         Perl_croak(aTHX_ number_too_long);
10668                     *d++ = *s++;
10669                 }
10670                 else {
10671                    if (((lastub && s == lastub + 1) ||
10672                         (!isDIGIT(s[1]) && s[1] != '_')))
10673                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10674                                       "Misplaced _ in number");
10675                    lastub = s++;
10676                 }
10677             }
10678         }
10679
10680
10681         /*
10682            We try to do an integer conversion first if no characters
10683            indicating "float" have been found.
10684          */
10685
10686         if (!floatit) {
10687             UV uv;
10688             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10689
10690             if (flags == IS_NUMBER_IN_UV) {
10691               if (uv <= IV_MAX)
10692                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10693               else
10694                 sv = newSVuv(uv);
10695             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10696               if (uv <= (UV) IV_MIN)
10697                 sv = newSViv(-(IV)uv);
10698               else
10699                 floatit = TRUE;
10700             } else
10701               floatit = TRUE;
10702         }
10703         if (floatit) {
10704             /* terminate the string */
10705             *d = '\0';
10706             nv = Atof(PL_tokenbuf);
10707             sv = newSVnv(nv);
10708         }
10709
10710         if ( floatit
10711              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10712             const char *const key = floatit ? "float" : "integer";
10713             const STRLEN keylen = floatit ? 5 : 7;
10714             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10715                                 key, keylen, sv, NULL, NULL, 0);
10716         }
10717         break;
10718
10719     /* if it starts with a v, it could be a v-string */
10720     case 'v':
10721 vstring:
10722                 sv = newSV(5); /* preallocate storage space */
10723                 s = scan_vstring(s, PL_bufend, sv);
10724         break;
10725     }
10726
10727     /* make the op for the constant and return */
10728
10729     if (sv)
10730         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10731     else
10732         lvalp->opval = NULL;
10733
10734     return (char *)s;
10735 }
10736
10737 STATIC char *
10738 S_scan_formline(pTHX_ register char *s)
10739 {
10740     dVAR;
10741     char *eol;
10742     char *t;
10743     SV * const stuff = newSVpvs("");
10744     bool needargs = FALSE;
10745     bool eofmt = FALSE;
10746 #ifdef PERL_MAD
10747     char *tokenstart = s;
10748     SV* savewhite = NULL;
10749
10750     if (PL_madskills) {
10751         savewhite = PL_thiswhite;
10752         PL_thiswhite = 0;
10753     }
10754 #endif
10755
10756     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10757
10758     while (!needargs) {
10759         if (*s == '.') {
10760             t = s+1;
10761 #ifdef PERL_STRICT_CR
10762             while (SPACE_OR_TAB(*t))
10763                 t++;
10764 #else
10765             while (SPACE_OR_TAB(*t) || *t == '\r')
10766                 t++;
10767 #endif
10768             if (*t == '\n' || t == PL_bufend) {
10769                 eofmt = TRUE;
10770                 break;
10771             }
10772         }
10773         eol = (char *) memchr(s,'\n',PL_bufend-s);
10774         if (!eol++)
10775                 eol = PL_bufend;
10776         if (*s != '#') {
10777             for (t = s; t < eol; t++) {
10778                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10779                     needargs = FALSE;
10780                     goto enough;        /* ~~ must be first line in formline */
10781                 }
10782                 if (*t == '@' || *t == '^')
10783                     needargs = TRUE;
10784             }
10785             if (eol > s) {
10786                 sv_catpvn(stuff, s, eol-s);
10787 #ifndef PERL_STRICT_CR
10788                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10789                     char *end = SvPVX(stuff) + SvCUR(stuff);
10790                     end[-2] = '\n';
10791                     end[-1] = '\0';
10792                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10793                 }
10794 #endif
10795             }
10796             else
10797               break;
10798         }
10799         s = (char*)eol;
10800         if ((PL_rsfp || PL_parser->filtered)
10801          && PL_parser->form_lex_state == LEX_NORMAL) {
10802             bool got_some;
10803 #ifdef PERL_MAD
10804             if (PL_madskills) {
10805                 if (PL_thistoken)
10806                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10807                 else
10808                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10809             }
10810 #endif
10811             PL_bufptr = PL_bufend;
10812             CopLINE_inc(PL_curcop);
10813             got_some = lex_next_chunk(0);
10814             CopLINE_dec(PL_curcop);
10815             s = PL_bufptr;
10816 #ifdef PERL_MAD
10817             tokenstart = PL_bufptr;
10818 #endif
10819             if (!got_some)
10820                 break;
10821         }
10822         incline(s);
10823     }
10824   enough:
10825     if (!SvCUR(stuff) || needargs)
10826         PL_lex_state = PL_parser->form_lex_state;
10827     if (SvCUR(stuff)) {
10828         PL_expect = XSTATE;
10829         if (needargs) {
10830             start_force(PL_curforce);
10831             NEXTVAL_NEXTTOKE.ival = 0;
10832             force_next(FORMLBRACK);
10833         }
10834         if (!IN_BYTES) {
10835             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10836                 SvUTF8_on(stuff);
10837             else if (PL_encoding)
10838                 sv_recode_to_utf8(stuff, PL_encoding);
10839         }
10840         start_force(PL_curforce);
10841         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10842         force_next(THING);
10843     }
10844     else {
10845         SvREFCNT_dec(stuff);
10846         if (eofmt)
10847             PL_lex_formbrack = 0;
10848     }
10849 #ifdef PERL_MAD
10850     if (PL_madskills) {
10851         if (PL_thistoken)
10852             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10853         else
10854             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10855         PL_thiswhite = savewhite;
10856     }
10857 #endif
10858     return s;
10859 }
10860
10861 I32
10862 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10863 {
10864     dVAR;
10865     const I32 oldsavestack_ix = PL_savestack_ix;
10866     CV* const outsidecv = PL_compcv;
10867
10868     SAVEI32(PL_subline);
10869     save_item(PL_subname);
10870     SAVESPTR(PL_compcv);
10871
10872     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10873     CvFLAGS(PL_compcv) |= flags;
10874
10875     PL_subline = CopLINE(PL_curcop);
10876     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10877     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10878     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10879     if (outsidecv && CvPADLIST(outsidecv))
10880         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10881
10882     return oldsavestack_ix;
10883 }
10884
10885 #ifdef __SC__
10886 #pragma segment Perl_yylex
10887 #endif
10888 static int
10889 S_yywarn(pTHX_ const char *const s, U32 flags)
10890 {
10891     dVAR;
10892
10893     PERL_ARGS_ASSERT_YYWARN;
10894
10895     PL_in_eval |= EVAL_WARNONLY;
10896     yyerror_pv(s, flags);
10897     PL_in_eval &= ~EVAL_WARNONLY;
10898     return 0;
10899 }
10900
10901 int
10902 Perl_yyerror(pTHX_ const char *const s)
10903 {
10904     PERL_ARGS_ASSERT_YYERROR;
10905     return yyerror_pvn(s, strlen(s), 0);
10906 }
10907
10908 int
10909 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10910 {
10911     PERL_ARGS_ASSERT_YYERROR_PV;
10912     return yyerror_pvn(s, strlen(s), flags);
10913 }
10914
10915 int
10916 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10917 {
10918     dVAR;
10919     const char *context = NULL;
10920     int contlen = -1;
10921     SV *msg;
10922     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10923     int yychar  = PL_parser->yychar;
10924     U32 is_utf8 = flags & SVf_UTF8;
10925
10926     PERL_ARGS_ASSERT_YYERROR_PVN;
10927
10928     if (!yychar || (yychar == ';' && !PL_rsfp))
10929         sv_catpvs(where_sv, "at EOF");
10930     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10931       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10932       PL_oldbufptr != PL_bufptr) {
10933         /*
10934                 Only for NetWare:
10935                 The code below is removed for NetWare because it abends/crashes on NetWare
10936                 when the script has error such as not having the closing quotes like:
10937                     if ($var eq "value)
10938                 Checking of white spaces is anyway done in NetWare code.
10939         */
10940 #ifndef NETWARE
10941         while (isSPACE(*PL_oldoldbufptr))
10942             PL_oldoldbufptr++;
10943 #endif
10944         context = PL_oldoldbufptr;
10945         contlen = PL_bufptr - PL_oldoldbufptr;
10946     }
10947     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10948       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10949         /*
10950                 Only for NetWare:
10951                 The code below is removed for NetWare because it abends/crashes on NetWare
10952                 when the script has error such as not having the closing quotes like:
10953                     if ($var eq "value)
10954                 Checking of white spaces is anyway done in NetWare code.
10955         */
10956 #ifndef NETWARE
10957         while (isSPACE(*PL_oldbufptr))
10958             PL_oldbufptr++;
10959 #endif
10960         context = PL_oldbufptr;
10961         contlen = PL_bufptr - PL_oldbufptr;
10962     }
10963     else if (yychar > 255)
10964         sv_catpvs(where_sv, "next token ???");
10965     else if (yychar == -2) { /* YYEMPTY */
10966         if (PL_lex_state == LEX_NORMAL ||
10967            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10968             sv_catpvs(where_sv, "at end of line");
10969         else if (PL_lex_inpat)
10970             sv_catpvs(where_sv, "within pattern");
10971         else
10972             sv_catpvs(where_sv, "within string");
10973     }
10974     else {
10975         sv_catpvs(where_sv, "next char ");
10976         if (yychar < 32)
10977             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10978         else if (isPRINT_LC(yychar)) {
10979             const char string = yychar;
10980             sv_catpvn(where_sv, &string, 1);
10981         }
10982         else
10983             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10984     }
10985     msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
10986     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10987         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10988     if (context)
10989         Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
10990                             SVfARG(newSVpvn_flags(context, contlen,
10991                                         SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
10992     else
10993         Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10994     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10995         Perl_sv_catpvf(aTHX_ msg,
10996         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10997                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10998         PL_multi_end = 0;
10999     }
11000     if (PL_in_eval & EVAL_WARNONLY) {
11001         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11002     }
11003     else
11004         qerror(msg);
11005     if (PL_error_count >= 10) {
11006         if (PL_in_eval && SvCUR(ERRSV))
11007             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11008                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
11009         else
11010             Perl_croak(aTHX_ "%s has too many errors.\n",
11011             OutCopFILE(PL_curcop));
11012     }
11013     PL_in_my = 0;
11014     PL_in_my_stash = NULL;
11015     return 0;
11016 }
11017 #ifdef __SC__
11018 #pragma segment Main
11019 #endif
11020
11021 STATIC char*
11022 S_swallow_bom(pTHX_ U8 *s)
11023 {
11024     dVAR;
11025     const STRLEN slen = SvCUR(PL_linestr);
11026
11027     PERL_ARGS_ASSERT_SWALLOW_BOM;
11028
11029     switch (s[0]) {
11030     case 0xFF:
11031         if (s[1] == 0xFE) {
11032             /* UTF-16 little-endian? (or UTF-32LE?) */
11033             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11034                 /* diag_listed_as: Unsupported script encoding %s */
11035                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11036 #ifndef PERL_NO_UTF16_FILTER
11037             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11038             s += 2;
11039             if (PL_bufend > (char*)s) {
11040                 s = add_utf16_textfilter(s, TRUE);
11041             }
11042 #else
11043             /* diag_listed_as: Unsupported script encoding %s */
11044             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11045 #endif
11046         }
11047         break;
11048     case 0xFE:
11049         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11050 #ifndef PERL_NO_UTF16_FILTER
11051             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11052             s += 2;
11053             if (PL_bufend > (char *)s) {
11054                 s = add_utf16_textfilter(s, FALSE);
11055             }
11056 #else
11057             /* diag_listed_as: Unsupported script encoding %s */
11058             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11059 #endif
11060         }
11061         break;
11062     case 0xEF:
11063         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11064             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11065             s += 3;                      /* UTF-8 */
11066         }
11067         break;
11068     case 0:
11069         if (slen > 3) {
11070              if (s[1] == 0) {
11071                   if (s[2] == 0xFE && s[3] == 0xFF) {
11072                        /* UTF-32 big-endian */
11073                        /* diag_listed_as: Unsupported script encoding %s */
11074                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11075                   }
11076              }
11077              else if (s[2] == 0 && s[3] != 0) {
11078                   /* Leading bytes
11079                    * 00 xx 00 xx
11080                    * are a good indicator of UTF-16BE. */
11081 #ifndef PERL_NO_UTF16_FILTER
11082                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11083                   s = add_utf16_textfilter(s, FALSE);
11084 #else
11085                   /* diag_listed_as: Unsupported script encoding %s */
11086                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11087 #endif
11088              }
11089         }
11090 #ifdef EBCDIC
11091     case 0xDD:
11092         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11093             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11094             s += 4;                      /* UTF-8 */
11095         }
11096         break;
11097 #endif
11098
11099     default:
11100          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11101                   /* Leading bytes
11102                    * xx 00 xx 00
11103                    * are a good indicator of UTF-16LE. */
11104 #ifndef PERL_NO_UTF16_FILTER
11105               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11106               s = add_utf16_textfilter(s, TRUE);
11107 #else
11108               /* diag_listed_as: Unsupported script encoding %s */
11109               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11110 #endif
11111          }
11112     }
11113     return (char*)s;
11114 }
11115
11116
11117 #ifndef PERL_NO_UTF16_FILTER
11118 static I32
11119 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11120 {
11121     dVAR;
11122     SV *const filter = FILTER_DATA(idx);
11123     /* We re-use this each time round, throwing the contents away before we
11124        return.  */
11125     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11126     SV *const utf8_buffer = filter;
11127     IV status = IoPAGE(filter);
11128     const bool reverse = cBOOL(IoLINES(filter));
11129     I32 retval;
11130
11131     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11132
11133     /* As we're automatically added, at the lowest level, and hence only called
11134        from this file, we can be sure that we're not called in block mode. Hence
11135        don't bother writing code to deal with block mode.  */
11136     if (maxlen) {
11137         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11138     }
11139     if (status < 0) {
11140         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11141     }
11142     DEBUG_P(PerlIO_printf(Perl_debug_log,
11143                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11144                           FPTR2DPTR(void *, S_utf16_textfilter),
11145                           reverse ? 'l' : 'b', idx, maxlen, status,
11146                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11147
11148     while (1) {
11149         STRLEN chars;
11150         STRLEN have;
11151         I32 newlen;
11152         U8 *end;
11153         /* First, look in our buffer of existing UTF-8 data:  */
11154         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11155
11156         if (nl) {
11157             ++nl;
11158         } else if (status == 0) {
11159             /* EOF */
11160             IoPAGE(filter) = 0;
11161             nl = SvEND(utf8_buffer);
11162         }
11163         if (nl) {
11164             STRLEN got = nl - SvPVX(utf8_buffer);
11165             /* Did we have anything to append?  */
11166             retval = got != 0;
11167             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11168             /* Everything else in this code works just fine if SVp_POK isn't
11169                set.  This, however, needs it, and we need it to work, else
11170                we loop infinitely because the buffer is never consumed.  */
11171             sv_chop(utf8_buffer, nl);
11172             break;
11173         }
11174
11175         /* OK, not a complete line there, so need to read some more UTF-16.
11176            Read an extra octect if the buffer currently has an odd number. */
11177         while (1) {
11178             if (status <= 0)
11179                 break;
11180             if (SvCUR(utf16_buffer) >= 2) {
11181                 /* Location of the high octet of the last complete code point.
11182                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11183                    *coupled* with all the benefits of partial reads and
11184                    endianness.  */
11185                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11186                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11187
11188                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11189                     break;
11190                 }
11191
11192                 /* We have the first half of a surrogate. Read more.  */
11193                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11194             }
11195
11196             status = FILTER_READ(idx + 1, utf16_buffer,
11197                                  160 + (SvCUR(utf16_buffer) & 1));
11198             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11199             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11200             if (status < 0) {
11201                 /* Error */
11202                 IoPAGE(filter) = status;
11203                 return status;
11204             }
11205         }
11206
11207         chars = SvCUR(utf16_buffer) >> 1;
11208         have = SvCUR(utf8_buffer);
11209         SvGROW(utf8_buffer, have + chars * 3 + 1);
11210
11211         if (reverse) {
11212             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11213                                          (U8*)SvPVX_const(utf8_buffer) + have,
11214                                          chars * 2, &newlen);
11215         } else {
11216             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11217                                 (U8*)SvPVX_const(utf8_buffer) + have,
11218                                 chars * 2, &newlen);
11219         }
11220         SvCUR_set(utf8_buffer, have + newlen);
11221         *end = '\0';
11222
11223         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11224            it's private to us, and utf16_to_utf8{,reversed} take a
11225            (pointer,length) pair, rather than a NUL-terminated string.  */
11226         if(SvCUR(utf16_buffer) & 1) {
11227             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11228             SvCUR_set(utf16_buffer, 1);
11229         } else {
11230             SvCUR_set(utf16_buffer, 0);
11231         }
11232     }
11233     DEBUG_P(PerlIO_printf(Perl_debug_log,
11234                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11235                           status,
11236                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11237     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11238     return retval;
11239 }
11240
11241 static U8 *
11242 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11243 {
11244     SV *filter = filter_add(S_utf16_textfilter, NULL);
11245
11246     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11247
11248     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11249     sv_setpvs(filter, "");
11250     IoLINES(filter) = reversed;
11251     IoPAGE(filter) = 1; /* Not EOF */
11252
11253     /* Sadly, we have to return a valid pointer, come what may, so we have to
11254        ignore any error return from this.  */
11255     SvCUR_set(PL_linestr, 0);
11256     if (FILTER_READ(0, PL_linestr, 0)) {
11257         SvUTF8_on(PL_linestr);
11258     } else {
11259         SvUTF8_on(PL_linestr);
11260     }
11261     PL_bufend = SvEND(PL_linestr);
11262     return (U8*)SvPVX(PL_linestr);
11263 }
11264 #endif
11265
11266 /*
11267 Returns a pointer to the next character after the parsed
11268 vstring, as well as updating the passed in sv.
11269
11270 Function must be called like
11271
11272         sv = newSV(5);
11273         s = scan_vstring(s,e,sv);
11274
11275 where s and e are the start and end of the string.
11276 The sv should already be large enough to store the vstring
11277 passed in, for performance reasons.
11278
11279 */
11280
11281 char *
11282 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11283 {
11284     dVAR;
11285     const char *pos = s;
11286     const char *start = s;
11287
11288     PERL_ARGS_ASSERT_SCAN_VSTRING;
11289
11290     if (*pos == 'v') pos++;  /* get past 'v' */
11291     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11292         pos++;
11293     if ( *pos != '.') {
11294         /* this may not be a v-string if followed by => */
11295         const char *next = pos;
11296         while (next < e && isSPACE(*next))
11297             ++next;
11298         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11299             /* return string not v-string */
11300             sv_setpvn(sv,(char *)s,pos-s);
11301             return (char *)pos;
11302         }
11303     }
11304
11305     if (!isALPHA(*pos)) {
11306         U8 tmpbuf[UTF8_MAXBYTES+1];
11307
11308         if (*s == 'v')
11309             s++;  /* get past 'v' */
11310
11311         sv_setpvs(sv, "");
11312
11313         for (;;) {
11314             /* this is atoi() that tolerates underscores */
11315             U8 *tmpend;
11316             UV rev = 0;
11317             const char *end = pos;
11318             UV mult = 1;
11319             while (--end >= s) {
11320                 if (*end != '_') {
11321                     const UV orev = rev;
11322                     rev += (*end - '0') * mult;
11323                     mult *= 10;
11324                     if (orev > rev)
11325                         /* diag_listed_as: Integer overflow in %s number */
11326                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11327                                          "Integer overflow in decimal number");
11328                 }
11329             }
11330 #ifdef EBCDIC
11331             if (rev > 0x7FFFFFFF)
11332                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11333 #endif
11334             /* Append native character for the rev point */
11335             tmpend = uvchr_to_utf8(tmpbuf, rev);
11336             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11337             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11338                  SvUTF8_on(sv);
11339             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11340                  s = ++pos;
11341             else {
11342                  s = pos;
11343                  break;
11344             }
11345             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11346                  pos++;
11347         }
11348         SvPOK_on(sv);
11349         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11350         SvRMAGICAL_on(sv);
11351     }
11352     return (char *)s;
11353 }
11354
11355 int
11356 Perl_keyword_plugin_standard(pTHX_
11357         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11358 {
11359     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11360     PERL_UNUSED_CONTEXT;
11361     PERL_UNUSED_ARG(keyword_ptr);
11362     PERL_UNUSED_ARG(keyword_len);
11363     PERL_UNUSED_ARG(op_ptr);
11364     return KEYWORD_PLUGIN_DECLINE;
11365 }
11366
11367 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11368 static void
11369 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11370 {
11371     SAVEI32(PL_lex_brackets);
11372     if (PL_lex_brackets > 100)
11373         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11374     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11375     SAVEI32(PL_lex_allbrackets);
11376     PL_lex_allbrackets = 0;
11377     SAVEI8(PL_lex_fakeeof);
11378     PL_lex_fakeeof = (U8)fakeeof;
11379     if(yyparse(gramtype) && !PL_parser->error_count)
11380         qerror(Perl_mess(aTHX_ "Parse error"));
11381 }
11382
11383 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11384 static OP *
11385 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11386 {
11387     OP *o;
11388     ENTER;
11389     SAVEVPTR(PL_eval_root);
11390     PL_eval_root = NULL;
11391     parse_recdescent(gramtype, fakeeof);
11392     o = PL_eval_root;
11393     LEAVE;
11394     return o;
11395 }
11396
11397 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11398 static OP *
11399 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11400 {
11401     OP *exprop;
11402     if (flags & ~PARSE_OPTIONAL)
11403         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11404     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11405     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11406         if (!PL_parser->error_count)
11407             qerror(Perl_mess(aTHX_ "Parse error"));
11408         exprop = newOP(OP_NULL, 0);
11409     }
11410     return exprop;
11411 }
11412
11413 /*
11414 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11415
11416 Parse a Perl arithmetic expression.  This may contain operators of precedence
11417 down to the bit shift operators.  The expression must be followed (and thus
11418 terminated) either by a comparison or lower-precedence operator or by
11419 something that would normally terminate an expression such as semicolon.
11420 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11421 otherwise it is mandatory.  It is up to the caller to ensure that the
11422 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11423 the source of the code to be parsed and the lexical context for the
11424 expression.
11425
11426 The op tree representing the expression is returned.  If an optional
11427 expression is absent, a null pointer is returned, otherwise the pointer
11428 will be non-null.
11429
11430 If an error occurs in parsing or compilation, in most cases a valid op
11431 tree is returned anyway.  The error is reflected in the parser state,
11432 normally resulting in a single exception at the top level of parsing
11433 which covers all the compilation errors that occurred.  Some compilation
11434 errors, however, will throw an exception immediately.
11435
11436 =cut
11437 */
11438
11439 OP *
11440 Perl_parse_arithexpr(pTHX_ U32 flags)
11441 {
11442     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11443 }
11444
11445 /*
11446 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11447
11448 Parse a Perl term expression.  This may contain operators of precedence
11449 down to the assignment operators.  The expression must be followed (and thus
11450 terminated) either by a comma or lower-precedence operator or by
11451 something that would normally terminate an expression such as semicolon.
11452 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11453 otherwise it is mandatory.  It is up to the caller to ensure that the
11454 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11455 the source of the code to be parsed and the lexical context for the
11456 expression.
11457
11458 The op tree representing the expression is returned.  If an optional
11459 expression is absent, a null pointer is returned, otherwise the pointer
11460 will be non-null.
11461
11462 If an error occurs in parsing or compilation, in most cases a valid op
11463 tree is returned anyway.  The error is reflected in the parser state,
11464 normally resulting in a single exception at the top level of parsing
11465 which covers all the compilation errors that occurred.  Some compilation
11466 errors, however, will throw an exception immediately.
11467
11468 =cut
11469 */
11470
11471 OP *
11472 Perl_parse_termexpr(pTHX_ U32 flags)
11473 {
11474     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11475 }
11476
11477 /*
11478 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11479
11480 Parse a Perl list expression.  This may contain operators of precedence
11481 down to the comma operator.  The expression must be followed (and thus
11482 terminated) either by a low-precedence logic operator such as C<or> or by
11483 something that would normally terminate an expression such as semicolon.
11484 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11485 otherwise it is mandatory.  It is up to the caller to ensure that the
11486 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11487 the source of the code to be parsed and the lexical context for the
11488 expression.
11489
11490 The op tree representing the expression is returned.  If an optional
11491 expression is absent, a null pointer is returned, otherwise the pointer
11492 will be non-null.
11493
11494 If an error occurs in parsing or compilation, in most cases a valid op
11495 tree is returned anyway.  The error is reflected in the parser state,
11496 normally resulting in a single exception at the top level of parsing
11497 which covers all the compilation errors that occurred.  Some compilation
11498 errors, however, will throw an exception immediately.
11499
11500 =cut
11501 */
11502
11503 OP *
11504 Perl_parse_listexpr(pTHX_ U32 flags)
11505 {
11506     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11507 }
11508
11509 /*
11510 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11511
11512 Parse a single complete Perl expression.  This allows the full
11513 expression grammar, including the lowest-precedence operators such
11514 as C<or>.  The expression must be followed (and thus terminated) by a
11515 token that an expression would normally be terminated by: end-of-file,
11516 closing bracketing punctuation, semicolon, or one of the keywords that
11517 signals a postfix expression-statement modifier.  If I<flags> includes
11518 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11519 mandatory.  It is up to the caller to ensure that the dynamic parser
11520 state (L</PL_parser> et al) is correctly set to reflect the source of
11521 the code to be parsed and the lexical context for the expression.
11522
11523 The op tree representing the expression is returned.  If an optional
11524 expression is absent, a null pointer is returned, otherwise the pointer
11525 will be non-null.
11526
11527 If an error occurs in parsing or compilation, in most cases a valid op
11528 tree is returned anyway.  The error is reflected in the parser state,
11529 normally resulting in a single exception at the top level of parsing
11530 which covers all the compilation errors that occurred.  Some compilation
11531 errors, however, will throw an exception immediately.
11532
11533 =cut
11534 */
11535
11536 OP *
11537 Perl_parse_fullexpr(pTHX_ U32 flags)
11538 {
11539     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11540 }
11541
11542 /*
11543 =for apidoc Amx|OP *|parse_block|U32 flags
11544
11545 Parse a single complete Perl code block.  This consists of an opening
11546 brace, a sequence of statements, and a closing brace.  The block
11547 constitutes a lexical scope, so C<my> variables and various compile-time
11548 effects can be contained within it.  It is up to the caller to ensure
11549 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11550 reflect the source of the code to be parsed and the lexical context for
11551 the statement.
11552
11553 The op tree representing the code block is returned.  This is always a
11554 real op, never a null pointer.  It will normally be a C<lineseq> list,
11555 including C<nextstate> or equivalent ops.  No ops to construct any kind
11556 of runtime scope are included by virtue of it being a block.
11557
11558 If an error occurs in parsing or compilation, in most cases a valid op
11559 tree (most likely null) is returned anyway.  The error is reflected in
11560 the parser state, normally resulting in a single exception at the top
11561 level of parsing which covers all the compilation errors that occurred.
11562 Some compilation errors, however, will throw an exception immediately.
11563
11564 The I<flags> parameter is reserved for future use, and must always
11565 be zero.
11566
11567 =cut
11568 */
11569
11570 OP *
11571 Perl_parse_block(pTHX_ U32 flags)
11572 {
11573     if (flags)
11574         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11575     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11576 }
11577
11578 /*
11579 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11580
11581 Parse a single unadorned Perl statement.  This may be a normal imperative
11582 statement or a declaration that has compile-time effect.  It does not
11583 include any label or other affixture.  It is up to the caller to ensure
11584 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11585 reflect the source of the code to be parsed and the lexical context for
11586 the statement.
11587
11588 The op tree representing the statement is returned.  This may be a
11589 null pointer if the statement is null, for example if it was actually
11590 a subroutine definition (which has compile-time side effects).  If not
11591 null, it will be ops directly implementing the statement, suitable to
11592 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11593 equivalent op (except for those embedded in a scope contained entirely
11594 within the statement).
11595
11596 If an error occurs in parsing or compilation, in most cases a valid op
11597 tree (most likely null) is returned anyway.  The error is reflected in
11598 the parser state, normally resulting in a single exception at the top
11599 level of parsing which covers all the compilation errors that occurred.
11600 Some compilation errors, however, will throw an exception immediately.
11601
11602 The I<flags> parameter is reserved for future use, and must always
11603 be zero.
11604
11605 =cut
11606 */
11607
11608 OP *
11609 Perl_parse_barestmt(pTHX_ U32 flags)
11610 {
11611     if (flags)
11612         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11613     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11614 }
11615
11616 /*
11617 =for apidoc Amx|SV *|parse_label|U32 flags
11618
11619 Parse a single label, possibly optional, of the type that may prefix a
11620 Perl statement.  It is up to the caller to ensure that the dynamic parser
11621 state (L</PL_parser> et al) is correctly set to reflect the source of
11622 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11623 label is optional, otherwise it is mandatory.
11624
11625 The name of the label is returned in the form of a fresh scalar.  If an
11626 optional label is absent, a null pointer is returned.
11627
11628 If an error occurs in parsing, which can only occur if the label is
11629 mandatory, a valid label is returned anyway.  The error is reflected in
11630 the parser state, normally resulting in a single exception at the top
11631 level of parsing which covers all the compilation errors that occurred.
11632
11633 =cut
11634 */
11635
11636 SV *
11637 Perl_parse_label(pTHX_ U32 flags)
11638 {
11639     if (flags & ~PARSE_OPTIONAL)
11640         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11641     if (PL_lex_state == LEX_KNOWNEXT) {
11642         PL_parser->yychar = yylex();
11643         if (PL_parser->yychar == LABEL) {
11644             SV *lsv;
11645             PL_parser->yychar = YYEMPTY;
11646             lsv = newSV_type(SVt_PV);
11647             sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
11648             return lsv;
11649         } else {
11650             yyunlex();
11651             goto no_label;
11652         }
11653     } else {
11654         char *s, *t;
11655         STRLEN wlen, bufptr_pos;
11656         lex_read_space(0);
11657         t = s = PL_bufptr;
11658         if (!isIDFIRST_lazy_if(s, UTF))
11659             goto no_label;
11660         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11661         if (word_takes_any_delimeter(s, wlen))
11662             goto no_label;
11663         bufptr_pos = s - SvPVX(PL_linestr);
11664         PL_bufptr = t;
11665         lex_read_space(LEX_KEEP_PREVIOUS);
11666         t = PL_bufptr;
11667         s = SvPVX(PL_linestr) + bufptr_pos;
11668         if (t[0] == ':' && t[1] != ':') {
11669             PL_oldoldbufptr = PL_oldbufptr;
11670             PL_oldbufptr = s;
11671             PL_bufptr = t+1;
11672             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11673         } else {
11674             PL_bufptr = s;
11675             no_label:
11676             if (flags & PARSE_OPTIONAL) {
11677                 return NULL;
11678             } else {
11679                 qerror(Perl_mess(aTHX_ "Parse error"));
11680                 return newSVpvs("x");
11681             }
11682         }
11683     }
11684 }
11685
11686 /*
11687 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11688
11689 Parse a single complete Perl statement.  This may be a normal imperative
11690 statement or a declaration that has compile-time effect, and may include
11691 optional labels.  It is up to the caller to ensure that the dynamic
11692 parser state (L</PL_parser> et al) is correctly set to reflect the source
11693 of the code to be parsed and the lexical context for the statement.
11694
11695 The op tree representing the statement is returned.  This may be a
11696 null pointer if the statement is null, for example if it was actually
11697 a subroutine definition (which has compile-time side effects).  If not
11698 null, it will be the result of a L</newSTATEOP> call, normally including
11699 a C<nextstate> or equivalent op.
11700
11701 If an error occurs in parsing or compilation, in most cases a valid op
11702 tree (most likely null) is returned anyway.  The error is reflected in
11703 the parser state, normally resulting in a single exception at the top
11704 level of parsing which covers all the compilation errors that occurred.
11705 Some compilation errors, however, will throw an exception immediately.
11706
11707 The I<flags> parameter is reserved for future use, and must always
11708 be zero.
11709
11710 =cut
11711 */
11712
11713 OP *
11714 Perl_parse_fullstmt(pTHX_ U32 flags)
11715 {
11716     if (flags)
11717         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11718     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11719 }
11720
11721 /*
11722 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11723
11724 Parse a sequence of zero or more Perl statements.  These may be normal
11725 imperative statements, including optional labels, or declarations
11726 that have compile-time effect, or any mixture thereof.  The statement
11727 sequence ends when a closing brace or end-of-file is encountered in a
11728 place where a new statement could have validly started.  It is up to
11729 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11730 is correctly set to reflect the source of the code to be parsed and the
11731 lexical context for the statements.
11732
11733 The op tree representing the statement sequence is returned.  This may
11734 be a null pointer if the statements were all null, for example if there
11735 were no statements or if there were only subroutine definitions (which
11736 have compile-time side effects).  If not null, it will be a C<lineseq>
11737 list, normally including C<nextstate> or equivalent ops.
11738
11739 If an error occurs in parsing or compilation, in most cases a valid op
11740 tree is returned anyway.  The error is reflected in the parser state,
11741 normally resulting in a single exception at the top level of parsing
11742 which covers all the compilation errors that occurred.  Some compilation
11743 errors, however, will throw an exception immediately.
11744
11745 The I<flags> parameter is reserved for future use, and must always
11746 be zero.
11747
11748 =cut
11749 */
11750
11751 OP *
11752 Perl_parse_stmtseq(pTHX_ U32 flags)
11753 {
11754     OP *stmtseqop;
11755     I32 c;
11756     if (flags)
11757         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11758     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11759     c = lex_peek_unichar(0);
11760     if (c != -1 && c != /*{*/'}')
11761         qerror(Perl_mess(aTHX_ "Parse error"));
11762     return stmtseqop;
11763 }
11764
11765 /*
11766  * Local variables:
11767  * c-indentation-style: bsd
11768  * c-basic-offset: 4
11769  * indent-tabs-mode: nil
11770  * End:
11771  *
11772  * ex: set ts=8 sts=4 sw=4 et:
11773  */