toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN0OP       : zero-argument function, with its op created in this file
 228  * FUN1         : not used, except for not, which isn't a UNIOP
 229  * BOop         : bitwise or or xor
 230  * BAop         : bitwise and
 231  * SHop         : shift operator
 232  * PWop         : power operator
 233  * PMop         : pattern-matching operator
 234  * Aop          : addition-level operator
 235  * Mop          : multiplication-level operator
 236  * Eop          : equality-testing operator
 237  * Rop          : relational operator <= != gt
 238  *
 239  * Also see LOP and lop() below.
 240  */
 241
 242 #ifdef DEBUGGING /* Serve -DT. */
 243 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 244 #else
 245 #   define REPORT(retval) (retval)
 246 #endif
 247
 248 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 249 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 250 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 251 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 253 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 254 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 255 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 256 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 257 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 258 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 259 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 260 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 261 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 262 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 263 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 264 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 265 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 266 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 267 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 268 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 269
 270 /* This bit of chicanery makes a unary function followed by
 271  * a parenthesis into a function with one argument, highest precedence.
 272  * The UNIDOR macro is for unary functions that can be followed by the //
 273  * operator (such as C<shift // 0>).
 274  */
 275 #define UNI2(f,x) { \
 276         pl_yylval.ival = f; \
 277         PL_expect = x; \
 278         PL_bufptr = s; \
 279         PL_last_uni = PL_oldbufptr; \
 280         PL_last_lop_op = f; \
 281         if (*s == '(') \
 282             return REPORT( (int)FUNC1 ); \
 283         s = PEEKSPACE(s); \
 284         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 285         }
 286 #define UNI(f)    UNI2(f,XTERM)
 287 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 288
 289 #define UNIBRACK(f) { \
 290         pl_yylval.ival = f; \
 291         PL_bufptr = s; \
 292         PL_last_uni = PL_oldbufptr; \
 293         if (*s == '(') \
 294             return REPORT( (int)FUNC1 ); \
 295         s = PEEKSPACE(s); \
 296         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 297         }
 298
 299 /* grandfather return to old style */
 300 #define OLDLOP(f) \
 301         do { \
 302             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 303                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 304             pl_yylval.ival = (f); \
 305             PL_expect = XTERM; \
 306             PL_bufptr = s; \
 307             return (int)LSTOP; \
 308         } while(0)
 309
 310 #ifdef DEBUGGING
 311
 312 /* how to interpret the pl_yylval associated with the token */
 313 enum token_type {
 314     TOKENTYPE_NONE,
 315     TOKENTYPE_IVAL,
 316     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 317     TOKENTYPE_PVAL,
 318     TOKENTYPE_OPVAL,
 319     TOKENTYPE_GVVAL
 320 };
 321
 322 static struct debug_tokens {
 323     const int token;
 324     enum token_type type;
 325     const char *name;
 326 } const debug_tokens[] =
 327 {
 328     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 329     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 330     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 331     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 332     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 333     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 334     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 335     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 336     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 337     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 338     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 339     { DO,               TOKENTYPE_NONE,         "DO" },
 340     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 341     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 342     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 343     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 344     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 345     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 346     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 347     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 348     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 349     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 350     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 351     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 352     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 353     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 354     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 355     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 356     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 357     { IF,               TOKENTYPE_IVAL,         "IF" },
 358     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 359     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 360     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 361     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 362     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 363     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 364     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 365     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 366     { MY,               TOKENTYPE_IVAL,         "MY" },
 367     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 368     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 369     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 370     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 371     { OROR,             TOKENTYPE_NONE,         "OROR" },
 372     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 373     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 374     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 375     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 376     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 377     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 378     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 379     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 380     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 381     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 382     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 383     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 384     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 385     { SUB,              TOKENTYPE_NONE,         "SUB" },
 386     { THING,            TOKENTYPE_OPVAL,        "THING" },
 387     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 388     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 389     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 390     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 391     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 392     { USE,              TOKENTYPE_IVAL,         "USE" },
 393     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 394     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 395     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 396     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 397     { 0,                TOKENTYPE_NONE,         NULL }
 398 };
 399
 400 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 401
 402 STATIC int
 403 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 404 {
 405     dVAR;
 406
 407     PERL_ARGS_ASSERT_TOKEREPORT;
 408
 409     if (DEBUG_T_TEST) {
 410         const char *name = NULL;
 411         enum token_type type = TOKENTYPE_NONE;
 412         const struct debug_tokens *p;
 413         SV* const report = newSVpvs("<== ");
 414
 415         for (p = debug_tokens; p->token; p++) {
 416             if (p->token == (int)rv) {
 417                 name = p->name;
 418                 type = p->type;
 419                 break;
 420             }
 421         }
 422         if (name)
 423             Perl_sv_catpv(aTHX_ report, name);
 424         else if ((char)rv > ' ' && (char)rv < '~')
 425             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 426         else if (!rv)
 427             sv_catpvs(report, "EOF");
 428         else
 429             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 430         switch (type) {
 431         case TOKENTYPE_NONE:
 432         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 433             break;
 434         case TOKENTYPE_IVAL:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 436             break;
 437         case TOKENTYPE_OPNUM:
 438             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 439                                     PL_op_name[lvalp->ival]);
 440             break;
 441         case TOKENTYPE_PVAL:
 442             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 443             break;
 444         case TOKENTYPE_OPVAL:
 445             if (lvalp->opval) {
 446                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 447                                     PL_op_name[lvalp->opval->op_type]);
 448                 if (lvalp->opval->op_type == OP_CONST) {
 449                     Perl_sv_catpvf(aTHX_ report, " %s",
 450                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 451                 }
 452
 453             }
 454             else
 455                 sv_catpvs(report, "(opval=null)");
 456             break;
 457         }
 458         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 459     };
 460     return (int)rv;
 461 }
 462
 463
 464 /* print the buffer with suitable escapes */
 465
 466 STATIC void
 467 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 468 {
 469     SV* const tmp = newSVpvs("");
 470
 471     PERL_ARGS_ASSERT_PRINTBUF;
 472
 473     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 474     SvREFCNT_dec(tmp);
 475 }
 476
 477 #endif
 478
 479 static int
 480 S_deprecate_commaless_var_list(pTHX) {
 481     PL_expect = XTERM;
 482     deprecate("comma-less variable list");
 483     return REPORT(','); /* grandfather non-comma-format format */
 484 }
 485
 486 /*
 487  * S_ao
 488  *
 489  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 490  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 491  */
 492
 493 STATIC int
 494 S_ao(pTHX_ int toketype)
 495 {
 496     dVAR;
 497     if (*PL_bufptr == '=') {
 498         PL_bufptr++;
 499         if (toketype == ANDAND)
 500             pl_yylval.ival = OP_ANDASSIGN;
 501         else if (toketype == OROR)
 502             pl_yylval.ival = OP_ORASSIGN;
 503         else if (toketype == DORDOR)
 504             pl_yylval.ival = OP_DORASSIGN;
 505         toketype = ASSIGNOP;
 506     }
 507     return toketype;
 508 }
 509
 510 /*
 511  * S_no_op
 512  * When Perl expects an operator and finds something else, no_op
 513  * prints the warning.  It always prints "<something> found where
 514  * operator expected.  It prints "Missing semicolon on previous line?"
 515  * if the surprise occurs at the start of the line.  "do you need to
 516  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 517  * where the compiler doesn't know if foo is a method call or a function.
 518  * It prints "Missing operator before end of line" if there's nothing
 519  * after the missing operator, or "... before <...>" if there is something
 520  * after the missing operator.
 521  */
 522
 523 STATIC void
 524 S_no_op(pTHX_ const char *const what, char *s)
 525 {
 526     dVAR;
 527     char * const oldbp = PL_bufptr;
 528     const bool is_first = (PL_oldbufptr == PL_linestart);
 529
 530     PERL_ARGS_ASSERT_NO_OP;
 531
 532     if (!s)
 533         s = oldbp;
 534     else
 535         PL_bufptr = s;
 536     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 537     if (ckWARN_d(WARN_SYNTAX)) {
 538         if (is_first)
 539             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 540                     "\t(Missing semicolon on previous line?)\n");
 541         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 542             const char *t;
 543             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 544                 NOOP;
 545             if (t < PL_bufptr && isSPACE(*t))
 546                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 547                         "\t(Do you need to predeclare %.*s?)\n",
 548                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 549         }
 550         else {
 551             assert(s >= oldbp);
 552             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 553                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 554         }
 555     }
 556     PL_bufptr = oldbp;
 557 }
 558
 559 /*
 560  * S_missingterm
 561  * Complain about missing quote/regexp/heredoc terminator.
 562  * If it's called with NULL then it cauterizes the line buffer.
 563  * If we're in a delimited string and the delimiter is a control
 564  * character, it's reformatted into a two-char sequence like ^C.
 565  * This is fatal.
 566  */
 567
 568 STATIC void
 569 S_missingterm(pTHX_ char *s)
 570 {
 571     dVAR;
 572     char tmpbuf[3];
 573     char q;
 574     if (s) {
 575         char * const nl = strrchr(s,'\n');
 576         if (nl)
 577             *nl = '\0';
 578     }
 579     else if (isCNTRL(PL_multi_close)) {
 580         *tmpbuf = '^';
 581         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 582         tmpbuf[2] = '\0';
 583         s = tmpbuf;
 584     }
 585     else {
 586         *tmpbuf = (char)PL_multi_close;
 587         tmpbuf[1] = '\0';
 588         s = tmpbuf;
 589     }
 590     q = strchr(s,'"') ? '\'' : '"';
 591     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 592 }
 593
 594 /*
 595  * Check whether the named feature is enabled.
 596  */
 597 bool
 598 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 599 {
 600     dVAR;
 601     HV * const hinthv = GvHV(PL_hintgv);
 602     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 603
 604     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 605
 606     if (namelen > MAX_FEATURE_LEN)
 607         return FALSE;
 608     memcpy(&he_name[8], name, namelen);
 609
 610     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 611 }
 612
 613 /*
 614  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 615  * utf16-to-utf8-reversed.
 616  */
 617
 618 #ifdef PERL_CR_FILTER
 619 static void
 620 strip_return(SV *sv)
 621 {
 622     register const char *s = SvPVX_const(sv);
 623     register const char * const e = s + SvCUR(sv);
 624
 625     PERL_ARGS_ASSERT_STRIP_RETURN;
 626
 627     /* outer loop optimized to do nothing if there are no CR-LFs */
 628     while (s < e) {
 629         if (*s++ == '\r' && *s == '\n') {
 630             /* hit a CR-LF, need to copy the rest */
 631             register char *d = s - 1;
 632             *d++ = *s++;
 633             while (s < e) {
 634                 if (*s == '\r' && s[1] == '\n')
 635                     s++;
 636                 *d++ = *s++;
 637             }
 638             SvCUR(sv) -= s - d;
 639             return;
 640         }
 641     }
 642 }
 643
 644 STATIC I32
 645 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 646 {
 647     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 648     if (count > 0 && !maxlen)
 649         strip_return(sv);
 650     return count;
 651 }
 652 #endif
 653
 654 /*
 655 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 656
 657 Creates and initialises a new lexer/parser state object, supplying
 658 a context in which to lex and parse from a new source of Perl code.
 659 A pointer to the new state object is placed in L</PL_parser>.  An entry
 660 is made on the save stack so that upon unwinding the new state object
 661 will be destroyed and the former value of L</PL_parser> will be restored.
 662 Nothing else need be done to clean up the parsing context.
 663
 664 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 665 non-null, provides a string (in SV form) containing code to be parsed.
 666 A copy of the string is made, so subsequent modification of I<line>
 667 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 668 from which code will be read to be parsed.  If both are non-null, the
 669 code in I<line> comes first and must consist of complete lines of input,
 670 and I<rsfp> supplies the remainder of the source.
 671
 672 The I<flags> parameter is reserved for future use, and must always
 673 be zero, except for one flag that is currently reserved for perl's internal
 674 use.
 675
 676 =cut
 677 */
 678
 679 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 680    can share filters with the current parser. */
 681
 682 void
 683 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 684 {
 685     dVAR;
 686     const char *s = NULL;
 687     STRLEN len;
 688     yy_parser *parser, *oparser;
 689     if (flags && flags != LEX_START_SAME_FILTER)
 690         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 691
 692     /* create and initialise a parser */
 693
 694     Newxz(parser, 1, yy_parser);
 695     parser->old_parser = oparser = PL_parser;
 696     PL_parser = parser;
 697
 698     parser->stack = NULL;
 699     parser->ps = NULL;
 700     parser->stack_size = 0;
 701
 702     /* on scope exit, free this parser and restore any outer one */
 703     SAVEPARSER(parser);
 704     parser->saved_curcop = PL_curcop;
 705
 706     /* initialise lexer state */
 707
 708 #ifdef PERL_MAD
 709     parser->curforce = -1;
 710 #else
 711     parser->nexttoke = 0;
 712 #endif
 713     parser->error_count = oparser ? oparser->error_count : 0;
 714     parser->copline = NOLINE;
 715     parser->lex_state = LEX_NORMAL;
 716     parser->expect = XSTATE;
 717     parser->rsfp = rsfp;
 718     parser->rsfp_filters =
 719       !(flags & LEX_START_SAME_FILTER) || !oparser
 720         ? newAV()
 721         : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 722
 723     Newx(parser->lex_brackstack, 120, char);
 724     Newx(parser->lex_casestack, 12, char);
 725     *parser->lex_casestack = '\0';
 726
 727     if (line) {
 728         s = SvPV_const(line, len);
 729     } else {
 730         len = 0;
 731     }
 732
 733     if (!len) {
 734         parser->linestr = newSVpvs("\n;");
 735     } else {
 736         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 737         if (s[len-1] != ';')
 738             sv_catpvs(parser->linestr, "\n;");
 739     }
 740     parser->oldoldbufptr =
 741         parser->oldbufptr =
 742         parser->bufptr =
 743         parser->linestart = SvPVX(parser->linestr);
 744     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 745     parser->last_lop = parser->last_uni = NULL;
 746
 747     parser->in_pod = 0;
 748 }
 749
 750
 751 /* delete a parser object */
 752
 753 void
 754 Perl_parser_free(pTHX_  const yy_parser *parser)
 755 {
 756     PERL_ARGS_ASSERT_PARSER_FREE;
 757
 758     PL_curcop = parser->saved_curcop;
 759     SvREFCNT_dec(parser->linestr);
 760
 761     if (parser->rsfp == PerlIO_stdin())
 762         PerlIO_clearerr(parser->rsfp);
 763     else if (parser->rsfp && (!parser->old_parser ||
 764                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 765         PerlIO_close(parser->rsfp);
 766     SvREFCNT_dec(parser->rsfp_filters);
 767
 768     Safefree(parser->lex_brackstack);
 769     Safefree(parser->lex_casestack);
 770     PL_parser = parser->old_parser;
 771     Safefree(parser);
 772 }
 773
 774
 775 /*
 776 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 777
 778 Buffer scalar containing the chunk currently under consideration of the
 779 text currently being lexed.  This is always a plain string scalar (for
 780 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 781 normal scalar means; instead refer to the buffer directly by the pointer
 782 variables described below.
 783
 784 The lexer maintains various C<char*> pointers to things in the
 785 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 786 reallocated, all of these pointers must be updated.  Don't attempt to
 787 do this manually, but rather use L</lex_grow_linestr> if you need to
 788 reallocate the buffer.
 789
 790 The content of the text chunk in the buffer is commonly exactly one
 791 complete line of input, up to and including a newline terminator,
 792 but there are situations where it is otherwise.  The octets of the
 793 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 794 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 795 flag on this scalar, which may disagree with it.
 796
 797 For direct examination of the buffer, the variable
 798 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 799 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 800 of these pointers is usually preferable to examination of the scalar
 801 through normal scalar means.
 802
 803 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 804
 805 Direct pointer to the end of the chunk of text currently being lexed, the
 806 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 807 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 808 always located at the end of the buffer, and does not count as part of
 809 the buffer's contents.
 810
 811 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 812
 813 Points to the current position of lexing inside the lexer buffer.
 814 Characters around this point may be freely examined, within
 815 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 816 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 817 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 818
 819 Lexing code (whether in the Perl core or not) moves this pointer past
 820 the characters that it consumes.  It is also expected to perform some
 821 bookkeeping whenever a newline character is consumed.  This movement
 822 can be more conveniently performed by the function L</lex_read_to>,
 823 which handles newlines appropriately.
 824
 825 Interpretation of the buffer's octets can be abstracted out by
 826 using the slightly higher-level functions L</lex_peek_unichar> and
 827 L</lex_read_unichar>.
 828
 829 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 830
 831 Points to the start of the current line inside the lexer buffer.
 832 This is useful for indicating at which column an error occurred, and
 833 not much else.  This must be updated by any lexing code that consumes
 834 a newline; the function L</lex_read_to> handles this detail.
 835
 836 =cut
 837 */
 838
 839 /*
 840 =for apidoc Amx|bool|lex_bufutf8
 841
 842 Indicates whether the octets in the lexer buffer
 843 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 844 of Unicode characters.  If not, they should be interpreted as Latin-1
 845 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 846
 847 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 848 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 849 encoding.
 850
 851 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 852 is significant, but not the whole story regarding the input character
 853 encoding.  Normally, when a file is being read, the scalar contains octets
 854 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 855 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 856 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 857 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 858 is in effect.  This logic may change in the future; use this function
 859 instead of implementing the logic yourself.
 860
 861 =cut
 862 */
 863
 864 bool
 865 Perl_lex_bufutf8(pTHX)
 866 {
 867     return UTF;
 868 }
 869
 870 /*
 871 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 872
 873 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 874 at least I<len> octets (including terminating NUL).  Returns a
 875 pointer to the reallocated buffer.  This is necessary before making
 876 any direct modification of the buffer that would increase its length.
 877 L</lex_stuff_pvn> provides a more convenient way to insert text into
 878 the buffer.
 879
 880 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 881 this function updates all of the lexer's variables that point directly
 882 into the buffer.
 883
 884 =cut
 885 */
 886
 887 char *
 888 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 889 {
 890     SV *linestr;
 891     char *buf;
 892     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 893     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 894     linestr = PL_parser->linestr;
 895     buf = SvPVX(linestr);
 896     if (len <= SvLEN(linestr))
 897         return buf;
 898     bufend_pos = PL_parser->bufend - buf;
 899     bufptr_pos = PL_parser->bufptr - buf;
 900     oldbufptr_pos = PL_parser->oldbufptr - buf;
 901     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 902     linestart_pos = PL_parser->linestart - buf;
 903     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 904     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 905     buf = sv_grow(linestr, len);
 906     PL_parser->bufend = buf + bufend_pos;
 907     PL_parser->bufptr = buf + bufptr_pos;
 908     PL_parser->oldbufptr = buf + oldbufptr_pos;
 909     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 910     PL_parser->linestart = buf + linestart_pos;
 911     if (PL_parser->last_uni)
 912         PL_parser->last_uni = buf + last_uni_pos;
 913     if (PL_parser->last_lop)
 914         PL_parser->last_lop = buf + last_lop_pos;
 915     return buf;
 916 }
 917
 918 /*
 919 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 920
 921 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 922 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 923 reallocating the buffer if necessary.  This means that lexing code that
 924 runs later will see the characters as if they had appeared in the input.
 925 It is not recommended to do this as part of normal parsing, and most
 926 uses of this facility run the risk of the inserted characters being
 927 interpreted in an unintended manner.
 928
 929 The string to be inserted is represented by I<len> octets starting
 930 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 931 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 932 The characters are recoded for the lexer buffer, according to how the
 933 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 934 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 935 function is more convenient.
 936
 937 =cut
 938 */
 939
 940 void
 941 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 942 {
 943     dVAR;
 944     char *bufptr;
 945     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 946     if (flags & ~(LEX_STUFF_UTF8))
 947         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 948     if (UTF) {
 949         if (flags & LEX_STUFF_UTF8) {
 950             goto plain_copy;
 951         } else {
 952             STRLEN highhalf = 0;
 953             const char *p, *e = pv+len;
 954             for (p = pv; p != e; p++)
 955                 highhalf += !!(((U8)*p) & 0x80);
 956             if (!highhalf)
 957                 goto plain_copy;
 958             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 959             bufptr = PL_parser->bufptr;
 960             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 961             SvCUR_set(PL_parser->linestr,
 962                 SvCUR(PL_parser->linestr) + len+highhalf);
 963             PL_parser->bufend += len+highhalf;
 964             for (p = pv; p != e; p++) {
 965                 U8 c = (U8)*p;
 966                 if (c & 0x80) {
 967                     *bufptr++ = (char)(0xc0 | (c >> 6));
 968                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 969                 } else {
 970                     *bufptr++ = (char)c;
 971                 }
 972             }
 973         }
 974     } else {
 975         if (flags & LEX_STUFF_UTF8) {
 976             STRLEN highhalf = 0;
 977             const char *p, *e = pv+len;
 978             for (p = pv; p != e; p++) {
 979                 U8 c = (U8)*p;
 980                 if (c >= 0xc4) {
 981                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 982                                 "non-Latin-1 character into Latin-1 input");
 983                 } else if (c >= 0xc2 && p+1 != e &&
 984                             (((U8)p[1]) & 0xc0) == 0x80) {
 985                     p++;
 986                     highhalf++;
 987                 } else if (c >= 0x80) {
 988                     /* malformed UTF-8 */
 989                     ENTER;
 990                     SAVESPTR(PL_warnhook);
 991                     PL_warnhook = PERL_WARNHOOK_FATAL;
 992                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 993                     LEAVE;
 994                 }
 995             }
 996             if (!highhalf)
 997                 goto plain_copy;
 998             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 999             bufptr = PL_parser->bufptr;
1000             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1001             SvCUR_set(PL_parser->linestr,
1002                 SvCUR(PL_parser->linestr) + len-highhalf);
1003             PL_parser->bufend += len-highhalf;
1004             for (p = pv; p != e; p++) {
1005                 U8 c = (U8)*p;
1006                 if (c & 0x80) {
1007                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1008                     p++;
1009                 } else {
1010                     *bufptr++ = (char)c;
1011                 }
1012             }
1013         } else {
1014             plain_copy:
1015             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1016             bufptr = PL_parser->bufptr;
1017             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1018             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1019             PL_parser->bufend += len;
1020             Copy(pv, bufptr, len, char);
1021         }
1022     }
1023 }
1024
1025 /*
1026 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027
1028 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1029 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1030 reallocating the buffer if necessary.  This means that lexing code that
1031 runs later will see the characters as if they had appeared in the input.
1032 It is not recommended to do this as part of normal parsing, and most
1033 uses of this facility run the risk of the inserted characters being
1034 interpreted in an unintended manner.
1035
1036 The string to be inserted is represented by octets starting at I<pv>
1037 and continuing to the first nul.  These octets are interpreted as either
1038 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1039 in I<flags>.  The characters are recoded for the lexer buffer, according
1040 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1041 If it is not convenient to nul-terminate a string to be inserted, the
1042 L</lex_stuff_pvn> function is more appropriate.
1043
1044 =cut
1045 */
1046
1047 void
1048 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049 {
1050     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1051     lex_stuff_pvn(pv, strlen(pv), flags);
1052 }
1053
1054 /*
1055 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056
1057 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1058 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1059 reallocating the buffer if necessary.  This means that lexing code that
1060 runs later will see the characters as if they had appeared in the input.
1061 It is not recommended to do this as part of normal parsing, and most
1062 uses of this facility run the risk of the inserted characters being
1063 interpreted in an unintended manner.
1064
1065 The string to be inserted is the string value of I<sv>.  The characters
1066 are recoded for the lexer buffer, according to how the buffer is currently
1067 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1068 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1069 need to construct a scalar.
1070
1071 =cut
1072 */
1073
1074 void
1075 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1076 {
1077     char *pv;
1078     STRLEN len;
1079     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080     if (flags)
1081         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082     pv = SvPV(sv, len);
1083     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1084 }
1085
1086 /*
1087 =for apidoc Amx|void|lex_unstuff|char *ptr
1088
1089 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1090 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1091 This hides the discarded text from any lexing code that runs later,
1092 as if the text had never appeared.
1093
1094 This is not the normal way to consume lexed text.  For that, use
1095 L</lex_read_to>.
1096
1097 =cut
1098 */
1099
1100 void
1101 Perl_lex_unstuff(pTHX_ char *ptr)
1102 {
1103     char *buf, *bufend;
1104     STRLEN unstuff_len;
1105     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1106     buf = PL_parser->bufptr;
1107     if (ptr < buf)
1108         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1109     if (ptr == buf)
1110         return;
1111     bufend = PL_parser->bufend;
1112     if (ptr > bufend)
1113         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1114     unstuff_len = ptr - buf;
1115     Move(ptr, buf, bufend+1-ptr, char);
1116     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1117     PL_parser->bufend = bufend - unstuff_len;
1118 }
1119
1120 /*
1121 =for apidoc Amx|void|lex_read_to|char *ptr
1122
1123 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1124 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1125 performing the correct bookkeeping whenever a newline character is passed.
1126 This is the normal way to consume lexed text.
1127
1128 Interpretation of the buffer's octets can be abstracted out by
1129 using the slightly higher-level functions L</lex_peek_unichar> and
1130 L</lex_read_unichar>.
1131
1132 =cut
1133 */
1134
1135 void
1136 Perl_lex_read_to(pTHX_ char *ptr)
1137 {
1138     char *s;
1139     PERL_ARGS_ASSERT_LEX_READ_TO;
1140     s = PL_parser->bufptr;
1141     if (ptr < s || ptr > PL_parser->bufend)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1143     for (; s != ptr; s++)
1144         if (*s == '\n') {
1145             CopLINE_inc(PL_curcop);
1146             PL_parser->linestart = s+1;
1147         }
1148     PL_parser->bufptr = ptr;
1149 }
1150
1151 /*
1152 =for apidoc Amx|void|lex_discard_to|char *ptr
1153
1154 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1155 up to I<ptr>.  The remaining content of the buffer will be moved, and
1156 all pointers into the buffer updated appropriately.  I<ptr> must not
1157 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1158 it is not permitted to discard text that has yet to be lexed.
1159
1160 Normally it is not necessarily to do this directly, because it suffices to
1161 use the implicit discarding behaviour of L</lex_next_chunk> and things
1162 based on it.  However, if a token stretches across multiple lines,
1163 and the lexing code has kept multiple lines of text in the buffer for
1164 that purpose, then after completion of the token it would be wise to
1165 explicitly discard the now-unneeded earlier lines, to avoid future
1166 multi-line tokens growing the buffer without bound.
1167
1168 =cut
1169 */
1170
1171 void
1172 Perl_lex_discard_to(pTHX_ char *ptr)
1173 {
1174     char *buf;
1175     STRLEN discard_len;
1176     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1177     buf = SvPVX(PL_parser->linestr);
1178     if (ptr < buf)
1179         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180     if (ptr == buf)
1181         return;
1182     if (ptr > PL_parser->bufptr)
1183         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1184     discard_len = ptr - buf;
1185     if (PL_parser->oldbufptr < ptr)
1186         PL_parser->oldbufptr = ptr;
1187     if (PL_parser->oldoldbufptr < ptr)
1188         PL_parser->oldoldbufptr = ptr;
1189     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1190         PL_parser->last_uni = NULL;
1191     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1192         PL_parser->last_lop = NULL;
1193     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1194     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1195     PL_parser->bufend -= discard_len;
1196     PL_parser->bufptr -= discard_len;
1197     PL_parser->oldbufptr -= discard_len;
1198     PL_parser->oldoldbufptr -= discard_len;
1199     if (PL_parser->last_uni)
1200         PL_parser->last_uni -= discard_len;
1201     if (PL_parser->last_lop)
1202         PL_parser->last_lop -= discard_len;
1203 }
1204
1205 /*
1206 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1207
1208 Reads in the next chunk of text to be lexed, appending it to
1209 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1210 looked to the end of the current chunk and wants to know more.  It is
1211 usual, but not necessary, for lexing to have consumed the entirety of
1212 the current chunk at this time.
1213
1214 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1215 chunk (i.e., the current chunk has been entirely consumed), normally the
1216 current chunk will be discarded at the same time that the new chunk is
1217 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1218 will not be discarded.  If the current chunk has not been entirely
1219 consumed, then it will not be discarded regardless of the flag.
1220
1221 Returns true if some new text was added to the buffer, or false if the
1222 buffer has reached the end of the input text.
1223
1224 =cut
1225 */
1226
1227 #define LEX_FAKE_EOF 0x80000000
1228
1229 bool
1230 Perl_lex_next_chunk(pTHX_ U32 flags)
1231 {
1232     SV *linestr;
1233     char *buf;
1234     STRLEN old_bufend_pos, new_bufend_pos;
1235     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1236     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1237     bool got_some_for_debugger = 0;
1238     bool got_some;
1239     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1240         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1241     linestr = PL_parser->linestr;
1242     buf = SvPVX(linestr);
1243     if (!(flags & LEX_KEEP_PREVIOUS) &&
1244             PL_parser->bufptr == PL_parser->bufend) {
1245         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1246         linestart_pos = 0;
1247         if (PL_parser->last_uni != PL_parser->bufend)
1248             PL_parser->last_uni = NULL;
1249         if (PL_parser->last_lop != PL_parser->bufend)
1250             PL_parser->last_lop = NULL;
1251         last_uni_pos = last_lop_pos = 0;
1252         *buf = 0;
1253         SvCUR(linestr) = 0;
1254     } else {
1255         old_bufend_pos = PL_parser->bufend - buf;
1256         bufptr_pos = PL_parser->bufptr - buf;
1257         oldbufptr_pos = PL_parser->oldbufptr - buf;
1258         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1259         linestart_pos = PL_parser->linestart - buf;
1260         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1261         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1262     }
1263     if (flags & LEX_FAKE_EOF) {
1264         goto eof;
1265     } else if (!PL_parser->rsfp) {
1266         got_some = 0;
1267     } else if (filter_gets(linestr, old_bufend_pos)) {
1268         got_some = 1;
1269         got_some_for_debugger = 1;
1270     } else {
1271         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1272             sv_setpvs(linestr, "");
1273         eof:
1274         /* End of real input.  Close filehandle (unless it was STDIN),
1275          * then add implicit termination.
1276          */
1277         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1278             PerlIO_clearerr(PL_parser->rsfp);
1279         else if (PL_parser->rsfp)
1280             (void)PerlIO_close(PL_parser->rsfp);
1281         PL_parser->rsfp = NULL;
1282         PL_parser->in_pod = 0;
1283 #ifdef PERL_MAD
1284         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1285             PL_faketokens = 1;
1286 #endif
1287         if (!PL_in_eval && PL_minus_p) {
1288             sv_catpvs(linestr,
1289                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1290             PL_minus_n = PL_minus_p = 0;
1291         } else if (!PL_in_eval && PL_minus_n) {
1292             sv_catpvs(linestr, /*{*/";}");
1293             PL_minus_n = 0;
1294         } else
1295             sv_catpvs(linestr, ";");
1296         got_some = 1;
1297     }
1298     buf = SvPVX(linestr);
1299     new_bufend_pos = SvCUR(linestr);
1300     PL_parser->bufend = buf + new_bufend_pos;
1301     PL_parser->bufptr = buf + bufptr_pos;
1302     PL_parser->oldbufptr = buf + oldbufptr_pos;
1303     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1304     PL_parser->linestart = buf + linestart_pos;
1305     if (PL_parser->last_uni)
1306         PL_parser->last_uni = buf + last_uni_pos;
1307     if (PL_parser->last_lop)
1308         PL_parser->last_lop = buf + last_lop_pos;
1309     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1310             PL_curstash != PL_debstash) {
1311         /* debugger active and we're not compiling the debugger code,
1312          * so store the line into the debugger's array of lines
1313          */
1314         update_debugger_info(NULL, buf+old_bufend_pos,
1315             new_bufend_pos-old_bufend_pos);
1316     }
1317     return got_some;
1318 }
1319
1320 /*
1321 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322
1323 Looks ahead one (Unicode) character in the text currently being lexed.
1324 Returns the codepoint (unsigned integer value) of the next character,
1325 or -1 if lexing has reached the end of the input text.  To consume the
1326 peeked character, use L</lex_read_unichar>.
1327
1328 If the next character is in (or extends into) the next chunk of input
1329 text, the next chunk will be read in.  Normally the current chunk will be
1330 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1331 then the current chunk will not be discarded.
1332
1333 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1334 is encountered, an exception is generated.
1335
1336 =cut
1337 */
1338
1339 I32
1340 Perl_lex_peek_unichar(pTHX_ U32 flags)
1341 {
1342     dVAR;
1343     char *s, *bufend;
1344     if (flags & ~(LEX_KEEP_PREVIOUS))
1345         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1346     s = PL_parser->bufptr;
1347     bufend = PL_parser->bufend;
1348     if (UTF) {
1349         U8 head;
1350         I32 unichar;
1351         STRLEN len, retlen;
1352         if (s == bufend) {
1353             if (!lex_next_chunk(flags))
1354                 return -1;
1355             s = PL_parser->bufptr;
1356             bufend = PL_parser->bufend;
1357         }
1358         head = (U8)*s;
1359         if (!(head & 0x80))
1360             return head;
1361         if (head & 0x40) {
1362             len = PL_utf8skip[head];
1363             while ((STRLEN)(bufend-s) < len) {
1364                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365                     break;
1366                 s = PL_parser->bufptr;
1367                 bufend = PL_parser->bufend;
1368             }
1369         }
1370         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1371         if (retlen == (STRLEN)-1) {
1372             /* malformed UTF-8 */
1373             ENTER;
1374             SAVESPTR(PL_warnhook);
1375             PL_warnhook = PERL_WARNHOOK_FATAL;
1376             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1377             LEAVE;
1378         }
1379         return unichar;
1380     } else {
1381         if (s == bufend) {
1382             if (!lex_next_chunk(flags))
1383                 return -1;
1384             s = PL_parser->bufptr;
1385         }
1386         return (U8)*s;
1387     }
1388 }
1389
1390 /*
1391 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1392
1393 Reads the next (Unicode) character in the text currently being lexed.
1394 Returns the codepoint (unsigned integer value) of the character read,
1395 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1396 if lexing has reached the end of the input text.  To non-destructively
1397 examine the next character, use L</lex_peek_unichar> instead.
1398
1399 If the next character is in (or extends into) the next chunk of input
1400 text, the next chunk will be read in.  Normally the current chunk will be
1401 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1402 then the current chunk will not be discarded.
1403
1404 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1405 is encountered, an exception is generated.
1406
1407 =cut
1408 */
1409
1410 I32
1411 Perl_lex_read_unichar(pTHX_ U32 flags)
1412 {
1413     I32 c;
1414     if (flags & ~(LEX_KEEP_PREVIOUS))
1415         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1416     c = lex_peek_unichar(flags);
1417     if (c != -1) {
1418         if (c == '\n')
1419             CopLINE_inc(PL_curcop);
1420         if (UTF)
1421             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1422         else
1423             ++(PL_parser->bufptr);
1424     }
1425     return c;
1426 }
1427
1428 /*
1429 =for apidoc Amx|void|lex_read_space|U32 flags
1430
1431 Reads optional spaces, in Perl style, in the text currently being
1432 lexed.  The spaces may include ordinary whitespace characters and
1433 Perl-style comments.  C<#line> directives are processed if encountered.
1434 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1435 at a non-space character (or the end of the input text).
1436
1437 If spaces extend into the next chunk of input text, the next chunk will
1438 be read in.  Normally the current chunk will be discarded at the same
1439 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1440 chunk will not be discarded.
1441
1442 =cut
1443 */
1444
1445 #define LEX_NO_NEXT_CHUNK 0x80000000
1446
1447 void
1448 Perl_lex_read_space(pTHX_ U32 flags)
1449 {
1450     char *s, *bufend;
1451     bool need_incline = 0;
1452     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1453         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1454 #ifdef PERL_MAD
1455     if (PL_skipwhite) {
1456         sv_free(PL_skipwhite);
1457         PL_skipwhite = NULL;
1458     }
1459     if (PL_madskills)
1460         PL_skipwhite = newSVpvs("");
1461 #endif /* PERL_MAD */
1462     s = PL_parser->bufptr;
1463     bufend = PL_parser->bufend;
1464     while (1) {
1465         char c = *s;
1466         if (c == '#') {
1467             do {
1468                 c = *++s;
1469             } while (!(c == '\n' || (c == 0 && s == bufend)));
1470         } else if (c == '\n') {
1471             s++;
1472             PL_parser->linestart = s;
1473             if (s == bufend)
1474                 need_incline = 1;
1475             else
1476                 incline(s);
1477         } else if (isSPACE(c)) {
1478             s++;
1479         } else if (c == 0 && s == bufend) {
1480             bool got_more;
1481 #ifdef PERL_MAD
1482             if (PL_madskills)
1483                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1484 #endif /* PERL_MAD */
1485             if (flags & LEX_NO_NEXT_CHUNK)
1486                 break;
1487             PL_parser->bufptr = s;
1488             CopLINE_inc(PL_curcop);
1489             got_more = lex_next_chunk(flags);
1490             CopLINE_dec(PL_curcop);
1491             s = PL_parser->bufptr;
1492             bufend = PL_parser->bufend;
1493             if (!got_more)
1494                 break;
1495             if (need_incline && PL_parser->rsfp) {
1496                 incline(s);
1497                 need_incline = 0;
1498             }
1499         } else {
1500             break;
1501         }
1502     }
1503 #ifdef PERL_MAD
1504     if (PL_madskills)
1505         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1506 #endif /* PERL_MAD */
1507     PL_parser->bufptr = s;
1508 }
1509
1510 /*
1511  * S_incline
1512  * This subroutine has nothing to do with tilting, whether at windmills
1513  * or pinball tables.  Its name is short for "increment line".  It
1514  * increments the current line number in CopLINE(PL_curcop) and checks
1515  * to see whether the line starts with a comment of the form
1516  *    # line 500 "foo.pm"
1517  * If so, it sets the current line number and file to the values in the comment.
1518  */
1519
1520 STATIC void
1521 S_incline(pTHX_ const char *s)
1522 {
1523     dVAR;
1524     const char *t;
1525     const char *n;
1526     const char *e;
1527     line_t line_num;
1528
1529     PERL_ARGS_ASSERT_INCLINE;
1530
1531     CopLINE_inc(PL_curcop);
1532     if (*s++ != '#')
1533         return;
1534     while (SPACE_OR_TAB(*s))
1535         s++;
1536     if (strnEQ(s, "line", 4))
1537         s += 4;
1538     else
1539         return;
1540     if (SPACE_OR_TAB(*s))
1541         s++;
1542     else
1543         return;
1544     while (SPACE_OR_TAB(*s))
1545         s++;
1546     if (!isDIGIT(*s))
1547         return;
1548
1549     n = s;
1550     while (isDIGIT(*s))
1551         s++;
1552     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1553         return;
1554     while (SPACE_OR_TAB(*s))
1555         s++;
1556     if (*s == '"' && (t = strchr(s+1, '"'))) {
1557         s++;
1558         e = t + 1;
1559     }
1560     else {
1561         t = s;
1562         while (!isSPACE(*t))
1563             t++;
1564         e = t;
1565     }
1566     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1567         e++;
1568     if (*e != '\n' && *e != '\0')
1569         return;         /* false alarm */
1570
1571     line_num = atoi(n)-1;
1572
1573     if (t - s > 0) {
1574         const STRLEN len = t - s;
1575         SV *const temp_sv = CopFILESV(PL_curcop);
1576         const char *cf;
1577         STRLEN tmplen;
1578
1579         if (temp_sv) {
1580             cf = SvPVX(temp_sv);
1581             tmplen = SvCUR(temp_sv);
1582         } else {
1583             cf = NULL;
1584             tmplen = 0;
1585         }
1586
1587         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1588             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1589              * to *{"::_<newfilename"} */
1590             /* However, the long form of evals is only turned on by the
1591                debugger - usually they're "(eval %lu)" */
1592             char smallbuf[128];
1593             char *tmpbuf;
1594             GV **gvp;
1595             STRLEN tmplen2 = len;
1596             if (tmplen + 2 <= sizeof smallbuf)
1597                 tmpbuf = smallbuf;
1598             else
1599                 Newx(tmpbuf, tmplen + 2, char);
1600             tmpbuf[0] = '_';
1601             tmpbuf[1] = '<';
1602             memcpy(tmpbuf + 2, cf, tmplen);
1603             tmplen += 2;
1604             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1605             if (gvp) {
1606                 char *tmpbuf2;
1607                 GV *gv2;
1608
1609                 if (tmplen2 + 2 <= sizeof smallbuf)
1610                     tmpbuf2 = smallbuf;
1611                 else
1612                     Newx(tmpbuf2, tmplen2 + 2, char);
1613
1614                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1615                     /* Either they malloc'd it, or we malloc'd it,
1616                        so no prefix is present in ours.  */
1617                     tmpbuf2[0] = '_';
1618                     tmpbuf2[1] = '<';
1619                 }
1620
1621                 memcpy(tmpbuf2 + 2, s, tmplen2);
1622                 tmplen2 += 2;
1623
1624                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1625                 if (!isGV(gv2)) {
1626                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1627                     /* adjust ${"::_<newfilename"} to store the new file name */
1628                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1629                     /* The line number may differ. If that is the case,
1630                        alias the saved lines that are in the array.
1631                        Otherwise alias the whole array. */
1632                     if (CopLINE(PL_curcop) == line_num) {
1633                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1634                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1635                     }
1636                     else if (GvAV(*gvp)) {
1637                         AV * const av = GvAV(*gvp);
1638                         const I32 start = CopLINE(PL_curcop)+1;
1639                         I32 items = AvFILLp(av) - start;
1640                         if (items > 0) {
1641                             AV * const av2 = GvAVn(gv2);
1642                             SV **svp = AvARRAY(av) + start;
1643                             I32 l = (I32)line_num+1;
1644                             while (items--)
1645                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1646                         }
1647                     }
1648                 }
1649
1650                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1651             }
1652             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1653         }
1654         CopFILE_free(PL_curcop);
1655         CopFILE_setn(PL_curcop, s, len);
1656     }
1657     CopLINE_set(PL_curcop, line_num);
1658 }
1659
1660 #ifdef PERL_MAD
1661 /* skip space before PL_thistoken */
1662
1663 STATIC char *
1664 S_skipspace0(pTHX_ register char *s)
1665 {
1666     PERL_ARGS_ASSERT_SKIPSPACE0;
1667
1668     s = skipspace(s);
1669     if (!PL_madskills)
1670         return s;
1671     if (PL_skipwhite) {
1672         if (!PL_thiswhite)
1673             PL_thiswhite = newSVpvs("");
1674         sv_catsv(PL_thiswhite, PL_skipwhite);
1675         sv_free(PL_skipwhite);
1676         PL_skipwhite = 0;
1677     }
1678     PL_realtokenstart = s - SvPVX(PL_linestr);
1679     return s;
1680 }
1681
1682 /* skip space after PL_thistoken */
1683
1684 STATIC char *
1685 S_skipspace1(pTHX_ register char *s)
1686 {
1687     const char *start = s;
1688     I32 startoff = start - SvPVX(PL_linestr);
1689
1690     PERL_ARGS_ASSERT_SKIPSPACE1;
1691
1692     s = skipspace(s);
1693     if (!PL_madskills)
1694         return s;
1695     start = SvPVX(PL_linestr) + startoff;
1696     if (!PL_thistoken && PL_realtokenstart >= 0) {
1697         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1698         PL_thistoken = newSVpvn(tstart, start - tstart);
1699     }
1700     PL_realtokenstart = -1;
1701     if (PL_skipwhite) {
1702         if (!PL_nextwhite)
1703             PL_nextwhite = newSVpvs("");
1704         sv_catsv(PL_nextwhite, PL_skipwhite);
1705         sv_free(PL_skipwhite);
1706         PL_skipwhite = 0;
1707     }
1708     return s;
1709 }
1710
1711 STATIC char *
1712 S_skipspace2(pTHX_ register char *s, SV **svp)
1713 {
1714     char *start;
1715     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1716     const I32 startoff = s - SvPVX(PL_linestr);
1717
1718     PERL_ARGS_ASSERT_SKIPSPACE2;
1719
1720     s = skipspace(s);
1721     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1722     if (!PL_madskills || !svp)
1723         return s;
1724     start = SvPVX(PL_linestr) + startoff;
1725     if (!PL_thistoken && PL_realtokenstart >= 0) {
1726         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1727         PL_thistoken = newSVpvn(tstart, start - tstart);
1728         PL_realtokenstart = -1;
1729     }
1730     if (PL_skipwhite) {
1731         if (!*svp)
1732             *svp = newSVpvs("");
1733         sv_setsv(*svp, PL_skipwhite);
1734         sv_free(PL_skipwhite);
1735         PL_skipwhite = 0;
1736     }
1737
1738     return s;
1739 }
1740 #endif
1741
1742 STATIC void
1743 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1744 {
1745     AV *av = CopFILEAVx(PL_curcop);
1746     if (av) {
1747         SV * const sv = newSV_type(SVt_PVMG);
1748         if (orig_sv)
1749             sv_setsv(sv, orig_sv);
1750         else
1751             sv_setpvn(sv, buf, len);
1752         (void)SvIOK_on(sv);
1753         SvIV_set(sv, 0);
1754         av_store(av, (I32)CopLINE(PL_curcop), sv);
1755     }
1756 }
1757
1758 /*
1759  * S_skipspace
1760  * Called to gobble the appropriate amount and type of whitespace.
1761  * Skips comments as well.
1762  */
1763
1764 STATIC char *
1765 S_skipspace(pTHX_ register char *s)
1766 {
1767 #ifdef PERL_MAD
1768     char *start = s;
1769 #endif /* PERL_MAD */
1770     PERL_ARGS_ASSERT_SKIPSPACE;
1771 #ifdef PERL_MAD
1772     if (PL_skipwhite) {
1773         sv_free(PL_skipwhite);
1774         PL_skipwhite = NULL;
1775     }
1776 #endif /* PERL_MAD */
1777     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1778         while (s < PL_bufend && SPACE_OR_TAB(*s))
1779             s++;
1780     } else {
1781         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1782         PL_bufptr = s;
1783         lex_read_space(LEX_KEEP_PREVIOUS |
1784                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1785                     LEX_NO_NEXT_CHUNK : 0));
1786         s = PL_bufptr;
1787         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1788         if (PL_linestart > PL_bufptr)
1789             PL_bufptr = PL_linestart;
1790         return s;
1791     }
1792 #ifdef PERL_MAD
1793     if (PL_madskills)
1794         PL_skipwhite = newSVpvn(start, s-start);
1795 #endif /* PERL_MAD */
1796     return s;
1797 }
1798
1799 /*
1800  * S_check_uni
1801  * Check the unary operators to ensure there's no ambiguity in how they're
1802  * used.  An ambiguous piece of code would be:
1803  *     rand + 5
1804  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1805  * the +5 is its argument.
1806  */
1807
1808 STATIC void
1809 S_check_uni(pTHX)
1810 {
1811     dVAR;
1812     const char *s;
1813     const char *t;
1814
1815     if (PL_oldoldbufptr != PL_last_uni)
1816         return;
1817     while (isSPACE(*PL_last_uni))
1818         PL_last_uni++;
1819     s = PL_last_uni;
1820     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1821         s++;
1822     if ((t = strchr(s, '(')) && t < PL_bufptr)
1823         return;
1824
1825     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1826                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1827                      (int)(s - PL_last_uni), PL_last_uni);
1828 }
1829
1830 /*
1831  * LOP : macro to build a list operator.  Its behaviour has been replaced
1832  * with a subroutine, S_lop() for which LOP is just another name.
1833  */
1834
1835 #define LOP(f,x) return lop(f,x,s)
1836
1837 /*
1838  * S_lop
1839  * Build a list operator (or something that might be one).  The rules:
1840  *  - if we have a next token, then it's a list operator [why?]
1841  *  - if the next thing is an opening paren, then it's a function
1842  *  - else it's a list operator
1843  */
1844
1845 STATIC I32
1846 S_lop(pTHX_ I32 f, int x, char *s)
1847 {
1848     dVAR;
1849
1850     PERL_ARGS_ASSERT_LOP;
1851
1852     pl_yylval.ival = f;
1853     CLINE;
1854     PL_expect = x;
1855     PL_bufptr = s;
1856     PL_last_lop = PL_oldbufptr;
1857     PL_last_lop_op = (OPCODE)f;
1858 #ifdef PERL_MAD
1859     if (PL_lasttoke)
1860         goto lstop;
1861 #else
1862     if (PL_nexttoke)
1863         goto lstop;
1864 #endif
1865     if (*s == '(')
1866         return REPORT(FUNC);
1867     s = PEEKSPACE(s);
1868     if (*s == '(')
1869         return REPORT(FUNC);
1870     else {
1871         lstop:
1872         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1873             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1874         return REPORT(LSTOP);
1875     }
1876 }
1877
1878 #ifdef PERL_MAD
1879  /*
1880  * S_start_force
1881  * Sets up for an eventual force_next().  start_force(0) basically does
1882  * an unshift, while start_force(-1) does a push.  yylex removes items
1883  * on the "pop" end.
1884  */
1885
1886 STATIC void
1887 S_start_force(pTHX_ int where)
1888 {
1889     int i;
1890
1891     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1892         where = PL_lasttoke;
1893     assert(PL_curforce < 0 || PL_curforce == where);
1894     if (PL_curforce != where) {
1895         for (i = PL_lasttoke; i > where; --i) {
1896             PL_nexttoke[i] = PL_nexttoke[i-1];
1897         }
1898         PL_lasttoke++;
1899     }
1900     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1901         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1902     PL_curforce = where;
1903     if (PL_nextwhite) {
1904         if (PL_madskills)
1905             curmad('^', newSVpvs(""));
1906         CURMAD('_', PL_nextwhite);
1907     }
1908 }
1909
1910 STATIC void
1911 S_curmad(pTHX_ char slot, SV *sv)
1912 {
1913     MADPROP **where;
1914
1915     if (!sv)
1916         return;
1917     if (PL_curforce < 0)
1918         where = &PL_thismad;
1919     else
1920         where = &PL_nexttoke[PL_curforce].next_mad;
1921
1922     if (PL_faketokens)
1923         sv_setpvs(sv, "");
1924     else {
1925         if (!IN_BYTES) {
1926             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1927                 SvUTF8_on(sv);
1928             else if (PL_encoding) {
1929                 sv_recode_to_utf8(sv, PL_encoding);
1930             }
1931         }
1932     }
1933
1934     /* keep a slot open for the head of the list? */
1935     if (slot != '_' && *where && (*where)->mad_key == '^') {
1936         (*where)->mad_key = slot;
1937         sv_free(MUTABLE_SV(((*where)->mad_val)));
1938         (*where)->mad_val = (void*)sv;
1939     }
1940     else
1941         addmad(newMADsv(slot, sv), where, 0);
1942 }
1943 #else
1944 #  define start_force(where)    NOOP
1945 #  define curmad(slot, sv)      NOOP
1946 #endif
1947
1948 /*
1949  * S_force_next
1950  * When the lexer realizes it knows the next token (for instance,
1951  * it is reordering tokens for the parser) then it can call S_force_next
1952  * to know what token to return the next time the lexer is called.  Caller
1953  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1954  * and possibly PL_expect to ensure the lexer handles the token correctly.
1955  */
1956
1957 STATIC void
1958 S_force_next(pTHX_ I32 type)
1959 {
1960     dVAR;
1961 #ifdef DEBUGGING
1962     if (DEBUG_T_TEST) {
1963         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1964         tokereport(type, &NEXTVAL_NEXTTOKE);
1965     }
1966 #endif
1967 #ifdef PERL_MAD
1968     if (PL_curforce < 0)
1969         start_force(PL_lasttoke);
1970     PL_nexttoke[PL_curforce].next_type = type;
1971     if (PL_lex_state != LEX_KNOWNEXT)
1972         PL_lex_defer = PL_lex_state;
1973     PL_lex_state = LEX_KNOWNEXT;
1974     PL_lex_expect = PL_expect;
1975     PL_curforce = -1;
1976 #else
1977     PL_nexttype[PL_nexttoke] = type;
1978     PL_nexttoke++;
1979     if (PL_lex_state != LEX_KNOWNEXT) {
1980         PL_lex_defer = PL_lex_state;
1981         PL_lex_expect = PL_expect;
1982         PL_lex_state = LEX_KNOWNEXT;
1983     }
1984 #endif
1985 }
1986
1987 void
1988 Perl_yyunlex(pTHX)
1989 {
1990     int yyc = PL_parser->yychar;
1991     if (yyc != YYEMPTY) {
1992         if (yyc) {
1993             start_force(-1);
1994             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1995             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1996                 PL_lex_allbrackets--;
1997                 PL_lex_brackets--;
1998                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1999             } else if (yyc == '('/*)*/) {
2000                 PL_lex_allbrackets--;
2001                 yyc |= (2<<24);
2002             }
2003             force_next(yyc);
2004         }
2005         PL_parser->yychar = YYEMPTY;
2006     }
2007 }
2008
2009 STATIC SV *
2010 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2011 {
2012     dVAR;
2013     SV * const sv = newSVpvn_utf8(start, len,
2014                                   !IN_BYTES
2015                                   && UTF
2016                                   && !is_ascii_string((const U8*)start, len)
2017                                   && is_utf8_string((const U8*)start, len));
2018     return sv;
2019 }
2020
2021 /*
2022  * S_force_word
2023  * When the lexer knows the next thing is a word (for instance, it has
2024  * just seen -> and it knows that the next char is a word char, then
2025  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2026  * lookahead.
2027  *
2028  * Arguments:
2029  *   char *start : buffer position (must be within PL_linestr)
2030  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2031  *   int check_keyword : if true, Perl checks to make sure the word isn't
2032  *       a keyword (do this if the word is a label, e.g. goto FOO)
2033  *   int allow_pack : if true, : characters will also be allowed (require,
2034  *       use, etc. do this)
2035  *   int allow_initial_tick : used by the "sub" lexer only.
2036  */
2037
2038 STATIC char *
2039 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2040 {
2041     dVAR;
2042     register char *s;
2043     STRLEN len;
2044
2045     PERL_ARGS_ASSERT_FORCE_WORD;
2046
2047     start = SKIPSPACE1(start);
2048     s = start;
2049     if (isIDFIRST_lazy_if(s,UTF) ||
2050         (allow_pack && *s == ':') ||
2051         (allow_initial_tick && *s == '\'') )
2052     {
2053         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2054         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2055             return start;
2056         start_force(PL_curforce);
2057         if (PL_madskills)
2058             curmad('X', newSVpvn(start,s-start));
2059         if (token == METHOD) {
2060             s = SKIPSPACE1(s);
2061             if (*s == '(')
2062                 PL_expect = XTERM;
2063             else {
2064                 PL_expect = XOPERATOR;
2065             }
2066         }
2067         if (PL_madskills)
2068             curmad('g', newSVpvs( "forced" ));
2069         NEXTVAL_NEXTTOKE.opval
2070             = (OP*)newSVOP(OP_CONST,0,
2071                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2072         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2073         force_next(token);
2074     }
2075     return s;
2076 }
2077
2078 /*
2079  * S_force_ident
2080  * Called when the lexer wants $foo *foo &foo etc, but the program
2081  * text only contains the "foo" portion.  The first argument is a pointer
2082  * to the "foo", and the second argument is the type symbol to prefix.
2083  * Forces the next token to be a "WORD".
2084  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2085  */
2086
2087 STATIC void
2088 S_force_ident(pTHX_ register const char *s, int kind)
2089 {
2090     dVAR;
2091
2092     PERL_ARGS_ASSERT_FORCE_IDENT;
2093
2094     if (*s) {
2095         const STRLEN len = strlen(s);
2096         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2097         start_force(PL_curforce);
2098         NEXTVAL_NEXTTOKE.opval = o;
2099         force_next(WORD);
2100         if (kind) {
2101             o->op_private = OPpCONST_ENTERED;
2102             /* XXX see note in pp_entereval() for why we forgo typo
2103                warnings if the symbol must be introduced in an eval.
2104                GSAR 96-10-12 */
2105             gv_fetchpvn_flags(s, len,
2106                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2107                               : GV_ADD,
2108                               kind == '$' ? SVt_PV :
2109                               kind == '@' ? SVt_PVAV :
2110                               kind == '%' ? SVt_PVHV :
2111                               SVt_PVGV
2112                               );
2113         }
2114     }
2115 }
2116
2117 NV
2118 Perl_str_to_version(pTHX_ SV *sv)
2119 {
2120     NV retval = 0.0;
2121     NV nshift = 1.0;
2122     STRLEN len;
2123     const char *start = SvPV_const(sv,len);
2124     const char * const end = start + len;
2125     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2126
2127     PERL_ARGS_ASSERT_STR_TO_VERSION;
2128
2129     while (start < end) {
2130         STRLEN skip;
2131         UV n;
2132         if (utf)
2133             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2134         else {
2135             n = *(U8*)start;
2136             skip = 1;
2137         }
2138         retval += ((NV)n)/nshift;
2139         start += skip;
2140         nshift *= 1000;
2141     }
2142     return retval;
2143 }
2144
2145 /*
2146  * S_force_version
2147  * Forces the next token to be a version number.
2148  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2149  * and if "guessing" is TRUE, then no new token is created (and the caller
2150  * must use an alternative parsing method).
2151  */
2152
2153 STATIC char *
2154 S_force_version(pTHX_ char *s, int guessing)
2155 {
2156     dVAR;
2157     OP *version = NULL;
2158     char *d;
2159 #ifdef PERL_MAD
2160     I32 startoff = s - SvPVX(PL_linestr);
2161 #endif
2162
2163     PERL_ARGS_ASSERT_FORCE_VERSION;
2164
2165     s = SKIPSPACE1(s);
2166
2167     d = s;
2168     if (*d == 'v')
2169         d++;
2170     if (isDIGIT(*d)) {
2171         while (isDIGIT(*d) || *d == '_' || *d == '.')
2172             d++;
2173 #ifdef PERL_MAD
2174         if (PL_madskills) {
2175             start_force(PL_curforce);
2176             curmad('X', newSVpvn(s,d-s));
2177         }
2178 #endif
2179         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2180             SV *ver;
2181 #ifdef USE_LOCALE_NUMERIC
2182             char *loc = setlocale(LC_NUMERIC, "C");
2183 #endif
2184             s = scan_num(s, &pl_yylval);
2185 #ifdef USE_LOCALE_NUMERIC
2186             setlocale(LC_NUMERIC, loc);
2187 #endif
2188             version = pl_yylval.opval;
2189             ver = cSVOPx(version)->op_sv;
2190             if (SvPOK(ver) && !SvNIOK(ver)) {
2191                 SvUPGRADE(ver, SVt_PVNV);
2192                 SvNV_set(ver, str_to_version(ver));
2193                 SvNOK_on(ver);          /* hint that it is a version */
2194             }
2195         }
2196         else if (guessing) {
2197 #ifdef PERL_MAD
2198             if (PL_madskills) {
2199                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2200                 PL_nextwhite = 0;
2201                 s = SvPVX(PL_linestr) + startoff;
2202             }
2203 #endif
2204             return s;
2205         }
2206     }
2207
2208 #ifdef PERL_MAD
2209     if (PL_madskills && !version) {
2210         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2211         PL_nextwhite = 0;
2212         s = SvPVX(PL_linestr) + startoff;
2213     }
2214 #endif
2215     /* NOTE: The parser sees the package name and the VERSION swapped */
2216     start_force(PL_curforce);
2217     NEXTVAL_NEXTTOKE.opval = version;
2218     force_next(WORD);
2219
2220     return s;
2221 }
2222
2223 /*
2224  * S_force_strict_version
2225  * Forces the next token to be a version number using strict syntax rules.
2226  */
2227
2228 STATIC char *
2229 S_force_strict_version(pTHX_ char *s)
2230 {
2231     dVAR;
2232     OP *version = NULL;
2233 #ifdef PERL_MAD
2234     I32 startoff = s - SvPVX(PL_linestr);
2235 #endif
2236     const char *errstr = NULL;
2237
2238     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2239
2240     while (isSPACE(*s)) /* leading whitespace */
2241         s++;
2242
2243     if (is_STRICT_VERSION(s,&errstr)) {
2244         SV *ver = newSV(0);
2245         s = (char *)scan_version(s, ver, 0);
2246         version = newSVOP(OP_CONST, 0, ver);
2247     }
2248     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2249             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2250     {
2251         PL_bufptr = s;
2252         if (errstr)
2253             yyerror(errstr); /* version required */
2254         return s;
2255     }
2256
2257 #ifdef PERL_MAD
2258     if (PL_madskills && !version) {
2259         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2260         PL_nextwhite = 0;
2261         s = SvPVX(PL_linestr) + startoff;
2262     }
2263 #endif
2264     /* NOTE: The parser sees the package name and the VERSION swapped */
2265     start_force(PL_curforce);
2266     NEXTVAL_NEXTTOKE.opval = version;
2267     force_next(WORD);
2268
2269     return s;
2270 }
2271
2272 /*
2273  * S_tokeq
2274  * Tokenize a quoted string passed in as an SV.  It finds the next
2275  * chunk, up to end of string or a backslash.  It may make a new
2276  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2277  * turns \\ into \.
2278  */
2279
2280 STATIC SV *
2281 S_tokeq(pTHX_ SV *sv)
2282 {
2283     dVAR;
2284     register char *s;
2285     register char *send;
2286     register char *d;
2287     STRLEN len = 0;
2288     SV *pv = sv;
2289
2290     PERL_ARGS_ASSERT_TOKEQ;
2291
2292     if (!SvLEN(sv))
2293         goto finish;
2294
2295     s = SvPV_force(sv, len);
2296     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2297         goto finish;
2298     send = s + len;
2299     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2300     while (s < send && !(*s == '\\' && s[1] == '\\'))
2301         s++;
2302     if (s == send)
2303         goto finish;
2304     d = s;
2305     if ( PL_hints & HINT_NEW_STRING ) {
2306         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2307     }
2308     while (s < send) {
2309         if (*s == '\\') {
2310             if (s + 1 < send && (s[1] == '\\'))
2311                 s++;            /* all that, just for this */
2312         }
2313         *d++ = *s++;
2314     }
2315     *d = '\0';
2316     SvCUR_set(sv, d - SvPVX_const(sv));
2317   finish:
2318     if ( PL_hints & HINT_NEW_STRING )
2319        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2320     return sv;
2321 }
2322
2323 /*
2324  * Now come three functions related to double-quote context,
2325  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2326  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2327  * interact with PL_lex_state, and create fake ( ... ) argument lists
2328  * to handle functions and concatenation.
2329  * They assume that whoever calls them will be setting up a fake
2330  * join call, because each subthing puts a ',' after it.  This lets
2331  *   "lower \luPpEr"
2332  * become
2333  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2334  *
2335  * (I'm not sure whether the spurious commas at the end of lcfirst's
2336  * arguments and join's arguments are created or not).
2337  */
2338
2339 /*
2340  * S_sublex_start
2341  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2342  *
2343  * Pattern matching will set PL_lex_op to the pattern-matching op to
2344  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2345  *
2346  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2347  *
2348  * Everything else becomes a FUNC.
2349  *
2350  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2351  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2352  * call to S_sublex_push().
2353  */
2354
2355 STATIC I32
2356 S_sublex_start(pTHX)
2357 {
2358     dVAR;
2359     register const I32 op_type = pl_yylval.ival;
2360
2361     if (op_type == OP_NULL) {
2362         pl_yylval.opval = PL_lex_op;
2363         PL_lex_op = NULL;
2364         return THING;
2365     }
2366     if (op_type == OP_CONST || op_type == OP_READLINE) {
2367         SV *sv = tokeq(PL_lex_stuff);
2368
2369         if (SvTYPE(sv) == SVt_PVIV) {
2370             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2371             STRLEN len;
2372             const char * const p = SvPV_const(sv, len);
2373             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2374             SvREFCNT_dec(sv);
2375             sv = nsv;
2376         }
2377         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2378         PL_lex_stuff = NULL;
2379         /* Allow <FH> // "foo" */
2380         if (op_type == OP_READLINE)
2381             PL_expect = XTERMORDORDOR;
2382         return THING;
2383     }
2384     else if (op_type == OP_BACKTICK && PL_lex_op) {
2385         /* readpipe() vas overriden */
2386         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2387         pl_yylval.opval = PL_lex_op;
2388         PL_lex_op = NULL;
2389         PL_lex_stuff = NULL;
2390         return THING;
2391     }
2392
2393     PL_sublex_info.super_state = PL_lex_state;
2394     PL_sublex_info.sub_inwhat = (U16)op_type;
2395     PL_sublex_info.sub_op = PL_lex_op;
2396     PL_lex_state = LEX_INTERPPUSH;
2397
2398     PL_expect = XTERM;
2399     if (PL_lex_op) {
2400         pl_yylval.opval = PL_lex_op;
2401         PL_lex_op = NULL;
2402         return PMFUNC;
2403     }
2404     else
2405         return FUNC;
2406 }
2407
2408 /*
2409  * S_sublex_push
2410  * Create a new scope to save the lexing state.  The scope will be
2411  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2412  * to the uc, lc, etc. found before.
2413  * Sets PL_lex_state to LEX_INTERPCONCAT.
2414  */
2415
2416 STATIC I32
2417 S_sublex_push(pTHX)
2418 {
2419     dVAR;
2420     ENTER;
2421
2422     PL_lex_state = PL_sublex_info.super_state;
2423     SAVEBOOL(PL_lex_dojoin);
2424     SAVEI32(PL_lex_brackets);
2425     SAVEI32(PL_lex_allbrackets);
2426     SAVEI8(PL_lex_fakeeof);
2427     SAVEI32(PL_lex_casemods);
2428     SAVEI32(PL_lex_starts);
2429     SAVEI8(PL_lex_state);
2430     SAVEVPTR(PL_lex_inpat);
2431     SAVEI16(PL_lex_inwhat);
2432     SAVECOPLINE(PL_curcop);
2433     SAVEPPTR(PL_bufptr);
2434     SAVEPPTR(PL_bufend);
2435     SAVEPPTR(PL_oldbufptr);
2436     SAVEPPTR(PL_oldoldbufptr);
2437     SAVEPPTR(PL_last_lop);
2438     SAVEPPTR(PL_last_uni);
2439     SAVEPPTR(PL_linestart);
2440     SAVESPTR(PL_linestr);
2441     SAVEGENERICPV(PL_lex_brackstack);
2442     SAVEGENERICPV(PL_lex_casestack);
2443
2444     PL_linestr = PL_lex_stuff;
2445     PL_lex_stuff = NULL;
2446
2447     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2448         = SvPVX(PL_linestr);
2449     PL_bufend += SvCUR(PL_linestr);
2450     PL_last_lop = PL_last_uni = NULL;
2451     SAVEFREESV(PL_linestr);
2452
2453     PL_lex_dojoin = FALSE;
2454     PL_lex_brackets = 0;
2455     PL_lex_allbrackets = 0;
2456     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2457     Newx(PL_lex_brackstack, 120, char);
2458     Newx(PL_lex_casestack, 12, char);
2459     PL_lex_casemods = 0;
2460     *PL_lex_casestack = '\0';
2461     PL_lex_starts = 0;
2462     PL_lex_state = LEX_INTERPCONCAT;
2463     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2464
2465     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2466     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2467     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2468         PL_lex_inpat = PL_sublex_info.sub_op;
2469     else
2470         PL_lex_inpat = NULL;
2471
2472     return '(';
2473 }
2474
2475 /*
2476  * S_sublex_done
2477  * Restores lexer state after a S_sublex_push.
2478  */
2479
2480 STATIC I32
2481 S_sublex_done(pTHX)
2482 {
2483     dVAR;
2484     if (!PL_lex_starts++) {
2485         SV * const sv = newSVpvs("");
2486         if (SvUTF8(PL_linestr))
2487             SvUTF8_on(sv);
2488         PL_expect = XOPERATOR;
2489         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2490         return THING;
2491     }
2492
2493     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2494         PL_lex_state = LEX_INTERPCASEMOD;
2495         return yylex();
2496     }
2497
2498     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2499     assert(PL_lex_inwhat != OP_TRANSR);
2500     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2501         PL_linestr = PL_lex_repl;
2502         PL_lex_inpat = 0;
2503         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2504         PL_bufend += SvCUR(PL_linestr);
2505         PL_last_lop = PL_last_uni = NULL;
2506         SAVEFREESV(PL_linestr);
2507         PL_lex_dojoin = FALSE;
2508         PL_lex_brackets = 0;
2509         PL_lex_allbrackets = 0;
2510         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2511         PL_lex_casemods = 0;
2512         *PL_lex_casestack = '\0';
2513         PL_lex_starts = 0;
2514         if (SvEVALED(PL_lex_repl)) {
2515             PL_lex_state = LEX_INTERPNORMAL;
2516             PL_lex_starts++;
2517             /*  we don't clear PL_lex_repl here, so that we can check later
2518                 whether this is an evalled subst; that means we rely on the
2519                 logic to ensure sublex_done() is called again only via the
2520                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2521         }
2522         else {
2523             PL_lex_state = LEX_INTERPCONCAT;
2524             PL_lex_repl = NULL;
2525         }
2526         return ',';
2527     }
2528     else {
2529 #ifdef PERL_MAD
2530         if (PL_madskills) {
2531             if (PL_thiswhite) {
2532                 if (!PL_endwhite)
2533                     PL_endwhite = newSVpvs("");
2534                 sv_catsv(PL_endwhite, PL_thiswhite);
2535                 PL_thiswhite = 0;
2536             }
2537             if (PL_thistoken)
2538                 sv_setpvs(PL_thistoken,"");
2539             else
2540                 PL_realtokenstart = -1;
2541         }
2542 #endif
2543         LEAVE;
2544         PL_bufend = SvPVX(PL_linestr);
2545         PL_bufend += SvCUR(PL_linestr);
2546         PL_expect = XOPERATOR;
2547         PL_sublex_info.sub_inwhat = 0;
2548         return ')';
2549     }
2550 }
2551
2552 /*
2553   scan_const
2554
2555   Extracts a pattern, double-quoted string, or transliteration.  This
2556   is terrifying code.
2557
2558   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2559   processing a pattern (PL_lex_inpat is true), a transliteration
2560   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2561
2562   Returns a pointer to the character scanned up to. If this is
2563   advanced from the start pointer supplied (i.e. if anything was
2564   successfully parsed), will leave an OP for the substring scanned
2565   in pl_yylval. Caller must intuit reason for not parsing further
2566   by looking at the next characters herself.
2567
2568   In patterns:
2569     backslashes:
2570       constants: \N{NAME} only
2571       case and quoting: \U \Q \E
2572     stops on @ and $, but not for $ as tail anchor
2573
2574   In transliterations:
2575     characters are VERY literal, except for - not at the start or end
2576     of the string, which indicates a range. If the range is in bytes,
2577     scan_const expands the range to the full set of intermediate
2578     characters. If the range is in utf8, the hyphen is replaced with
2579     a certain range mark which will be handled by pmtrans() in op.c.
2580
2581   In double-quoted strings:
2582     backslashes:
2583       double-quoted style: \r and \n
2584       constants: \x31, etc.
2585       deprecated backrefs: \1 (in substitution replacements)
2586       case and quoting: \U \Q \E
2587     stops on @ and $
2588
2589   scan_const does *not* construct ops to handle interpolated strings.
2590   It stops processing as soon as it finds an embedded $ or @ variable
2591   and leaves it to the caller to work out what's going on.
2592
2593   embedded arrays (whether in pattern or not) could be:
2594       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2595
2596   $ in double-quoted strings must be the symbol of an embedded scalar.
2597
2598   $ in pattern could be $foo or could be tail anchor.  Assumption:
2599   it's a tail anchor if $ is the last thing in the string, or if it's
2600   followed by one of "()| \r\n\t"
2601
2602   \1 (backreferences) are turned into $1
2603
2604   The structure of the code is
2605       while (there's a character to process) {
2606           handle transliteration ranges
2607           skip regexp comments /(?#comment)/ and codes /(?{code})/
2608           skip #-initiated comments in //x patterns
2609           check for embedded arrays
2610           check for embedded scalars
2611           if (backslash) {
2612               deprecate \1 in substitution replacements
2613               handle string-changing backslashes \l \U \Q \E, etc.
2614               switch (what was escaped) {
2615                   handle \- in a transliteration (becomes a literal -)
2616                   if a pattern and not \N{, go treat as regular character
2617                   handle \132 (octal characters)
2618                   handle \x15 and \x{1234} (hex characters)
2619                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2620                   handle \cV (control characters)
2621                   handle printf-style backslashes (\f, \r, \n, etc)
2622               } (end switch)
2623               continue
2624           } (end if backslash)
2625           handle regular character
2626     } (end while character to read)
2627
2628 */
2629
2630 STATIC char *
2631 S_scan_const(pTHX_ char *start)
2632 {
2633     dVAR;
2634     register char *send = PL_bufend;            /* end of the constant */
2635     SV *sv = newSV(send - start);               /* sv for the constant.  See
2636                                                    note below on sizing. */
2637     register char *s = start;                   /* start of the constant */
2638     register char *d = SvPVX(sv);               /* destination for copies */
2639     bool dorange = FALSE;                       /* are we in a translit range? */
2640     bool didrange = FALSE;                      /* did we just finish a range? */
2641     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2642     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2643                                                    to be UTF8?  But, this can
2644                                                    show as true when the source
2645                                                    isn't utf8, as for example
2646                                                    when it is entirely composed
2647                                                    of hex constants */
2648
2649     /* Note on sizing:  The scanned constant is placed into sv, which is
2650      * initialized by newSV() assuming one byte of output for every byte of
2651      * input.  This routine expects newSV() to allocate an extra byte for a
2652      * trailing NUL, which this routine will append if it gets to the end of
2653      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2654      * CAPITAL LETTER A}), or more output than input if the constant ends up
2655      * recoded to utf8, but each time a construct is found that might increase
2656      * the needed size, SvGROW() is called.  Its size parameter each time is
2657      * based on the best guess estimate at the time, namely the length used so
2658      * far, plus the length the current construct will occupy, plus room for
2659      * the trailing NUL, plus one byte for every input byte still unscanned */
2660
2661     UV uv;
2662 #ifdef EBCDIC
2663     UV literal_endpoint = 0;
2664     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2665 #endif
2666
2667     PERL_ARGS_ASSERT_SCAN_CONST;
2668
2669     assert(PL_lex_inwhat != OP_TRANSR);
2670     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2671         /* If we are doing a trans and we know we want UTF8 set expectation */
2672         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2673         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2674     }
2675
2676
2677     while (s < send || dorange) {
2678
2679         /* get transliterations out of the way (they're most literal) */
2680         if (PL_lex_inwhat == OP_TRANS) {
2681             /* expand a range A-Z to the full set of characters.  AIE! */
2682             if (dorange) {
2683                 I32 i;                          /* current expanded character */
2684                 I32 min;                        /* first character in range */
2685                 I32 max;                        /* last character in range */
2686
2687 #ifdef EBCDIC
2688                 UV uvmax = 0;
2689 #endif
2690
2691                 if (has_utf8
2692 #ifdef EBCDIC
2693                     && !native_range
2694 #endif
2695                     ) {
2696                     char * const c = (char*)utf8_hop((U8*)d, -1);
2697                     char *e = d++;
2698                     while (e-- > c)
2699                         *(e + 1) = *e;
2700                     *c = (char)UTF_TO_NATIVE(0xff);
2701                     /* mark the range as done, and continue */
2702                     dorange = FALSE;
2703                     didrange = TRUE;
2704                     continue;
2705                 }
2706
2707                 i = d - SvPVX_const(sv);                /* remember current offset */
2708 #ifdef EBCDIC
2709                 SvGROW(sv,
2710                        SvLEN(sv) + (has_utf8 ?
2711                                     (512 - UTF_CONTINUATION_MARK +
2712                                      UNISKIP(0x100))
2713                                     : 256));
2714                 /* How many two-byte within 0..255: 128 in UTF-8,
2715                  * 96 in UTF-8-mod. */
2716 #else
2717                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2718 #endif
2719                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2720 #ifdef EBCDIC
2721                 if (has_utf8) {
2722                     int j;
2723                     for (j = 0; j <= 1; j++) {
2724                         char * const c = (char*)utf8_hop((U8*)d, -1);
2725                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2726                         if (j)
2727                             min = (U8)uv;
2728                         else if (uv < 256)
2729                             max = (U8)uv;
2730                         else {
2731                             max = (U8)0xff; /* only to \xff */
2732                             uvmax = uv; /* \x{100} to uvmax */
2733                         }
2734                         d = c; /* eat endpoint chars */
2735                      }
2736                 }
2737                else {
2738 #endif
2739                    d -= 2;              /* eat the first char and the - */
2740                    min = (U8)*d;        /* first char in range */
2741                    max = (U8)d[1];      /* last char in range  */
2742 #ifdef EBCDIC
2743                }
2744 #endif
2745
2746                 if (min > max) {
2747                     Perl_croak(aTHX_
2748                                "Invalid range \"%c-%c\" in transliteration operator",
2749                                (char)min, (char)max);
2750                 }
2751
2752 #ifdef EBCDIC
2753                 if (literal_endpoint == 2 &&
2754                     ((isLOWER(min) && isLOWER(max)) ||
2755                      (isUPPER(min) && isUPPER(max)))) {
2756                     if (isLOWER(min)) {
2757                         for (i = min; i <= max; i++)
2758                             if (isLOWER(i))
2759                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2760                     } else {
2761                         for (i = min; i <= max; i++)
2762                             if (isUPPER(i))
2763                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2764                     }
2765                 }
2766                 else
2767 #endif
2768                     for (i = min; i <= max; i++)
2769 #ifdef EBCDIC
2770                         if (has_utf8) {
2771                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2772                             if (UNI_IS_INVARIANT(ch))
2773                                 *d++ = (U8)i;
2774                             else {
2775                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2776                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2777                             }
2778                         }
2779                         else
2780 #endif
2781                             *d++ = (char)i;
2782
2783 #ifdef EBCDIC
2784                 if (uvmax) {
2785                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2786                     if (uvmax > 0x101)
2787                         *d++ = (char)UTF_TO_NATIVE(0xff);
2788                     if (uvmax > 0x100)
2789                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2790                 }
2791 #endif
2792
2793                 /* mark the range as done, and continue */
2794                 dorange = FALSE;
2795                 didrange = TRUE;
2796 #ifdef EBCDIC
2797                 literal_endpoint = 0;
2798 #endif
2799                 continue;
2800             }
2801
2802             /* range begins (ignore - as first or last char) */
2803             else if (*s == '-' && s+1 < send  && s != start) {
2804                 if (didrange) {
2805                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2806                 }
2807                 if (has_utf8
2808 #ifdef EBCDIC
2809                     && !native_range
2810 #endif
2811                     ) {
2812                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2813                     s++;
2814                     continue;
2815                 }
2816                 dorange = TRUE;
2817                 s++;
2818             }
2819             else {
2820                 didrange = FALSE;
2821 #ifdef EBCDIC
2822                 literal_endpoint = 0;
2823                 native_range = TRUE;
2824 #endif
2825             }
2826         }
2827
2828         /* if we get here, we're not doing a transliteration */
2829
2830         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2831            except for the last char, which will be done separately. */
2832         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2833             if (s[2] == '#') {
2834                 while (s+1 < send && *s != ')')
2835                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2836             }
2837             else if (s[2] == '{' /* This should match regcomp.c */
2838                     || (s[2] == '?' && s[3] == '{'))
2839             {
2840                 I32 count = 1;
2841                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2842                 char c;
2843
2844                 while (count && (c = *regparse)) {
2845                     if (c == '\\' && regparse[1])
2846                         regparse++;
2847                     else if (c == '{')
2848                         count++;
2849                     else if (c == '}')
2850                         count--;
2851                     regparse++;
2852                 }
2853                 if (*regparse != ')')
2854                     regparse--;         /* Leave one char for continuation. */
2855                 while (s < regparse)
2856                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2857             }
2858         }
2859
2860         /* likewise skip #-initiated comments in //x patterns */
2861         else if (*s == '#' && PL_lex_inpat &&
2862           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2863             while (s+1 < send && *s != '\n')
2864                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2865         }
2866
2867         /* check for embedded arrays
2868            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2869            */
2870         else if (*s == '@' && s[1]) {
2871             if (isALNUM_lazy_if(s+1,UTF))
2872                 break;
2873             if (strchr(":'{$", s[1]))
2874                 break;
2875             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2876                 break; /* in regexp, neither @+ nor @- are interpolated */
2877         }
2878
2879         /* check for embedded scalars.  only stop if we're sure it's a
2880            variable.
2881         */
2882         else if (*s == '$') {
2883             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2884                 break;
2885             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2886                 if (s[1] == '\\') {
2887                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2888                                    "Possible unintended interpolation of $\\ in regex");
2889                 }
2890                 break;          /* in regexp, $ might be tail anchor */
2891             }
2892         }
2893
2894         /* End of else if chain - OP_TRANS rejoin rest */
2895
2896         /* backslashes */
2897         if (*s == '\\' && s+1 < send) {
2898             char* e;    /* Can be used for ending '}', etc. */
2899
2900             s++;
2901
2902             /* warn on \1 - \9 in substitution replacements, but note that \11
2903              * is an octal; and \19 is \1 followed by '9' */
2904             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2905                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2906             {
2907                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2908                 *--s = '$';
2909                 break;
2910             }
2911
2912             /* string-change backslash escapes */
2913             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2914                 --s;
2915                 break;
2916             }
2917             /* In a pattern, process \N, but skip any other backslash escapes.
2918              * This is because we don't want to translate an escape sequence
2919              * into a meta symbol and have the regex compiler use the meta
2920              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2921              * in spite of this, we do have to process \N here while the proper
2922              * charnames handler is in scope.  See bugs #56444 and #62056.
2923              * There is a complication because \N in a pattern may also stand
2924              * for 'match a non-nl', and not mean a charname, in which case its
2925              * processing should be deferred to the regex compiler.  To be a
2926              * charname it must be followed immediately by a '{', and not look
2927              * like \N followed by a curly quantifier, i.e., not something like
2928              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2929              * quantifier */
2930             else if (PL_lex_inpat
2931                     && (*s != 'N'
2932                         || s[1] != '{'
2933                         || regcurly(s + 1)))
2934             {
2935                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2936                 goto default_action;
2937             }
2938
2939             switch (*s) {
2940
2941             /* quoted - in transliterations */
2942             case '-':
2943                 if (PL_lex_inwhat == OP_TRANS) {
2944                     *d++ = *s++;
2945                     continue;
2946                 }
2947                 /* FALL THROUGH */
2948             default:
2949                 {
2950                     if ((isALPHA(*s) || isDIGIT(*s)))
2951                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2952                                        "Unrecognized escape \\%c passed through",
2953                                        *s);
2954                     /* default action is to copy the quoted character */
2955                     goto default_action;
2956                 }
2957
2958             /* eg. \132 indicates the octal constant 0132 */
2959             case '0': case '1': case '2': case '3':
2960             case '4': case '5': case '6': case '7':
2961                 {
2962                     I32 flags = 0;
2963                     STRLEN len = 3;
2964                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2965                     s += len;
2966                 }
2967                 goto NUM_ESCAPE_INSERT;
2968
2969             /* eg. \o{24} indicates the octal constant \024 */
2970             case 'o':
2971                 {
2972                     STRLEN len;
2973                     const char* error;
2974
2975                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2976                     s += len;
2977                     if (! valid) {
2978                         yyerror(error);
2979                         continue;
2980                     }
2981                     goto NUM_ESCAPE_INSERT;
2982                 }
2983
2984             /* eg. \x24 indicates the hex constant 0x24 */
2985             case 'x':
2986                 ++s;
2987                 if (*s == '{') {
2988                     char* const e = strchr(s, '}');
2989                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2990                       PERL_SCAN_DISALLOW_PREFIX;
2991                     STRLEN len;
2992
2993                     ++s;
2994                     if (!e) {
2995                         yyerror("Missing right brace on \\x{}");
2996                         continue;
2997                     }
2998                     len = e - s;
2999                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3000                     s = e + 1;
3001                 }
3002                 else {
3003                     {
3004                         STRLEN len = 2;
3005                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3006                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3007                         s += len;
3008                     }
3009                 }
3010
3011               NUM_ESCAPE_INSERT:
3012                 /* Insert oct or hex escaped character.  There will always be
3013                  * enough room in sv since such escapes will be longer than any
3014                  * UTF-8 sequence they can end up as, except if they force us
3015                  * to recode the rest of the string into utf8 */
3016
3017                 /* Here uv is the ordinal of the next character being added in
3018                  * unicode (converted from native). */
3019                 if (!UNI_IS_INVARIANT(uv)) {
3020                     if (!has_utf8 && uv > 255) {
3021                         /* Might need to recode whatever we have accumulated so
3022                          * far if it contains any chars variant in utf8 or
3023                          * utf-ebcdic. */
3024
3025                         SvCUR_set(sv, d - SvPVX_const(sv));
3026                         SvPOK_on(sv);
3027                         *d = '\0';
3028                         /* See Note on sizing above.  */
3029                         sv_utf8_upgrade_flags_grow(sv,
3030                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3031                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3032                         d = SvPVX(sv) + SvCUR(sv);
3033                         has_utf8 = TRUE;
3034                     }
3035
3036                     if (has_utf8) {
3037                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3038                         if (PL_lex_inwhat == OP_TRANS &&
3039                             PL_sublex_info.sub_op) {
3040                             PL_sublex_info.sub_op->op_private |=
3041                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3042                                              : OPpTRANS_TO_UTF);
3043                         }
3044 #ifdef EBCDIC
3045                         if (uv > 255 && !dorange)
3046                             native_range = FALSE;
3047 #endif
3048                     }
3049                     else {
3050                         *d++ = (char)uv;
3051                     }
3052                 }
3053                 else {
3054                     *d++ = (char) uv;
3055                 }
3056                 continue;
3057
3058             case 'N':
3059                 /* In a non-pattern \N must be a named character, like \N{LATIN
3060                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3061                  * mean to match a non-newline.  For non-patterns, named
3062                  * characters are converted to their string equivalents. In
3063                  * patterns, named characters are not converted to their
3064                  * ultimate forms for the same reasons that other escapes
3065                  * aren't.  Instead, they are converted to the \N{U+...} form
3066                  * to get the value from the charnames that is in effect right
3067                  * now, while preserving the fact that it was a named character
3068                  * so that the regex compiler knows this */
3069
3070                 /* This section of code doesn't generally use the
3071                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3072                  * a close examination of this macro and determined it is a
3073                  * no-op except on utfebcdic variant characters.  Every
3074                  * character generated by this that would normally need to be
3075                  * enclosed by this macro is invariant, so the macro is not
3076                  * needed, and would complicate use of copy().  XXX There are
3077                  * other parts of this file where the macro is used
3078                  * inconsistently, but are saved by it being a no-op */
3079
3080                 /* The structure of this section of code (besides checking for
3081                  * errors and upgrading to utf8) is:
3082                  *  Further disambiguate between the two meanings of \N, and if
3083                  *      not a charname, go process it elsewhere
3084                  *  If of form \N{U+...}, pass it through if a pattern;
3085                  *      otherwise convert to utf8
3086                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3087                  *  pattern; otherwise convert to utf8 */
3088
3089                 /* Here, s points to the 'N'; the test below is guaranteed to
3090                  * succeed if we are being called on a pattern as we already
3091                  * know from a test above that the next character is a '{'.
3092                  * On a non-pattern \N must mean 'named sequence, which
3093                  * requires braces */
3094                 s++;
3095                 if (*s != '{') {
3096                     yyerror("Missing braces on \\N{}");
3097                     continue;
3098                 }
3099                 s++;
3100
3101                 /* If there is no matching '}', it is an error. */
3102                 if (! (e = strchr(s, '}'))) {
3103                     if (! PL_lex_inpat) {
3104                         yyerror("Missing right brace on \\N{}");
3105                     } else {
3106                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3107                     }
3108                     continue;
3109                 }
3110
3111                 /* Here it looks like a named character */
3112
3113                 if (PL_lex_inpat) {
3114
3115                     /* XXX This block is temporary code.  \N{} implies that the
3116                      * pattern is to have Unicode semantics, and therefore
3117                      * currently has to be encoded in utf8.  By putting it in
3118                      * utf8 now, we save a whole pass in the regular expression
3119                      * compiler.  Once that code is changed so Unicode
3120                      * semantics doesn't necessarily have to be in utf8, this
3121                      * block should be removed.  However, the code that parses
3122                      * the output of this would have to be changed to not
3123                      * necessarily expect utf8 */
3124                     if (!has_utf8) {
3125                         SvCUR_set(sv, d - SvPVX_const(sv));
3126                         SvPOK_on(sv);
3127                         *d = '\0';
3128                         /* See Note on sizing above.  */
3129                         sv_utf8_upgrade_flags_grow(sv,
3130                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3131                                         /* 5 = '\N{' + cur char + NUL */
3132                                         (STRLEN)(send - s) + 5);
3133                         d = SvPVX(sv) + SvCUR(sv);
3134                         has_utf8 = TRUE;
3135                     }
3136                 }
3137
3138                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3139                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3140                                 | PERL_SCAN_DISALLOW_PREFIX;
3141                     STRLEN len;
3142
3143                     /* For \N{U+...}, the '...' is a unicode value even on
3144                      * EBCDIC machines */
3145                     s += 2;         /* Skip to next char after the 'U+' */
3146                     len = e - s;
3147                     uv = grok_hex(s, &len, &flags, NULL);
3148                     if (len == 0 || len != (STRLEN)(e - s)) {
3149                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3150                         s = e + 1;
3151                         continue;
3152                     }
3153
3154                     if (PL_lex_inpat) {
3155
3156                         /* On non-EBCDIC platforms, pass through to the regex
3157                          * compiler unchanged.  The reason we evaluated the
3158                          * number above is to make sure there wasn't a syntax
3159                          * error.  But on EBCDIC we convert to native so
3160                          * downstream code can continue to assume it's native
3161                          */
3162                         s -= 5;     /* Include the '\N{U+' */
3163 #ifdef EBCDIC
3164                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3165                                                                and the \0 */
3166                                     "\\N{U+%X}",
3167                                     (unsigned int) UNI_TO_NATIVE(uv));
3168 #else
3169                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3170                         d += e - s + 1;
3171 #endif
3172                     }
3173                     else {  /* Not a pattern: convert the hex to string */
3174
3175                          /* If destination is not in utf8, unconditionally
3176                           * recode it to be so.  This is because \N{} implies
3177                           * Unicode semantics, and scalars have to be in utf8
3178                           * to guarantee those semantics */
3179                         if (! has_utf8) {
3180                             SvCUR_set(sv, d - SvPVX_const(sv));
3181                             SvPOK_on(sv);
3182                             *d = '\0';
3183                             /* See Note on sizing above.  */
3184                             sv_utf8_upgrade_flags_grow(
3185                                         sv,
3186                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3187                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3188                             d = SvPVX(sv) + SvCUR(sv);
3189                             has_utf8 = TRUE;
3190                         }
3191
3192                         /* Add the string to the output */
3193                         if (UNI_IS_INVARIANT(uv)) {
3194                             *d++ = (char) uv;
3195                         }
3196                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3197                     }
3198                 }
3199                 else { /* Here is \N{NAME} but not \N{U+...}. */
3200
3201                     SV *res;            /* result from charnames */
3202                     const char *str;    /* the string in 'res' */
3203                     STRLEN len;         /* its length */
3204
3205                     /* Get the value for NAME */
3206                     res = newSVpvn(s, e - s);
3207                     res = new_constant( NULL, 0, "charnames",
3208                                         /* includes all of: \N{...} */
3209                                         res, NULL, s - 3, e - s + 4 );
3210
3211                     /* Most likely res will be in utf8 already since the
3212                      * standard charnames uses pack U, but a custom translator
3213                      * can leave it otherwise, so make sure.  XXX This can be
3214                      * revisited to not have charnames use utf8 for characters
3215                      * that don't need it when regexes don't have to be in utf8
3216                      * for Unicode semantics.  If doing so, remember EBCDIC */
3217                     sv_utf8_upgrade(res);
3218                     str = SvPV_const(res, len);
3219
3220                     /* Don't accept malformed input */
3221                     if (! is_utf8_string((U8 *) str, len)) {
3222                         yyerror("Malformed UTF-8 returned by \\N");
3223                     }
3224                     else if (PL_lex_inpat) {
3225
3226                         if (! len) { /* The name resolved to an empty string */
3227                             Copy("\\N{}", d, 4, char);
3228                             d += 4;
3229                         }
3230                         else {
3231                             /* In order to not lose information for the regex
3232                             * compiler, pass the result in the specially made
3233                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3234                             * the code points in hex of each character
3235                             * returned by charnames */
3236
3237                             const char *str_end = str + len;
3238                             STRLEN char_length;     /* cur char's byte length */
3239                             STRLEN output_length;   /* and the number of bytes
3240                                                        after this is translated
3241                                                        into hex digits */
3242                             const STRLEN off = d - SvPVX_const(sv);
3243
3244                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3245                              * max('U+', '.'); and 1 for NUL */
3246                             char hex_string[2 * UTF8_MAXBYTES + 5];
3247
3248                             /* Get the first character of the result. */
3249                             U32 uv = utf8n_to_uvuni((U8 *) str,
3250                                                     len,
3251                                                     &char_length,
3252                                                     UTF8_ALLOW_ANYUV);
3253
3254                             /* The call to is_utf8_string() above hopefully
3255                              * guarantees that there won't be an error.  But
3256                              * it's easy here to make sure.  The function just
3257                              * above warns and returns 0 if invalid utf8, but
3258                              * it can also return 0 if the input is validly a
3259                              * NUL. Disambiguate */
3260                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3261                                 uv = UNICODE_REPLACEMENT;
3262                             }
3263
3264                             /* Convert first code point to hex, including the
3265                              * boiler plate before it.  For all these, we
3266                              * convert to native format so that downstream code
3267                              * can continue to assume the input is native */
3268                             output_length =
3269                                 my_snprintf(hex_string, sizeof(hex_string),
3270                                             "\\N{U+%X",
3271                                             (unsigned int) UNI_TO_NATIVE(uv));
3272
3273                             /* Make sure there is enough space to hold it */
3274                             d = off + SvGROW(sv, off
3275                                                  + output_length
3276                                                  + (STRLEN)(send - e)
3277                                                  + 2);  /* '}' + NUL */
3278                             /* And output it */
3279                             Copy(hex_string, d, output_length, char);
3280                             d += output_length;
3281
3282                             /* For each subsequent character, append dot and
3283                              * its ordinal in hex */
3284                             while ((str += char_length) < str_end) {
3285                                 const STRLEN off = d - SvPVX_const(sv);
3286                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3287                                                         str_end - str,
3288                                                         &char_length,
3289                                                         UTF8_ALLOW_ANYUV);
3290                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3291                                     uv = UNICODE_REPLACEMENT;
3292                                 }
3293
3294                                 output_length =
3295                                     my_snprintf(hex_string, sizeof(hex_string),
3296                                             ".%X",
3297                                             (unsigned int) UNI_TO_NATIVE(uv));
3298
3299                                 d = off + SvGROW(sv, off
3300                                                      + output_length
3301                                                      + (STRLEN)(send - e)
3302                                                      + 2);      /* '}' +  NUL */
3303                                 Copy(hex_string, d, output_length, char);
3304                                 d += output_length;
3305                             }
3306
3307                             *d++ = '}'; /* Done.  Add the trailing brace */
3308                         }
3309                     }
3310                     else { /* Here, not in a pattern.  Convert the name to a
3311                             * string. */
3312
3313                          /* If destination is not in utf8, unconditionally
3314                           * recode it to be so.  This is because \N{} implies
3315                           * Unicode semantics, and scalars have to be in utf8
3316                           * to guarantee those semantics */
3317                         if (! has_utf8) {
3318                             SvCUR_set(sv, d - SvPVX_const(sv));
3319                             SvPOK_on(sv);
3320                             *d = '\0';
3321                             /* See Note on sizing above.  */
3322                             sv_utf8_upgrade_flags_grow(sv,
3323                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3324                                                 len + (STRLEN)(send - s) + 1);
3325                             d = SvPVX(sv) + SvCUR(sv);
3326                             has_utf8 = TRUE;
3327                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3328
3329                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3330                              * set correctly here). */
3331                             const STRLEN off = d - SvPVX_const(sv);
3332                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3333                         }
3334                         Copy(str, d, len, char);
3335                         d += len;
3336                     }
3337                     SvREFCNT_dec(res);
3338
3339                     /* Deprecate non-approved name syntax */
3340                     if (ckWARN_d(WARN_DEPRECATED)) {
3341                         bool problematic = FALSE;
3342                         char* i = s;
3343
3344                         /* For non-ut8 input, look to see that the first
3345                          * character is an alpha, then loop through the rest
3346                          * checking that each is a continuation */
3347                         if (! this_utf8) {
3348                             if (! isALPHAU(*i)) problematic = TRUE;
3349                             else for (i = s + 1; i < e; i++) {
3350                                 if (isCHARNAME_CONT(*i)) continue;
3351                                 problematic = TRUE;
3352                                 break;
3353                             }
3354                         }
3355                         else {
3356                             /* Similarly for utf8.  For invariants can check
3357                              * directly.  We accept anything above the latin1
3358                              * range because it is immaterial to Perl if it is
3359                              * correct or not, and is expensive to check.  But
3360                              * it is fairly easy in the latin1 range to convert
3361                              * the variants into a single character and check
3362                              * those */
3363                             if (UTF8_IS_INVARIANT(*i)) {
3364                                 if (! isALPHAU(*i)) problematic = TRUE;
3365                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3366                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3367                                                                             *(i+1)))))
3368                                 {
3369                                     problematic = TRUE;
3370                                 }
3371                             }
3372                             if (! problematic) for (i = s + UTF8SKIP(s);
3373                                                     i < e;
3374                                                     i+= UTF8SKIP(i))
3375                             {
3376                                 if (UTF8_IS_INVARIANT(*i)) {
3377                                     if (isCHARNAME_CONT(*i)) continue;
3378                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3379                                     continue;
3380                                 } else if (isCHARNAME_CONT(
3381                                             UNI_TO_NATIVE(
3382                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3383                                 {
3384                                     continue;
3385                                 }
3386                                 problematic = TRUE;
3387                                 break;
3388                             }
3389                         }
3390                         if (problematic) {
3391                             /* The e-i passed to the final %.*s makes sure that
3392                              * should the trailing NUL be missing that this
3393                              * print won't run off the end of the string */
3394                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3395                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3396                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3397                         }
3398                     }
3399                 } /* End \N{NAME} */
3400 #ifdef EBCDIC
3401                 if (!dorange)
3402                     native_range = FALSE; /* \N{} is defined to be Unicode */
3403 #endif
3404                 s = e + 1;  /* Point to just after the '}' */
3405                 continue;
3406
3407             /* \c is a control character */
3408             case 'c':
3409                 s++;
3410                 if (s < send) {
3411                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3412                 }
3413                 else {
3414                     yyerror("Missing control char name in \\c");
3415                 }
3416                 continue;
3417
3418             /* printf-style backslashes, formfeeds, newlines, etc */
3419             case 'b':
3420                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3421                 break;
3422             case 'n':
3423                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3424                 break;
3425             case 'r':
3426                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3427                 break;
3428             case 'f':
3429                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3430                 break;
3431             case 't':
3432                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3433                 break;
3434             case 'e':
3435                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3436                 break;
3437             case 'a':
3438                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3439                 break;
3440             } /* end switch */
3441
3442             s++;
3443             continue;
3444         } /* end if (backslash) */
3445 #ifdef EBCDIC
3446         else
3447             literal_endpoint++;
3448 #endif
3449
3450     default_action:
3451         /* If we started with encoded form, or already know we want it,
3452            then encode the next character */
3453         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3454             STRLEN len  = 1;
3455
3456
3457             /* One might think that it is wasted effort in the case of the
3458              * source being utf8 (this_utf8 == TRUE) to take the next character
3459              * in the source, convert it to an unsigned value, and then convert
3460              * it back again.  But the source has not been validated here.  The
3461              * routine that does the conversion checks for errors like
3462              * malformed utf8 */
3463
3464             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3465             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3466             if (!has_utf8) {
3467                 SvCUR_set(sv, d - SvPVX_const(sv));
3468                 SvPOK_on(sv);
3469                 *d = '\0';
3470                 /* See Note on sizing above.  */
3471                 sv_utf8_upgrade_flags_grow(sv,
3472                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3473                                         need + (STRLEN)(send - s) + 1);
3474                 d = SvPVX(sv) + SvCUR(sv);
3475                 has_utf8 = TRUE;
3476             } else if (need > len) {
3477                 /* encoded value larger than old, may need extra space (NOTE:
3478                  * SvCUR() is not set correctly here).   See Note on sizing
3479                  * above.  */
3480                 const STRLEN off = d - SvPVX_const(sv);
3481                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3482             }
3483             s += len;
3484
3485             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3486 #ifdef EBCDIC
3487             if (uv > 255 && !dorange)
3488                 native_range = FALSE;
3489 #endif
3490         }
3491         else {
3492             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3493         }
3494     } /* while loop to process each character */
3495
3496     /* terminate the string and set up the sv */
3497     *d = '\0';
3498     SvCUR_set(sv, d - SvPVX_const(sv));
3499     if (SvCUR(sv) >= SvLEN(sv))
3500         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3501
3502     SvPOK_on(sv);
3503     if (PL_encoding && !has_utf8) {
3504         sv_recode_to_utf8(sv, PL_encoding);
3505         if (SvUTF8(sv))
3506             has_utf8 = TRUE;
3507     }
3508     if (has_utf8) {
3509         SvUTF8_on(sv);
3510         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3511             PL_sublex_info.sub_op->op_private |=
3512                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3513         }
3514     }
3515
3516     /* shrink the sv if we allocated more than we used */
3517     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3518         SvPV_shrink_to_cur(sv);
3519     }
3520
3521     /* return the substring (via pl_yylval) only if we parsed anything */
3522     if (s > PL_bufptr) {
3523         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3524             const char *const key = PL_lex_inpat ? "qr" : "q";
3525             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3526             const char *type;
3527             STRLEN typelen;
3528
3529             if (PL_lex_inwhat == OP_TRANS) {
3530                 type = "tr";
3531                 typelen = 2;
3532             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3533                 type = "s";
3534                 typelen = 1;
3535             } else  {
3536                 type = "qq";
3537                 typelen = 2;
3538             }
3539
3540             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3541                                 type, typelen);
3542         }
3543         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3544     } else
3545         SvREFCNT_dec(sv);
3546     return s;
3547 }
3548
3549 /* S_intuit_more
3550  * Returns TRUE if there's more to the expression (e.g., a subscript),
3551  * FALSE otherwise.
3552  *
3553  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3554  *
3555  * ->[ and ->{ return TRUE
3556  * { and [ outside a pattern are always subscripts, so return TRUE
3557  * if we're outside a pattern and it's not { or [, then return FALSE
3558  * if we're in a pattern and the first char is a {
3559  *   {4,5} (any digits around the comma) returns FALSE
3560  * if we're in a pattern and the first char is a [
3561  *   [] returns FALSE
3562  *   [SOMETHING] has a funky algorithm to decide whether it's a
3563  *      character class or not.  It has to deal with things like
3564  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3565  * anything else returns TRUE
3566  */
3567
3568 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3569
3570 STATIC int
3571 S_intuit_more(pTHX_ register char *s)
3572 {
3573     dVAR;
3574
3575     PERL_ARGS_ASSERT_INTUIT_MORE;
3576
3577     if (PL_lex_brackets)
3578         return TRUE;
3579     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3580         return TRUE;
3581     if (*s != '{' && *s != '[')
3582         return FALSE;
3583     if (!PL_lex_inpat)
3584         return TRUE;
3585
3586     /* In a pattern, so maybe we have {n,m}. */
3587     if (*s == '{') {
3588         if (regcurly(s)) {
3589             return FALSE;
3590         }
3591         return TRUE;
3592     }
3593
3594     /* On the other hand, maybe we have a character class */
3595
3596     s++;
3597     if (*s == ']' || *s == '^')
3598         return FALSE;
3599     else {
3600         /* this is terrifying, and it works */
3601         int weight = 2;         /* let's weigh the evidence */
3602         char seen[256];
3603         unsigned char un_char = 255, last_un_char;
3604         const char * const send = strchr(s,']');
3605         char tmpbuf[sizeof PL_tokenbuf * 4];
3606
3607         if (!send)              /* has to be an expression */
3608             return TRUE;
3609
3610         Zero(seen,256,char);
3611         if (*s == '$')
3612             weight -= 3;
3613         else if (isDIGIT(*s)) {
3614             if (s[1] != ']') {
3615                 if (isDIGIT(s[1]) && s[2] == ']')
3616                     weight -= 10;
3617             }
3618             else
3619                 weight -= 100;
3620         }
3621         for (; s < send; s++) {
3622             last_un_char = un_char;
3623             un_char = (unsigned char)*s;
3624             switch (*s) {
3625             case '@':
3626             case '&':
3627             case '$':
3628                 weight -= seen[un_char] * 10;
3629                 if (isALNUM_lazy_if(s+1,UTF)) {
3630                     int len;
3631                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3632                     len = (int)strlen(tmpbuf);
3633                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3634                         weight -= 100;
3635                     else
3636                         weight -= 10;
3637                 }
3638                 else if (*s == '$' && s[1] &&
3639                   strchr("[#!%*<>()-=",s[1])) {
3640                     if (/*{*/ strchr("])} =",s[2]))
3641                         weight -= 10;
3642                     else
3643                         weight -= 1;
3644                 }
3645                 break;
3646             case '\\':
3647                 un_char = 254;
3648                 if (s[1]) {
3649                     if (strchr("wds]",s[1]))
3650                         weight += 100;
3651                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3652                         weight += 1;
3653                     else if (strchr("rnftbxcav",s[1]))
3654                         weight += 40;
3655                     else if (isDIGIT(s[1])) {
3656                         weight += 40;
3657                         while (s[1] && isDIGIT(s[1]))
3658                             s++;
3659                     }
3660                 }
3661                 else
3662                     weight += 100;
3663                 break;
3664             case '-':
3665                 if (s[1] == '\\')
3666                     weight += 50;
3667                 if (strchr("aA01! ",last_un_char))
3668                     weight += 30;
3669                 if (strchr("zZ79~",s[1]))
3670                     weight += 30;
3671                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3672                     weight -= 5;        /* cope with negative subscript */
3673                 break;
3674             default:
3675                 if (!isALNUM(last_un_char)
3676                     && !(last_un_char == '$' || last_un_char == '@'
3677                          || last_un_char == '&')
3678                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3679                     char *d = tmpbuf;
3680                     while (isALPHA(*s))
3681                         *d++ = *s++;
3682                     *d = '\0';
3683                     if (keyword(tmpbuf, d - tmpbuf, 0))
3684                         weight -= 150;
3685                 }
3686                 if (un_char == last_un_char + 1)
3687                     weight += 5;
3688                 weight -= seen[un_char];
3689                 break;
3690             }
3691             seen[un_char]++;
3692         }
3693         if (weight >= 0)        /* probably a character class */
3694             return FALSE;
3695     }
3696
3697     return TRUE;
3698 }
3699
3700 /*
3701  * S_intuit_method
3702  *
3703  * Does all the checking to disambiguate
3704  *   foo bar
3705  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3706  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3707  *
3708  * First argument is the stuff after the first token, e.g. "bar".
3709  *
3710  * Not a method if bar is a filehandle.
3711  * Not a method if foo is a subroutine prototyped to take a filehandle.
3712  * Not a method if it's really "Foo $bar"
3713  * Method if it's "foo $bar"
3714  * Not a method if it's really "print foo $bar"
3715  * Method if it's really "foo package::" (interpreted as package->foo)
3716  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3717  * Not a method if bar is a filehandle or package, but is quoted with
3718  *   =>
3719  */
3720
3721 STATIC int
3722 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3723 {
3724     dVAR;
3725     char *s = start + (*start == '$');
3726     char tmpbuf[sizeof PL_tokenbuf];
3727     STRLEN len;
3728     GV* indirgv;
3729 #ifdef PERL_MAD
3730     int soff;
3731 #endif
3732
3733     PERL_ARGS_ASSERT_INTUIT_METHOD;
3734
3735     if (gv) {
3736         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3737             return 0;
3738         if (cv) {
3739             if (SvPOK(cv)) {
3740                 const char *proto = SvPVX_const(cv);
3741                 if (proto) {
3742                     if (*proto == ';')
3743                         proto++;
3744                     if (*proto == '*')
3745                         return 0;
3746                 }
3747             }
3748         } else
3749             gv = NULL;
3750     }
3751     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3752     /* start is the beginning of the possible filehandle/object,
3753      * and s is the end of it
3754      * tmpbuf is a copy of it
3755      */
3756
3757     if (*start == '$') {
3758         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3759                 isUPPER(*PL_tokenbuf))
3760             return 0;
3761 #ifdef PERL_MAD
3762         len = start - SvPVX(PL_linestr);
3763 #endif
3764         s = PEEKSPACE(s);
3765 #ifdef PERL_MAD
3766         start = SvPVX(PL_linestr) + len;
3767 #endif
3768         PL_bufptr = start;
3769         PL_expect = XREF;
3770         return *s == '(' ? FUNCMETH : METHOD;
3771     }
3772     if (!keyword(tmpbuf, len, 0)) {
3773         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3774             len -= 2;
3775             tmpbuf[len] = '\0';
3776 #ifdef PERL_MAD
3777             soff = s - SvPVX(PL_linestr);
3778 #endif
3779             goto bare_package;
3780         }
3781         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3782         if (indirgv && GvCVu(indirgv))
3783             return 0;
3784         /* filehandle or package name makes it a method */
3785         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3786 #ifdef PERL_MAD
3787             soff = s - SvPVX(PL_linestr);
3788 #endif
3789             s = PEEKSPACE(s);
3790             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3791                 return 0;       /* no assumptions -- "=>" quotes bareword */
3792       bare_package:
3793             start_force(PL_curforce);
3794             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3795                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3796             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3797             if (PL_madskills)
3798                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3799             PL_expect = XTERM;
3800             force_next(WORD);
3801             PL_bufptr = s;
3802 #ifdef PERL_MAD
3803             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3804 #endif
3805             return *s == '(' ? FUNCMETH : METHOD;
3806         }
3807     }
3808     return 0;
3809 }
3810
3811 /* Encoded script support. filter_add() effectively inserts a
3812  * 'pre-processing' function into the current source input stream.
3813  * Note that the filter function only applies to the current source file
3814  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3815  *
3816  * The datasv parameter (which may be NULL) can be used to pass
3817  * private data to this instance of the filter. The filter function
3818  * can recover the SV using the FILTER_DATA macro and use it to
3819  * store private buffers and state information.
3820  *
3821  * The supplied datasv parameter is upgraded to a PVIO type
3822  * and the IoDIRP/IoANY field is used to store the function pointer,
3823  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3824  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3825  * private use must be set using malloc'd pointers.
3826  */
3827
3828 SV *
3829 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3830 {
3831     dVAR;
3832     if (!funcp)
3833         return NULL;
3834
3835     if (!PL_parser)
3836         return NULL;
3837
3838     if (!PL_rsfp_filters)
3839         PL_rsfp_filters = newAV();
3840     if (!datasv)
3841         datasv = newSV(0);
3842     SvUPGRADE(datasv, SVt_PVIO);
3843     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3844     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3845     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3846                           FPTR2DPTR(void *, IoANY(datasv)),
3847                           SvPV_nolen(datasv)));
3848     av_unshift(PL_rsfp_filters, 1);
3849     av_store(PL_rsfp_filters, 0, datasv) ;
3850     return(datasv);
3851 }
3852
3853
3854 /* Delete most recently added instance of this filter function. */
3855 void
3856 Perl_filter_del(pTHX_ filter_t funcp)
3857 {
3858     dVAR;
3859     SV *datasv;
3860
3861     PERL_ARGS_ASSERT_FILTER_DEL;
3862
3863 #ifdef DEBUGGING
3864     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3865                           FPTR2DPTR(void*, funcp)));
3866 #endif
3867     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3868         return;
3869     /* if filter is on top of stack (usual case) just pop it off */
3870     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3871     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3872         sv_free(av_pop(PL_rsfp_filters));
3873
3874         return;
3875     }
3876     /* we need to search for the correct entry and clear it     */
3877     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3878 }
3879
3880
3881 /* Invoke the idxth filter function for the current rsfp.        */
3882 /* maxlen 0 = read one text line */
3883 I32
3884 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3885 {
3886     dVAR;
3887     filter_t funcp;
3888     SV *datasv = NULL;
3889     /* This API is bad. It should have been using unsigned int for maxlen.
3890        Not sure if we want to change the API, but if not we should sanity
3891        check the value here.  */
3892     const unsigned int correct_length
3893         = maxlen < 0 ?
3894 #ifdef PERL_MICRO
3895         0x7FFFFFFF
3896 #else
3897         INT_MAX
3898 #endif
3899         : maxlen;
3900
3901     PERL_ARGS_ASSERT_FILTER_READ;
3902
3903     if (!PL_parser || !PL_rsfp_filters)
3904         return -1;
3905     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3906         /* Provide a default input filter to make life easy.    */
3907         /* Note that we append to the line. This is handy.      */
3908         DEBUG_P(PerlIO_printf(Perl_debug_log,
3909                               "filter_read %d: from rsfp\n", idx));
3910         if (correct_length) {
3911             /* Want a block */
3912             int len ;
3913             const int old_len = SvCUR(buf_sv);
3914
3915             /* ensure buf_sv is large enough */
3916             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3917             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3918                                    correct_length)) <= 0) {
3919                 if (PerlIO_error(PL_rsfp))
3920                     return -1;          /* error */
3921                 else
3922                     return 0 ;          /* end of file */
3923             }
3924             SvCUR_set(buf_sv, old_len + len) ;
3925             SvPVX(buf_sv)[old_len + len] = '\0';
3926         } else {
3927             /* Want a line */
3928             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3929                 if (PerlIO_error(PL_rsfp))
3930                     return -1;          /* error */
3931                 else
3932                     return 0 ;          /* end of file */
3933             }
3934         }
3935         return SvCUR(buf_sv);
3936     }
3937     /* Skip this filter slot if filter has been deleted */
3938     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3939         DEBUG_P(PerlIO_printf(Perl_debug_log,
3940                               "filter_read %d: skipped (filter deleted)\n",
3941                               idx));
3942         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3943     }
3944     /* Get function pointer hidden within datasv        */
3945     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3946     DEBUG_P(PerlIO_printf(Perl_debug_log,
3947                           "filter_read %d: via function %p (%s)\n",
3948                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3949     /* Call function. The function is expected to       */
3950     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3951     /* Return: <0:error, =0:eof, >0:not eof             */
3952     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3953 }
3954
3955 STATIC char *
3956 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3957 {
3958     dVAR;
3959
3960     PERL_ARGS_ASSERT_FILTER_GETS;
3961
3962 #ifdef PERL_CR_FILTER
3963     if (!PL_rsfp_filters) {
3964         filter_add(S_cr_textfilter,NULL);
3965     }
3966 #endif
3967     if (PL_rsfp_filters) {
3968         if (!append)
3969             SvCUR_set(sv, 0);   /* start with empty line        */
3970         if (FILTER_READ(0, sv, 0) > 0)
3971             return ( SvPVX(sv) ) ;
3972         else
3973             return NULL ;
3974     }
3975     else
3976         return (sv_gets(sv, PL_rsfp, append));
3977 }
3978
3979 STATIC HV *
3980 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3981 {
3982     dVAR;
3983     GV *gv;
3984
3985     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3986
3987     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3988         return PL_curstash;
3989
3990     if (len > 2 &&
3991         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3992         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3993     {
3994         return GvHV(gv);                        /* Foo:: */
3995     }
3996
3997     /* use constant CLASS => 'MyClass' */
3998     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3999     if (gv && GvCV(gv)) {
4000         SV * const sv = cv_const_sv(GvCV(gv));
4001         if (sv)
4002             pkgname = SvPV_const(sv, len);
4003     }
4004
4005     return gv_stashpvn(pkgname, len, 0);
4006 }
4007
4008 /*
4009  * S_readpipe_override
4010  * Check whether readpipe() is overridden, and generates the appropriate
4011  * optree, provided sublex_start() is called afterwards.
4012  */
4013 STATIC void
4014 S_readpipe_override(pTHX)
4015 {
4016     GV **gvp;
4017     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4018     pl_yylval.ival = OP_BACKTICK;
4019     if ((gv_readpipe
4020                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4021             ||
4022             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4023              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4024              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4025     {
4026         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4027             op_append_elem(OP_LIST,
4028                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4029                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4030     }
4031 }
4032
4033 #ifdef PERL_MAD
4034  /*
4035  * Perl_madlex
4036  * The intent of this yylex wrapper is to minimize the changes to the
4037  * tokener when we aren't interested in collecting madprops.  It remains
4038  * to be seen how successful this strategy will be...
4039  */
4040
4041 int
4042 Perl_madlex(pTHX)
4043 {
4044     int optype;
4045     char *s = PL_bufptr;
4046
4047     /* make sure PL_thiswhite is initialized */
4048     PL_thiswhite = 0;
4049     PL_thismad = 0;
4050
4051     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4052     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4053         return S_pending_ident(aTHX);
4054
4055     /* previous token ate up our whitespace? */
4056     if (!PL_lasttoke && PL_nextwhite) {
4057         PL_thiswhite = PL_nextwhite;
4058         PL_nextwhite = 0;
4059     }
4060
4061     /* isolate the token, and figure out where it is without whitespace */
4062     PL_realtokenstart = -1;
4063     PL_thistoken = 0;
4064     optype = yylex();
4065     s = PL_bufptr;
4066     assert(PL_curforce < 0);
4067
4068     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4069         if (!PL_thistoken) {
4070             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4071                 PL_thistoken = newSVpvs("");
4072             else {
4073                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4074                 PL_thistoken = newSVpvn(tstart, s - tstart);
4075             }
4076         }
4077         if (PL_thismad) /* install head */
4078             CURMAD('X', PL_thistoken);
4079     }
4080
4081     /* last whitespace of a sublex? */
4082     if (optype == ')' && PL_endwhite) {
4083         CURMAD('X', PL_endwhite);
4084     }
4085
4086     if (!PL_thismad) {
4087
4088         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4089         if (!PL_thiswhite && !PL_endwhite && !optype) {
4090             sv_free(PL_thistoken);
4091             PL_thistoken = 0;
4092             return 0;
4093         }
4094
4095         /* put off final whitespace till peg */
4096         if (optype == ';' && !PL_rsfp) {
4097             PL_nextwhite = PL_thiswhite;
4098             PL_thiswhite = 0;
4099         }
4100         else if (PL_thisopen) {
4101             CURMAD('q', PL_thisopen);
4102             if (PL_thistoken)
4103                 sv_free(PL_thistoken);
4104             PL_thistoken = 0;
4105         }
4106         else {
4107             /* Store actual token text as madprop X */
4108             CURMAD('X', PL_thistoken);
4109         }
4110
4111         if (PL_thiswhite) {
4112             /* add preceding whitespace as madprop _ */
4113             CURMAD('_', PL_thiswhite);
4114         }
4115
4116         if (PL_thisstuff) {
4117             /* add quoted material as madprop = */
4118             CURMAD('=', PL_thisstuff);
4119         }
4120
4121         if (PL_thisclose) {
4122             /* add terminating quote as madprop Q */
4123             CURMAD('Q', PL_thisclose);
4124         }
4125     }
4126
4127     /* special processing based on optype */
4128
4129     switch (optype) {
4130
4131     /* opval doesn't need a TOKEN since it can already store mp */
4132     case WORD:
4133     case METHOD:
4134     case FUNCMETH:
4135     case THING:
4136     case PMFUNC:
4137     case PRIVATEREF:
4138     case FUNC0SUB:
4139     case UNIOPSUB:
4140     case LSTOPSUB:
4141         if (pl_yylval.opval)
4142             append_madprops(PL_thismad, pl_yylval.opval, 0);
4143         PL_thismad = 0;
4144         return optype;
4145
4146     /* fake EOF */
4147     case 0:
4148         optype = PEG;
4149         if (PL_endwhite) {
4150             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4151             PL_endwhite = 0;
4152         }
4153         break;
4154
4155     case ']':
4156     case '}':
4157         if (PL_faketokens)
4158             break;
4159         /* remember any fake bracket that lexer is about to discard */
4160         if (PL_lex_brackets == 1 &&
4161             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4162         {
4163             s = PL_bufptr;
4164             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4165                 s++;
4166             if (*s == '}') {
4167                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4168                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4169                 PL_thiswhite = 0;
4170                 PL_bufptr = s - 1;
4171                 break;  /* don't bother looking for trailing comment */
4172             }
4173             else
4174                 s = PL_bufptr;
4175         }
4176         if (optype == ']')
4177             break;
4178         /* FALLTHROUGH */
4179
4180     /* attach a trailing comment to its statement instead of next token */
4181     case ';':
4182         if (PL_faketokens)
4183             break;
4184         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4185             s = PL_bufptr;
4186             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4187                 s++;
4188             if (*s == '\n' || *s == '#') {
4189                 while (s < PL_bufend && *s != '\n')
4190                     s++;
4191                 if (s < PL_bufend)
4192                     s++;
4193                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4194                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4195                 PL_thiswhite = 0;
4196                 PL_bufptr = s;
4197             }
4198         }
4199         break;
4200
4201     /* pval */
4202     case LABEL:
4203         break;
4204
4205     /* ival */
4206     default:
4207         break;
4208
4209     }
4210
4211     /* Create new token struct.  Note: opvals return early above. */
4212     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4213     PL_thismad = 0;
4214     return optype;
4215 }
4216 #endif
4217
4218 STATIC char *
4219 S_tokenize_use(pTHX_ int is_use, char *s) {
4220     dVAR;
4221
4222     PERL_ARGS_ASSERT_TOKENIZE_USE;
4223
4224     if (PL_expect != XSTATE)
4225         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4226                     is_use ? "use" : "no"));
4227     s = SKIPSPACE1(s);
4228     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4229         s = force_version(s, TRUE);
4230         if (*s == ';' || *s == '}'
4231                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4232             start_force(PL_curforce);
4233             NEXTVAL_NEXTTOKE.opval = NULL;
4234             force_next(WORD);
4235         }
4236         else if (*s == 'v') {
4237             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4238             s = force_version(s, FALSE);
4239         }
4240     }
4241     else {
4242         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4243         s = force_version(s, FALSE);
4244     }
4245     pl_yylval.ival = is_use;
4246     return s;
4247 }
4248 #ifdef DEBUGGING
4249     static const char* const exp_name[] =
4250         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4251           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4252         };
4253 #endif
4254
4255 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4256 STATIC bool
4257 S_word_takes_any_delimeter(char *p, STRLEN len)
4258 {
4259     return (len == 1 && strchr("msyq", p[0])) ||
4260            (len == 2 && (
4261             (p[0] == 't' && p[1] == 'r') ||
4262             (p[0] == 'q' && strchr("qwxr", p[1]))));
4263 }
4264
4265 /*
4266   yylex
4267
4268   Works out what to call the token just pulled out of the input
4269   stream.  The yacc parser takes care of taking the ops we return and
4270   stitching them into a tree.
4271
4272   Returns:
4273     PRIVATEREF
4274
4275   Structure:
4276       if read an identifier
4277           if we're in a my declaration
4278               croak if they tried to say my($foo::bar)
4279               build the ops for a my() declaration
4280           if it's an access to a my() variable
4281               are we in a sort block?
4282                   croak if my($a); $a <=> $b
4283               build ops for access to a my() variable
4284           if in a dq string, and they've said @foo and we can't find @foo
4285               croak
4286           build ops for a bareword
4287       if we already built the token before, use it.
4288 */
4289
4290
4291 #ifdef __SC__
4292 #pragma segment Perl_yylex
4293 #endif
4294 int
4295 Perl_yylex(pTHX)
4296 {
4297     dVAR;
4298     register char *s = PL_bufptr;
4299     register char *d;
4300     STRLEN len;
4301     bool bof = FALSE;
4302     U32 fake_eof = 0;
4303
4304     /* orig_keyword, gvp, and gv are initialized here because
4305      * jump to the label just_a_word_zero can bypass their
4306      * initialization later. */
4307     I32 orig_keyword = 0;
4308     GV *gv = NULL;
4309     GV **gvp = NULL;
4310
4311     DEBUG_T( {
4312         SV* tmp = newSVpvs("");
4313         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4314             (IV)CopLINE(PL_curcop),
4315             lex_state_names[PL_lex_state],
4316             exp_name[PL_expect],
4317             pv_display(tmp, s, strlen(s), 0, 60));
4318         SvREFCNT_dec(tmp);
4319     } );
4320     /* check if there's an identifier for us to look at */
4321     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4322         return REPORT(S_pending_ident(aTHX));
4323
4324     /* no identifier pending identification */
4325
4326     switch (PL_lex_state) {
4327 #ifdef COMMENTARY
4328     case LEX_NORMAL:            /* Some compilers will produce faster */
4329     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4330         break;
4331 #endif
4332
4333     /* when we've already built the next token, just pull it out of the queue */
4334     case LEX_KNOWNEXT:
4335 #ifdef PERL_MAD
4336         PL_lasttoke--;
4337         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4338         if (PL_madskills) {
4339             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4340             PL_nexttoke[PL_lasttoke].next_mad = 0;
4341             if (PL_thismad && PL_thismad->mad_key == '_') {
4342                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4343                 PL_thismad->mad_val = 0;
4344                 mad_free(PL_thismad);
4345                 PL_thismad = 0;
4346             }
4347         }
4348         if (!PL_lasttoke) {
4349             PL_lex_state = PL_lex_defer;
4350             PL_expect = PL_lex_expect;
4351             PL_lex_defer = LEX_NORMAL;
4352             if (!PL_nexttoke[PL_lasttoke].next_type)
4353                 return yylex();
4354         }
4355 #else
4356         PL_nexttoke--;
4357         pl_yylval = PL_nextval[PL_nexttoke];
4358         if (!PL_nexttoke) {
4359             PL_lex_state = PL_lex_defer;
4360             PL_expect = PL_lex_expect;
4361             PL_lex_defer = LEX_NORMAL;
4362         }
4363 #endif
4364         {
4365             I32 next_type;
4366 #ifdef PERL_MAD
4367             next_type = PL_nexttoke[PL_lasttoke].next_type;
4368 #else
4369             next_type = PL_nexttype[PL_nexttoke];
4370 #endif
4371             if (next_type & (7<<24)) {
4372                 if (next_type & (1<<24)) {
4373                     if (PL_lex_brackets > 100)
4374                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4375                     PL_lex_brackstack[PL_lex_brackets++] =
4376                         (char) ((next_type >> 16) & 0xff);
4377                 }
4378                 if (next_type & (2<<24))
4379                     PL_lex_allbrackets++;
4380                 if (next_type & (4<<24))
4381                     PL_lex_allbrackets--;
4382                 next_type &= 0xffff;
4383             }
4384 #ifdef PERL_MAD
4385             /* FIXME - can these be merged?  */
4386             return next_type;
4387 #else
4388             return REPORT(next_type);
4389 #endif
4390         }
4391
4392     /* interpolated case modifiers like \L \U, including \Q and \E.
4393        when we get here, PL_bufptr is at the \
4394     */
4395     case LEX_INTERPCASEMOD:
4396 #ifdef DEBUGGING
4397         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4398             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4399 #endif
4400         /* handle \E or end of string */
4401         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4402             /* if at a \E */
4403             if (PL_lex_casemods) {
4404                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4405                 PL_lex_casestack[PL_lex_casemods] = '\0';
4406
4407                 if (PL_bufptr != PL_bufend
4408                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4409                     PL_bufptr += 2;
4410                     PL_lex_state = LEX_INTERPCONCAT;
4411 #ifdef PERL_MAD
4412                     if (PL_madskills)
4413                         PL_thistoken = newSVpvs("\\E");
4414 #endif
4415                 }
4416                 PL_lex_allbrackets--;
4417                 return REPORT(')');
4418             }
4419 #ifdef PERL_MAD
4420             while (PL_bufptr != PL_bufend &&
4421               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4422                 if (!PL_thiswhite)
4423                     PL_thiswhite = newSVpvs("");
4424                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4425                 PL_bufptr += 2;
4426             }
4427 #else
4428             if (PL_bufptr != PL_bufend)
4429                 PL_bufptr += 2;
4430 #endif
4431             PL_lex_state = LEX_INTERPCONCAT;
4432             return yylex();
4433         }
4434         else {
4435             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4436               "### Saw case modifier\n"); });
4437             s = PL_bufptr + 1;
4438             if (s[1] == '\\' && s[2] == 'E') {
4439 #ifdef PERL_MAD
4440                 if (!PL_thiswhite)
4441                     PL_thiswhite = newSVpvs("");
4442                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4443 #endif
4444                 PL_bufptr = s + 3;
4445                 PL_lex_state = LEX_INTERPCONCAT;
4446                 return yylex();
4447             }
4448             else {
4449                 I32 tmp;
4450                 if (!PL_madskills) /* when just compiling don't need correct */
4451                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4452                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4453                 if ((*s == 'L' || *s == 'U') &&
4454                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4455                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4456                     PL_lex_allbrackets--;
4457                     return REPORT(')');
4458                 }
4459                 if (PL_lex_casemods > 10)
4460                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4461                 PL_lex_casestack[PL_lex_casemods++] = *s;
4462                 PL_lex_casestack[PL_lex_casemods] = '\0';
4463                 PL_lex_state = LEX_INTERPCONCAT;
4464                 start_force(PL_curforce);
4465                 NEXTVAL_NEXTTOKE.ival = 0;
4466                 force_next((2<<24)|'(');
4467                 start_force(PL_curforce);
4468                 if (*s == 'l')
4469                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4470                 else if (*s == 'u')
4471                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4472                 else if (*s == 'L')
4473                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4474                 else if (*s == 'U')
4475                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4476                 else if (*s == 'Q')
4477                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4478                 else
4479                     Perl_croak(aTHX_ "panic: yylex");
4480                 if (PL_madskills) {
4481                     SV* const tmpsv = newSVpvs("\\ ");
4482                     /* replace the space with the character we want to escape
4483                      */
4484                     SvPVX(tmpsv)[1] = *s;
4485                     curmad('_', tmpsv);
4486                 }
4487                 PL_bufptr = s + 1;
4488             }
4489             force_next(FUNC);
4490             if (PL_lex_starts) {
4491                 s = PL_bufptr;
4492                 PL_lex_starts = 0;
4493 #ifdef PERL_MAD
4494                 if (PL_madskills) {
4495                     if (PL_thistoken)
4496                         sv_free(PL_thistoken);
4497                     PL_thistoken = newSVpvs("");
4498                 }
4499 #endif
4500                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4501                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4502                     OPERATOR(',');
4503                 else
4504                     Aop(OP_CONCAT);
4505             }
4506             else
4507                 return yylex();
4508         }
4509
4510     case LEX_INTERPPUSH:
4511         return REPORT(sublex_push());
4512
4513     case LEX_INTERPSTART:
4514         if (PL_bufptr == PL_bufend)
4515             return REPORT(sublex_done());
4516         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4517               "### Interpolated variable\n"); });
4518         PL_expect = XTERM;
4519         PL_lex_dojoin = (*PL_bufptr == '@');
4520         PL_lex_state = LEX_INTERPNORMAL;
4521         if (PL_lex_dojoin) {
4522             start_force(PL_curforce);
4523             NEXTVAL_NEXTTOKE.ival = 0;
4524             force_next(',');
4525             start_force(PL_curforce);
4526             force_ident("\"", '$');
4527             start_force(PL_curforce);
4528             NEXTVAL_NEXTTOKE.ival = 0;
4529             force_next('$');
4530             start_force(PL_curforce);
4531             NEXTVAL_NEXTTOKE.ival = 0;
4532             force_next((2<<24)|'(');
4533             start_force(PL_curforce);
4534             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4535             force_next(FUNC);
4536         }
4537         if (PL_lex_starts++) {
4538             s = PL_bufptr;
4539 #ifdef PERL_MAD
4540             if (PL_madskills) {
4541                 if (PL_thistoken)
4542                     sv_free(PL_thistoken);
4543                 PL_thistoken = newSVpvs("");
4544             }
4545 #endif
4546             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4547             if (!PL_lex_casemods && PL_lex_inpat)
4548                 OPERATOR(',');
4549             else
4550                 Aop(OP_CONCAT);
4551         }
4552         return yylex();
4553
4554     case LEX_INTERPENDMAYBE:
4555         if (intuit_more(PL_bufptr)) {
4556             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4557             break;
4558         }
4559         /* FALL THROUGH */
4560
4561     case LEX_INTERPEND:
4562         if (PL_lex_dojoin) {
4563             PL_lex_dojoin = FALSE;
4564             PL_lex_state = LEX_INTERPCONCAT;
4565 #ifdef PERL_MAD
4566             if (PL_madskills) {
4567                 if (PL_thistoken)
4568                     sv_free(PL_thistoken);
4569                 PL_thistoken = newSVpvs("");
4570             }
4571 #endif
4572             PL_lex_allbrackets--;
4573             return REPORT(')');
4574         }
4575         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4576             && SvEVALED(PL_lex_repl))
4577         {
4578             if (PL_bufptr != PL_bufend)
4579                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4580             PL_lex_repl = NULL;
4581         }
4582         /* FALLTHROUGH */
4583     case LEX_INTERPCONCAT:
4584 #ifdef DEBUGGING
4585         if (PL_lex_brackets)
4586             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4587 #endif
4588         if (PL_bufptr == PL_bufend)
4589             return REPORT(sublex_done());
4590
4591         if (SvIVX(PL_linestr) == '\'') {
4592             SV *sv = newSVsv(PL_linestr);
4593             if (!PL_lex_inpat)
4594                 sv = tokeq(sv);
4595             else if ( PL_hints & HINT_NEW_RE )
4596                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4597             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4598             s = PL_bufend;
4599         }
4600         else {
4601             s = scan_const(PL_bufptr);
4602             if (*s == '\\')
4603                 PL_lex_state = LEX_INTERPCASEMOD;
4604             else
4605                 PL_lex_state = LEX_INTERPSTART;
4606         }
4607
4608         if (s != PL_bufptr) {
4609             start_force(PL_curforce);
4610             if (PL_madskills) {
4611                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4612             }
4613             NEXTVAL_NEXTTOKE = pl_yylval;
4614             PL_expect = XTERM;
4615             force_next(THING);
4616             if (PL_lex_starts++) {
4617 #ifdef PERL_MAD
4618                 if (PL_madskills) {
4619                     if (PL_thistoken)
4620                         sv_free(PL_thistoken);
4621                     PL_thistoken = newSVpvs("");
4622                 }
4623 #endif
4624                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4625                 if (!PL_lex_casemods && PL_lex_inpat)
4626                     OPERATOR(',');
4627                 else
4628                     Aop(OP_CONCAT);
4629             }
4630             else {
4631                 PL_bufptr = s;
4632                 return yylex();
4633             }
4634         }
4635
4636         return yylex();
4637     case LEX_FORMLINE:
4638         PL_lex_state = LEX_NORMAL;
4639         s = scan_formline(PL_bufptr);
4640         if (!PL_lex_formbrack)
4641             goto rightbracket;
4642         OPERATOR(';');
4643     }
4644
4645     s = PL_bufptr;
4646     PL_oldoldbufptr = PL_oldbufptr;
4647     PL_oldbufptr = s;
4648
4649   retry:
4650 #ifdef PERL_MAD
4651     if (PL_thistoken) {
4652         sv_free(PL_thistoken);
4653         PL_thistoken = 0;
4654     }
4655     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4656 #endif
4657     switch (*s) {
4658     default:
4659         if (isIDFIRST_lazy_if(s,UTF))
4660             goto keylookup;
4661         {
4662         unsigned char c = *s;
4663         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4664         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4665             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4666         } else {
4667             d = PL_linestart;
4668         }
4669         *s = '\0';
4670         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4671     }
4672     case 4:
4673     case 26:
4674         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4675     case 0:
4676 #ifdef PERL_MAD
4677         if (PL_madskills)
4678             PL_faketokens = 0;
4679 #endif
4680         if (!PL_rsfp) {
4681             PL_last_uni = 0;
4682             PL_last_lop = 0;
4683             if (PL_lex_brackets &&
4684                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4685                 yyerror((const char *)
4686                         (PL_lex_formbrack
4687                          ? "Format not terminated"
4688                          : "Missing right curly or square bracket"));
4689             }
4690             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4691                         "### Tokener got EOF\n");
4692             } );
4693             TOKEN(0);
4694         }
4695         if (s++ < PL_bufend)
4696             goto retry;                 /* ignore stray nulls */
4697         PL_last_uni = 0;
4698         PL_last_lop = 0;
4699         if (!PL_in_eval && !PL_preambled) {
4700             PL_preambled = TRUE;
4701 #ifdef PERL_MAD
4702             if (PL_madskills)
4703                 PL_faketokens = 1;
4704 #endif
4705             if (PL_perldb) {
4706                 /* Generate a string of Perl code to load the debugger.
4707                  * If PERL5DB is set, it will return the contents of that,
4708                  * otherwise a compile-time require of perl5db.pl.  */
4709
4710                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4711
4712                 if (pdb) {
4713                     sv_setpv(PL_linestr, pdb);
4714                     sv_catpvs(PL_linestr,";");
4715                 } else {
4716                     SETERRNO(0,SS_NORMAL);
4717                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4718                 }
4719             } else
4720                 sv_setpvs(PL_linestr,"");
4721             if (PL_preambleav) {
4722                 SV **svp = AvARRAY(PL_preambleav);
4723                 SV **const end = svp + AvFILLp(PL_preambleav);
4724                 while(svp <= end) {
4725                     sv_catsv(PL_linestr, *svp);
4726                     ++svp;
4727                     sv_catpvs(PL_linestr, ";");
4728                 }
4729                 sv_free(MUTABLE_SV(PL_preambleav));
4730                 PL_preambleav = NULL;
4731             }
4732             if (PL_minus_E)
4733                 sv_catpvs(PL_linestr,
4734                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4735             if (PL_minus_n || PL_minus_p) {
4736                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4737                 if (PL_minus_l)
4738                     sv_catpvs(PL_linestr,"chomp;");
4739                 if (PL_minus_a) {
4740                     if (PL_minus_F) {
4741                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4742                              || *PL_splitstr == '"')
4743                               && strchr(PL_splitstr + 1, *PL_splitstr))
4744                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4745                         else {
4746                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4747                                bytes can be used as quoting characters.  :-) */
4748                             const char *splits = PL_splitstr;
4749                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4750                             do {
4751                                 /* Need to \ \s  */
4752                                 if (*splits == '\\')
4753                                     sv_catpvn(PL_linestr, splits, 1);
4754                                 sv_catpvn(PL_linestr, splits, 1);
4755                             } while (*splits++);
4756                             /* This loop will embed the trailing NUL of
4757                                PL_linestr as the last thing it does before
4758                                terminating.  */
4759                             sv_catpvs(PL_linestr, ");");
4760                         }
4761                     }
4762                     else
4763                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4764                 }
4765             }
4766             sv_catpvs(PL_linestr, "\n");
4767             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4768             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4769             PL_last_lop = PL_last_uni = NULL;
4770             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4771                 update_debugger_info(PL_linestr, NULL, 0);
4772             goto retry;
4773         }
4774         do {
4775             fake_eof = 0;
4776             bof = PL_rsfp ? TRUE : FALSE;
4777             if (0) {
4778               fake_eof:
4779                 fake_eof = LEX_FAKE_EOF;
4780             }
4781             PL_bufptr = PL_bufend;
4782             CopLINE_inc(PL_curcop);
4783             if (!lex_next_chunk(fake_eof)) {
4784                 CopLINE_dec(PL_curcop);
4785                 s = PL_bufptr;
4786                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4787             }
4788             CopLINE_dec(PL_curcop);
4789 #ifdef PERL_MAD
4790             if (!PL_rsfp)
4791                 PL_realtokenstart = -1;
4792 #endif
4793             s = PL_bufptr;
4794             /* If it looks like the start of a BOM or raw UTF-16,
4795              * check if it in fact is. */
4796             if (bof && PL_rsfp &&
4797                      (*s == 0 ||
4798                       *(U8*)s == 0xEF ||
4799                       *(U8*)s >= 0xFE ||
4800                       s[1] == 0)) {
4801                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4802                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4803 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4804                 /* offset may include swallowed CR */
4805                 if (!bof)
4806                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4807 #endif
4808                 if (bof) {
4809                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4810                     s = swallow_bom((U8*)s);
4811                 }
4812             }
4813             if (PL_parser->in_pod) {
4814                 /* Incest with pod. */
4815 #ifdef PERL_MAD
4816                 if (PL_madskills)
4817                     sv_catsv(PL_thiswhite, PL_linestr);
4818 #endif
4819                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4820                     sv_setpvs(PL_linestr, "");
4821                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4822                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4823                     PL_last_lop = PL_last_uni = NULL;
4824                     PL_parser->in_pod = 0;
4825                 }
4826             }
4827             if (PL_rsfp)
4828                 incline(s);
4829         } while (PL_parser->in_pod);
4830         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4831         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4832         PL_last_lop = PL_last_uni = NULL;
4833         if (CopLINE(PL_curcop) == 1) {
4834             while (s < PL_bufend && isSPACE(*s))
4835                 s++;
4836             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4837                 s++;
4838 #ifdef PERL_MAD
4839             if (PL_madskills)
4840                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4841 #endif
4842             d = NULL;
4843             if (!PL_in_eval) {
4844                 if (*s == '#' && *(s+1) == '!')
4845                     d = s + 2;
4846 #ifdef ALTERNATE_SHEBANG
4847                 else {
4848                     static char const as[] = ALTERNATE_SHEBANG;
4849                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4850                         d = s + (sizeof(as) - 1);
4851                 }
4852 #endif /* ALTERNATE_SHEBANG */
4853             }
4854             if (d) {
4855                 char *ipath;
4856                 char *ipathend;
4857
4858                 while (isSPACE(*d))
4859                     d++;
4860                 ipath = d;
4861                 while (*d && !isSPACE(*d))
4862                     d++;
4863                 ipathend = d;
4864
4865 #ifdef ARG_ZERO_IS_SCRIPT
4866                 if (ipathend > ipath) {
4867                     /*
4868                      * HP-UX (at least) sets argv[0] to the script name,
4869                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4870                      * at least, set argv[0] to the basename of the Perl
4871                      * interpreter. So, having found "#!", we'll set it right.
4872                      */
4873                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4874                                                     SVt_PV)); /* $^X */
4875                     assert(SvPOK(x) || SvGMAGICAL(x));
4876                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4877                         sv_setpvn(x, ipath, ipathend - ipath);
4878                         SvSETMAGIC(x);
4879                     }
4880                     else {
4881                         STRLEN blen;
4882                         STRLEN llen;
4883                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4884                         const char * const lstart = SvPV_const(x,llen);
4885                         if (llen < blen) {
4886                             bstart += blen - llen;
4887                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4888                                 sv_setpvn(x, ipath, ipathend - ipath);
4889                                 SvSETMAGIC(x);
4890                             }
4891                         }
4892                     }
4893                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4894                 }
4895 #endif /* ARG_ZERO_IS_SCRIPT */
4896
4897                 /*
4898                  * Look for options.
4899                  */
4900                 d = instr(s,"perl -");
4901                 if (!d) {
4902                     d = instr(s,"perl");
4903 #if defined(DOSISH)
4904                     /* avoid getting into infinite loops when shebang
4905                      * line contains "Perl" rather than "perl" */
4906                     if (!d) {
4907                         for (d = ipathend-4; d >= ipath; --d) {
4908                             if ((*d == 'p' || *d == 'P')
4909                                 && !ibcmp(d, "perl", 4))
4910                             {
4911                                 break;
4912                             }
4913                         }
4914                         if (d < ipath)
4915                             d = NULL;
4916                     }
4917 #endif
4918                 }
4919 #ifdef ALTERNATE_SHEBANG
4920                 /*
4921                  * If the ALTERNATE_SHEBANG on this system starts with a
4922                  * character that can be part of a Perl expression, then if
4923                  * we see it but not "perl", we're probably looking at the
4924                  * start of Perl code, not a request to hand off to some
4925                  * other interpreter.  Similarly, if "perl" is there, but
4926                  * not in the first 'word' of the line, we assume the line
4927                  * contains the start of the Perl program.
4928                  */
4929                 if (d && *s != '#') {
4930                     const char *c = ipath;
4931                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4932                         c++;
4933                     if (c < d)
4934                         d = NULL;       /* "perl" not in first word; ignore */
4935                     else
4936                         *s = '#';       /* Don't try to parse shebang line */
4937                 }
4938 #endif /* ALTERNATE_SHEBANG */
4939                 if (!d &&
4940                     *s == '#' &&
4941                     ipathend > ipath &&
4942                     !PL_minus_c &&
4943                     !instr(s,"indir") &&
4944                     instr(PL_origargv[0],"perl"))
4945                 {
4946                     dVAR;
4947                     char **newargv;
4948
4949                     *ipathend = '\0';
4950                     s = ipathend + 1;
4951                     while (s < PL_bufend && isSPACE(*s))
4952                         s++;
4953                     if (s < PL_bufend) {
4954                         Newx(newargv,PL_origargc+3,char*);
4955                         newargv[1] = s;
4956                         while (s < PL_bufend && !isSPACE(*s))
4957                             s++;
4958                         *s = '\0';
4959                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4960                     }
4961                     else
4962                         newargv = PL_origargv;
4963                     newargv[0] = ipath;
4964                     PERL_FPU_PRE_EXEC
4965                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4966                     PERL_FPU_POST_EXEC
4967                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4968                 }
4969                 if (d) {
4970                     while (*d && !isSPACE(*d))
4971                         d++;
4972                     while (SPACE_OR_TAB(*d))
4973                         d++;
4974
4975                     if (*d++ == '-') {
4976                         const bool switches_done = PL_doswitches;
4977                         const U32 oldpdb = PL_perldb;
4978                         const bool oldn = PL_minus_n;
4979                         const bool oldp = PL_minus_p;
4980                         const char *d1 = d;
4981
4982                         do {
4983                             bool baduni = FALSE;
4984                             if (*d1 == 'C') {
4985                                 const char *d2 = d1 + 1;
4986                                 if (parse_unicode_opts((const char **)&d2)
4987                                     != PL_unicode)
4988                                     baduni = TRUE;
4989                             }
4990                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4991                                 const char * const m = d1;
4992                                 while (*d1 && !isSPACE(*d1))
4993                                     d1++;
4994                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4995                                       (int)(d1 - m), m);
4996                             }
4997                             d1 = moreswitches(d1);
4998                         } while (d1);
4999                         if (PL_doswitches && !switches_done) {
5000                             int argc = PL_origargc;
5001                             char **argv = PL_origargv;
5002                             do {
5003                                 argc--,argv++;
5004                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5005                             init_argv_symbols(argc,argv);
5006                         }
5007                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5008                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5009                               /* if we have already added "LINE: while (<>) {",
5010                                  we must not do it again */
5011                         {
5012                             sv_setpvs(PL_linestr, "");
5013                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5014                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5015                             PL_last_lop = PL_last_uni = NULL;
5016                             PL_preambled = FALSE;
5017                             if (PERLDB_LINE || PERLDB_SAVESRC)
5018                                 (void)gv_fetchfile(PL_origfilename);
5019                             goto retry;
5020                         }
5021                     }
5022                 }
5023             }
5024         }
5025         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5026             PL_bufptr = s;
5027             PL_lex_state = LEX_FORMLINE;
5028             return yylex();
5029         }
5030         goto retry;
5031     case '\r':
5032 #ifdef PERL_STRICT_CR
5033         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5034         Perl_croak(aTHX_
5035       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5036 #endif
5037     case ' ': case '\t': case '\f': case 013:
5038 #ifdef PERL_MAD
5039         PL_realtokenstart = -1;
5040         if (!PL_thiswhite)
5041             PL_thiswhite = newSVpvs("");
5042         sv_catpvn(PL_thiswhite, s, 1);
5043 #endif
5044         s++;
5045         goto retry;
5046     case '#':
5047     case '\n':
5048 #ifdef PERL_MAD
5049         PL_realtokenstart = -1;
5050         if (PL_madskills)
5051             PL_faketokens = 0;
5052 #endif
5053         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5054             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5055                 /* handle eval qq[#line 1 "foo"\n ...] */
5056                 CopLINE_dec(PL_curcop);
5057                 incline(s);
5058             }
5059             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5060                 s = SKIPSPACE0(s);
5061                 if (!PL_in_eval || PL_rsfp)
5062                     incline(s);
5063             }
5064             else {
5065                 d = s;
5066                 while (d < PL_bufend && *d != '\n')
5067                     d++;
5068                 if (d < PL_bufend)
5069                     d++;
5070                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5071                   Perl_croak(aTHX_ "panic: input overflow");
5072 #ifdef PERL_MAD
5073                 if (PL_madskills)
5074                     PL_thiswhite = newSVpvn(s, d - s);
5075 #endif
5076                 s = d;
5077                 incline(s);
5078             }
5079             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5080                 PL_bufptr = s;
5081                 PL_lex_state = LEX_FORMLINE;
5082                 return yylex();
5083             }
5084         }
5085         else {
5086 #ifdef PERL_MAD
5087             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5088                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5089                     PL_faketokens = 0;
5090                     s = SKIPSPACE0(s);
5091                     TOKEN(PEG); /* make sure any #! line is accessible */
5092                 }
5093                 s = SKIPSPACE0(s);
5094             }
5095             else {
5096 /*              if (PL_madskills && PL_lex_formbrack) { */
5097                     d = s;
5098                     while (d < PL_bufend && *d != '\n')
5099                         d++;
5100                     if (d < PL_bufend)
5101                         d++;
5102                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5103                       Perl_croak(aTHX_ "panic: input overflow");
5104                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5105                         if (!PL_thiswhite)
5106                             PL_thiswhite = newSVpvs("");
5107                         if (CopLINE(PL_curcop) == 1) {
5108                             sv_setpvs(PL_thiswhite, "");
5109                             PL_faketokens = 0;
5110                         }
5111                         sv_catpvn(PL_thiswhite, s, d - s);
5112                     }
5113                     s = d;
5114 /*              }
5115                 *s = '\0';
5116                 PL_bufend = s; */
5117             }
5118 #else
5119             *s = '\0';
5120             PL_bufend = s;
5121 #endif
5122         }
5123         goto retry;
5124     case '-':
5125         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5126             I32 ftst = 0;
5127             char tmp;
5128
5129             s++;
5130             PL_bufptr = s;
5131             tmp = *s++;
5132
5133             while (s < PL_bufend && SPACE_OR_TAB(*s))
5134                 s++;
5135
5136             if (strnEQ(s,"=>",2)) {
5137                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5138                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5139                 OPERATOR('-');          /* unary minus */
5140             }
5141             PL_last_uni = PL_oldbufptr;
5142             switch (tmp) {
5143             case 'r': ftst = OP_FTEREAD;        break;
5144             case 'w': ftst = OP_FTEWRITE;       break;
5145             case 'x': ftst = OP_FTEEXEC;        break;
5146             case 'o': ftst = OP_FTEOWNED;       break;
5147             case 'R': ftst = OP_FTRREAD;        break;
5148             case 'W': ftst = OP_FTRWRITE;       break;
5149             case 'X': ftst = OP_FTREXEC;        break;
5150             case 'O': ftst = OP_FTROWNED;       break;
5151             case 'e': ftst = OP_FTIS;           break;
5152             case 'z': ftst = OP_FTZERO;         break;
5153             case 's': ftst = OP_FTSIZE;         break;
5154             case 'f': ftst = OP_FTFILE;         break;
5155             case 'd': ftst = OP_FTDIR;          break;
5156             case 'l': ftst = OP_FTLINK;         break;
5157             case 'p': ftst = OP_FTPIPE;         break;
5158             case 'S': ftst = OP_FTSOCK;         break;
5159             case 'u': ftst = OP_FTSUID;         break;
5160             case 'g': ftst = OP_FTSGID;         break;
5161             case 'k': ftst = OP_FTSVTX;         break;
5162             case 'b': ftst = OP_FTBLK;          break;
5163             case 'c': ftst = OP_FTCHR;          break;
5164             case 't': ftst = OP_FTTTY;          break;
5165             case 'T': ftst = OP_FTTEXT;         break;
5166             case 'B': ftst = OP_FTBINARY;       break;
5167             case 'M': case 'A': case 'C':
5168                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5169                 switch (tmp) {
5170                 case 'M': ftst = OP_FTMTIME;    break;
5171                 case 'A': ftst = OP_FTATIME;    break;
5172                 case 'C': ftst = OP_FTCTIME;    break;
5173                 default:                        break;
5174                 }
5175                 break;
5176             default:
5177                 break;
5178             }
5179             if (ftst) {
5180                 PL_last_lop_op = (OPCODE)ftst;
5181                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5182                         "### Saw file test %c\n", (int)tmp);
5183                 } );
5184                 FTST(ftst);
5185             }
5186             else {
5187                 /* Assume it was a minus followed by a one-letter named
5188                  * subroutine call (or a -bareword), then. */
5189                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5190                         "### '-%c' looked like a file test but was not\n",
5191                         (int) tmp);
5192                 } );
5193                 s = --PL_bufptr;
5194             }
5195         }
5196         {
5197             const char tmp = *s++;
5198             if (*s == tmp) {
5199                 s++;
5200                 if (PL_expect == XOPERATOR)
5201                     TERM(POSTDEC);
5202                 else
5203                     OPERATOR(PREDEC);
5204             }
5205             else if (*s == '>') {
5206                 s++;
5207                 s = SKIPSPACE1(s);
5208                 if (isIDFIRST_lazy_if(s,UTF)) {
5209                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5210                     TOKEN(ARROW);
5211                 }
5212                 else if (*s == '$')
5213                     OPERATOR(ARROW);
5214                 else
5215                     TERM(ARROW);
5216             }
5217             if (PL_expect == XOPERATOR) {
5218                 if (*s == '=' && !PL_lex_allbrackets &&
5219                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5220                     s--;
5221                     TOKEN(0);
5222                 }
5223                 Aop(OP_SUBTRACT);
5224             }
5225             else {
5226                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5227                     check_uni();
5228                 OPERATOR('-');          /* unary minus */
5229             }
5230         }
5231
5232     case '+':
5233         {
5234             const char tmp = *s++;
5235             if (*s == tmp) {
5236                 s++;
5237                 if (PL_expect == XOPERATOR)
5238                     TERM(POSTINC);
5239                 else
5240                     OPERATOR(PREINC);
5241             }
5242             if (PL_expect == XOPERATOR) {
5243                 if (*s == '=' && !PL_lex_allbrackets &&
5244                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5245                     s--;
5246                     TOKEN(0);
5247                 }
5248                 Aop(OP_ADD);
5249             }
5250             else {
5251                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5252                     check_uni();
5253                 OPERATOR('+');
5254             }
5255         }
5256
5257     case '*':
5258         if (PL_expect != XOPERATOR) {
5259             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5260             PL_expect = XOPERATOR;
5261             force_ident(PL_tokenbuf, '*');
5262             if (!*PL_tokenbuf)
5263                 PREREF('*');
5264             TERM('*');
5265         }
5266         s++;
5267         if (*s == '*') {
5268             s++;
5269             if (*s == '=' && !PL_lex_allbrackets &&
5270                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5271                 s -= 2;
5272                 TOKEN(0);
5273             }
5274             PWop(OP_POW);
5275         }
5276         if (*s == '=' && !PL_lex_allbrackets &&
5277                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5278             s--;
5279             TOKEN(0);
5280         }
5281         Mop(OP_MULTIPLY);
5282
5283     case '%':
5284         if (PL_expect == XOPERATOR) {
5285             if (s[1] == '=' && !PL_lex_allbrackets &&
5286                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5287                 TOKEN(0);
5288             ++s;
5289             Mop(OP_MODULO);
5290         }
5291         PL_tokenbuf[0] = '%';
5292         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5293                 sizeof PL_tokenbuf - 1, FALSE);
5294         if (!PL_tokenbuf[1]) {
5295             PREREF('%');
5296         }
5297         PL_pending_ident = '%';
5298         TERM('%');
5299
5300     case '^':
5301         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5302                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5303             TOKEN(0);
5304         s++;
5305         BOop(OP_BIT_XOR);
5306     case '[':
5307         if (PL_lex_brackets > 100)
5308             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5309         PL_lex_brackstack[PL_lex_brackets++] = 0;
5310         PL_lex_allbrackets++;
5311         {
5312             const char tmp = *s++;
5313             OPERATOR(tmp);
5314         }
5315     case '~':
5316         if (s[1] == '~'
5317             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5318         {
5319             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5320                 TOKEN(0);
5321             s += 2;
5322             Eop(OP_SMARTMATCH);
5323         }
5324         s++;
5325         OPERATOR('~');
5326     case ',':
5327         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5328             TOKEN(0);
5329         s++;
5330         OPERATOR(',');
5331     case ':':
5332         if (s[1] == ':') {
5333             len = 0;
5334             goto just_a_word_zero_gv;
5335         }
5336         s++;
5337         switch (PL_expect) {
5338             OP *attrs;
5339 #ifdef PERL_MAD
5340             I32 stuffstart;
5341 #endif
5342         case XOPERATOR:
5343             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5344                 break;
5345             PL_bufptr = s;      /* update in case we back off */
5346             if (*s == '=') {
5347                 Perl_croak(aTHX_
5348                            "Use of := for an empty attribute list is not allowed");
5349             }
5350             goto grabattrs;
5351         case XATTRBLOCK:
5352             PL_expect = XBLOCK;
5353             goto grabattrs;
5354         case XATTRTERM:
5355             PL_expect = XTERMBLOCK;
5356          grabattrs:
5357 #ifdef PERL_MAD
5358             stuffstart = s - SvPVX(PL_linestr) - 1;
5359 #endif
5360             s = PEEKSPACE(s);
5361             attrs = NULL;
5362             while (isIDFIRST_lazy_if(s,UTF)) {
5363                 I32 tmp;
5364                 SV *sv;
5365                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5366                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5367                     if (tmp < 0) tmp = -tmp;
5368                     switch (tmp) {
5369                     case KEY_or:
5370                     case KEY_and:
5371                     case KEY_for:
5372                     case KEY_foreach:
5373                     case KEY_unless:
5374                     case KEY_if:
5375                     case KEY_while:
5376                     case KEY_until:
5377                         goto got_attrs;
5378                     default:
5379                         break;
5380                     }
5381                 }
5382                 sv = newSVpvn(s, len);
5383                 if (*d == '(') {
5384                     d = scan_str(d,TRUE,TRUE);
5385                     if (!d) {
5386                         /* MUST advance bufptr here to avoid bogus
5387                            "at end of line" context messages from yyerror().
5388                          */
5389                         PL_bufptr = s + len;
5390                         yyerror("Unterminated attribute parameter in attribute list");
5391                         if (attrs)
5392                             op_free(attrs);
5393                         sv_free(sv);
5394                         return REPORT(0);       /* EOF indicator */
5395                     }
5396                 }
5397                 if (PL_lex_stuff) {
5398                     sv_catsv(sv, PL_lex_stuff);
5399                     attrs = op_append_elem(OP_LIST, attrs,
5400                                         newSVOP(OP_CONST, 0, sv));
5401                     SvREFCNT_dec(PL_lex_stuff);
5402                     PL_lex_stuff = NULL;
5403                 }
5404                 else {
5405                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5406                         sv_free(sv);
5407                         if (PL_in_my == KEY_our) {
5408                             deprecate(":unique");
5409                         }
5410                         else
5411                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5412                     }
5413
5414                     /* NOTE: any CV attrs applied here need to be part of
5415                        the CVf_BUILTIN_ATTRS define in cv.h! */
5416                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5417                         sv_free(sv);
5418                         CvLVALUE_on(PL_compcv);
5419                     }
5420                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5421                         sv_free(sv);
5422                         deprecate(":locked");
5423                     }
5424                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5425                         sv_free(sv);
5426                         CvMETHOD_on(PL_compcv);
5427                     }
5428                     /* After we've set the flags, it could be argued that
5429                        we don't need to do the attributes.pm-based setting
5430                        process, and shouldn't bother appending recognized
5431                        flags.  To experiment with that, uncomment the
5432                        following "else".  (Note that's already been
5433                        uncommented.  That keeps the above-applied built-in
5434                        attributes from being intercepted (and possibly
5435                        rejected) by a package's attribute routines, but is
5436                        justified by the performance win for the common case
5437                        of applying only built-in attributes.) */
5438                     else
5439                         attrs = op_append_elem(OP_LIST, attrs,
5440                                             newSVOP(OP_CONST, 0,
5441                                                     sv));
5442                 }
5443                 s = PEEKSPACE(d);
5444                 if (*s == ':' && s[1] != ':')
5445                     s = PEEKSPACE(s+1);
5446                 else if (s == d)
5447                     break;      /* require real whitespace or :'s */
5448                 /* XXX losing whitespace on sequential attributes here */
5449             }
5450             {
5451                 const char tmp
5452                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5453                 if (*s != ';' && *s != '}' && *s != tmp
5454                     && (tmp != '=' || *s != ')')) {
5455                     const char q = ((*s == '\'') ? '"' : '\'');
5456                     /* If here for an expression, and parsed no attrs, back
5457                        off. */
5458                     if (tmp == '=' && !attrs) {
5459                         s = PL_bufptr;
5460                         break;
5461                     }
5462                     /* MUST advance bufptr here to avoid bogus "at end of line"
5463                        context messages from yyerror().
5464                     */
5465                     PL_bufptr = s;
5466                     yyerror( (const char *)
5467                              (*s
5468                               ? Perl_form(aTHX_ "Invalid separator character "
5469                                           "%c%c%c in attribute list", q, *s, q)
5470                               : "Unterminated attribute list" ) );
5471                     if (attrs)
5472                         op_free(attrs);
5473                     OPERATOR(':');
5474                 }
5475             }
5476         got_attrs:
5477             if (attrs) {
5478                 start_force(PL_curforce);
5479                 NEXTVAL_NEXTTOKE.opval = attrs;
5480                 CURMAD('_', PL_nextwhite);
5481                 force_next(THING);
5482             }
5483 #ifdef PERL_MAD
5484             if (PL_madskills) {
5485                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5486                                      (s - SvPVX(PL_linestr)) - stuffstart);
5487             }
5488 #endif
5489             TOKEN(COLONATTR);
5490         }
5491         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5492             s--;
5493             TOKEN(0);
5494         }
5495         PL_lex_allbrackets--;
5496         OPERATOR(':');
5497     case '(':
5498         s++;
5499         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5500             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5501         else
5502             PL_expect = XTERM;
5503         s = SKIPSPACE1(s);
5504         PL_lex_allbrackets++;
5505         TOKEN('(');
5506     case ';':
5507         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5508             TOKEN(0);
5509         CLINE;
5510         s++;
5511         OPERATOR(';');
5512     case ')':
5513         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5514             TOKEN(0);
5515         s++;
5516         PL_lex_allbrackets--;
5517         s = SKIPSPACE1(s);
5518         if (*s == '{')
5519             PREBLOCK(')');
5520         TERM(')');
5521     case ']':
5522         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5523             TOKEN(0);
5524         s++;
5525         if (PL_lex_brackets <= 0)
5526             yyerror("Unmatched right square bracket");
5527         else
5528             --PL_lex_brackets;
5529         PL_lex_allbrackets--;
5530         if (PL_lex_state == LEX_INTERPNORMAL) {
5531             if (PL_lex_brackets == 0) {
5532                 if (*s == '-' && s[1] == '>')
5533                     PL_lex_state = LEX_INTERPENDMAYBE;
5534                 else if (*s != '[' && *s != '{')
5535                     PL_lex_state = LEX_INTERPEND;
5536             }
5537         }
5538         TERM(']');
5539     case '{':
5540       leftbracket:
5541         s++;
5542         if (PL_lex_brackets > 100) {
5543             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5544         }
5545         switch (PL_expect) {
5546         case XTERM:
5547             if (PL_lex_formbrack) {
5548                 s--;
5549                 PRETERMBLOCK(DO);
5550             }
5551             if (PL_oldoldbufptr == PL_last_lop)
5552                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5553             else
5554                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5555             PL_lex_allbrackets++;
5556             OPERATOR(HASHBRACK);
5557         case XOPERATOR:
5558             while (s < PL_bufend && SPACE_OR_TAB(*s))
5559                 s++;
5560             d = s;
5561             PL_tokenbuf[0] = '\0';
5562             if (d < PL_bufend && *d == '-') {
5563                 PL_tokenbuf[0] = '-';
5564                 d++;
5565                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5566                     d++;
5567             }
5568             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5569                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5570                               FALSE, &len);
5571                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5572                     d++;
5573                 if (*d == '}') {
5574                     const char minus = (PL_tokenbuf[0] == '-');
5575                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5576                     if (minus)
5577                         force_next('-');
5578                 }
5579             }
5580             /* FALL THROUGH */
5581         case XATTRBLOCK:
5582         case XBLOCK:
5583             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5584             PL_lex_allbrackets++;
5585             PL_expect = XSTATE;
5586             break;
5587         case XATTRTERM:
5588         case XTERMBLOCK:
5589             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5590             PL_lex_allbrackets++;
5591             PL_expect = XSTATE;
5592             break;
5593         default: {
5594                 const char *t;
5595                 if (PL_oldoldbufptr == PL_last_lop)
5596                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5597                 else
5598                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5599                 PL_lex_allbrackets++;
5600                 s = SKIPSPACE1(s);
5601                 if (*s == '}') {
5602                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5603                         PL_expect = XTERM;
5604                         /* This hack is to get the ${} in the message. */
5605                         PL_bufptr = s+1;
5606                         yyerror("syntax error");
5607                         break;
5608                     }
5609                     OPERATOR(HASHBRACK);
5610                 }
5611                 /* This hack serves to disambiguate a pair of curlies
5612                  * as being a block or an anon hash.  Normally, expectation
5613                  * determines that, but in cases where we're not in a
5614                  * position to expect anything in particular (like inside
5615                  * eval"") we have to resolve the ambiguity.  This code
5616                  * covers the case where the first term in the curlies is a
5617                  * quoted string.  Most other cases need to be explicitly
5618                  * disambiguated by prepending a "+" before the opening
5619                  * curly in order to force resolution as an anon hash.
5620                  *
5621                  * XXX should probably propagate the outer expectation
5622                  * into eval"" to rely less on this hack, but that could
5623                  * potentially break current behavior of eval"".
5624                  * GSAR 97-07-21
5625                  */
5626                 t = s;
5627                 if (*s == '\'' || *s == '"' || *s == '`') {
5628                     /* common case: get past first string, handling escapes */
5629                     for (t++; t < PL_bufend && *t != *s;)
5630                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5631                             t++;
5632                     t++;
5633                 }
5634                 else if (*s == 'q') {
5635                     if (++t < PL_bufend
5636                         && (!isALNUM(*t)
5637                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5638                                 && !isALNUM(*t))))
5639                     {
5640                         /* skip q//-like construct */
5641                         const char *tmps;
5642                         char open, close, term;
5643                         I32 brackets = 1;
5644
5645                         while (t < PL_bufend && isSPACE(*t))
5646                             t++;
5647                         /* check for q => */
5648                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5649                             OPERATOR(HASHBRACK);
5650                         }
5651                         term = *t;
5652                         open = term;
5653                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5654                             term = tmps[5];
5655                         close = term;
5656                         if (open == close)
5657                             for (t++; t < PL_bufend; t++) {
5658                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5659                                     t++;
5660                                 else if (*t == open)
5661                                     break;
5662                             }
5663                         else {
5664                             for (t++; t < PL_bufend; t++) {
5665                                 if (*t == '\\' && t+1 < PL_bufend)
5666                                     t++;
5667                                 else if (*t == close && --brackets <= 0)
5668                                     break;
5669                                 else if (*t == open)
5670                                     brackets++;
5671                             }
5672                         }
5673                         t++;
5674                     }
5675                     else
5676                         /* skip plain q word */
5677                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5678                              t += UTF8SKIP(t);
5679                 }
5680                 else if (isALNUM_lazy_if(t,UTF)) {
5681                     t += UTF8SKIP(t);
5682                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5683                          t += UTF8SKIP(t);
5684                 }
5685                 while (t < PL_bufend && isSPACE(*t))
5686                     t++;
5687                 /* if comma follows first term, call it an anon hash */
5688                 /* XXX it could be a comma expression with loop modifiers */
5689                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5690                                    || (*t == '=' && t[1] == '>')))
5691                     OPERATOR(HASHBRACK);
5692                 if (PL_expect == XREF)
5693                     PL_expect = XTERM;
5694                 else {
5695                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5696                     PL_expect = XSTATE;
5697                 }
5698             }
5699             break;
5700         }
5701         pl_yylval.ival = CopLINE(PL_curcop);
5702         if (isSPACE(*s) || *s == '#')
5703             PL_copline = NOLINE;   /* invalidate current command line number */
5704         TOKEN('{');
5705     case '}':
5706         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5707             TOKEN(0);
5708       rightbracket:
5709         s++;
5710         if (PL_lex_brackets <= 0)
5711             yyerror("Unmatched right curly bracket");
5712         else
5713             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5714         PL_lex_allbrackets--;
5715         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5716             PL_lex_formbrack = 0;
5717         if (PL_lex_state == LEX_INTERPNORMAL) {
5718             if (PL_lex_brackets == 0) {
5719                 if (PL_expect & XFAKEBRACK) {
5720                     PL_expect &= XENUMMASK;
5721                     PL_lex_state = LEX_INTERPEND;
5722                     PL_bufptr = s;
5723 #if 0
5724                     if (PL_madskills) {
5725                         if (!PL_thiswhite)
5726                             PL_thiswhite = newSVpvs("");
5727                         sv_catpvs(PL_thiswhite,"}");
5728                     }
5729 #endif
5730                     return yylex();     /* ignore fake brackets */
5731                 }
5732                 if (*s == '-' && s[1] == '>')
5733                     PL_lex_state = LEX_INTERPENDMAYBE;
5734                 else if (*s != '[' && *s != '{')
5735                     PL_lex_state = LEX_INTERPEND;
5736             }
5737         }
5738         if (PL_expect & XFAKEBRACK) {
5739             PL_expect &= XENUMMASK;
5740             PL_bufptr = s;
5741             return yylex();             /* ignore fake brackets */
5742         }
5743         start_force(PL_curforce);
5744         if (PL_madskills) {
5745             curmad('X', newSVpvn(s-1,1));
5746             CURMAD('_', PL_thiswhite);
5747         }
5748         force_next('}');
5749 #ifdef PERL_MAD
5750         if (!PL_thistoken)
5751             PL_thistoken = newSVpvs("");
5752 #endif
5753         TOKEN(';');
5754     case '&':
5755         s++;
5756         if (*s++ == '&') {
5757             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5758                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5759                 s -= 2;
5760                 TOKEN(0);
5761             }
5762             AOPERATOR(ANDAND);
5763         }
5764         s--;
5765         if (PL_expect == XOPERATOR) {
5766             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5767                 && isIDFIRST_lazy_if(s,UTF))
5768             {
5769                 CopLINE_dec(PL_curcop);
5770                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5771                 CopLINE_inc(PL_curcop);
5772             }
5773             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5774                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5775                 s--;
5776                 TOKEN(0);
5777             }
5778             BAop(OP_BIT_AND);
5779         }
5780
5781         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5782         if (*PL_tokenbuf) {
5783             PL_expect = XOPERATOR;
5784             force_ident(PL_tokenbuf, '&');
5785         }
5786         else
5787             PREREF('&');
5788         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5789         TERM('&');
5790
5791     case '|':
5792         s++;
5793         if (*s++ == '|') {
5794             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5795                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5796                 s -= 2;
5797                 TOKEN(0);
5798             }
5799             AOPERATOR(OROR);
5800         }
5801         s--;
5802         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5803                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5804             s--;
5805             TOKEN(0);
5806         }
5807         BOop(OP_BIT_OR);
5808     case '=':
5809         s++;
5810         {
5811             const char tmp = *s++;
5812             if (tmp == '=') {
5813                 if (!PL_lex_allbrackets &&
5814                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5815                     s -= 2;
5816                     TOKEN(0);
5817                 }
5818                 Eop(OP_EQ);
5819             }
5820             if (tmp == '>') {
5821                 if (!PL_lex_allbrackets &&
5822                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5823                     s -= 2;
5824                     TOKEN(0);
5825                 }
5826                 OPERATOR(',');
5827             }
5828             if (tmp == '~')
5829                 PMop(OP_MATCH);
5830             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5831                 && strchr("+-*/%.^&|<",tmp))
5832                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5833                             "Reversed %c= operator",(int)tmp);
5834             s--;
5835             if (PL_expect == XSTATE && isALPHA(tmp) &&
5836                 (s == PL_linestart+1 || s[-2] == '\n') )
5837                 {
5838                     if (PL_in_eval && !PL_rsfp) {
5839                         d = PL_bufend;
5840                         while (s < d) {
5841                             if (*s++ == '\n') {
5842                                 incline(s);
5843                                 if (strnEQ(s,"=cut",4)) {
5844                                     s = strchr(s,'\n');
5845                                     if (s)
5846                                         s++;
5847                                     else
5848                                         s = d;
5849                                     incline(s);
5850                                     goto retry;
5851                                 }
5852                             }
5853                         }
5854                         goto retry;
5855                     }
5856 #ifdef PERL_MAD
5857                     if (PL_madskills) {
5858                         if (!PL_thiswhite)
5859                             PL_thiswhite = newSVpvs("");
5860                         sv_catpvn(PL_thiswhite, PL_linestart,
5861                                   PL_bufend - PL_linestart);
5862                     }
5863 #endif
5864                     s = PL_bufend;
5865                     PL_parser->in_pod = 1;
5866                     goto retry;
5867                 }
5868         }
5869         if (PL_lex_brackets < PL_lex_formbrack) {
5870             const char *t = s;
5871 #ifdef PERL_STRICT_CR
5872             while (SPACE_OR_TAB(*t))
5873 #else
5874             while (SPACE_OR_TAB(*t) || *t == '\r')
5875 #endif
5876                 t++;
5877             if (*t == '\n' || *t == '#') {
5878                 s--;
5879                 PL_expect = XBLOCK;
5880                 goto leftbracket;
5881             }
5882         }
5883         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5884             s--;
5885             TOKEN(0);
5886         }
5887         pl_yylval.ival = 0;
5888         OPERATOR(ASSIGNOP);
5889     case '!':
5890         s++;
5891         {
5892             const char tmp = *s++;
5893             if (tmp == '=') {
5894                 /* was this !=~ where !~ was meant?
5895                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5896
5897                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5898                     const char *t = s+1;
5899
5900                     while (t < PL_bufend && isSPACE(*t))
5901                         ++t;
5902
5903                     if (*t == '/' || *t == '?' ||
5904                         ((*t == 'm' || *t == 's' || *t == 'y')
5905                          && !isALNUM(t[1])) ||
5906                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5907                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5908                                     "!=~ should be !~");
5909                 }
5910                 if (!PL_lex_allbrackets &&
5911                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5912                     s -= 2;
5913                     TOKEN(0);
5914                 }
5915                 Eop(OP_NE);
5916             }
5917             if (tmp == '~')
5918                 PMop(OP_NOT);
5919         }
5920         s--;
5921         OPERATOR('!');
5922     case '<':
5923         if (PL_expect != XOPERATOR) {
5924             if (s[1] != '<' && !strchr(s,'>'))
5925                 check_uni();
5926             if (s[1] == '<')
5927                 s = scan_heredoc(s);
5928             else
5929                 s = scan_inputsymbol(s);
5930             TERM(sublex_start());
5931         }
5932         s++;
5933         {
5934             char tmp = *s++;
5935             if (tmp == '<') {
5936                 if (*s == '=' && !PL_lex_allbrackets &&
5937                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5938                     s -= 2;
5939                     TOKEN(0);
5940                 }
5941                 SHop(OP_LEFT_SHIFT);
5942             }
5943             if (tmp == '=') {
5944                 tmp = *s++;
5945                 if (tmp == '>') {
5946                     if (!PL_lex_allbrackets &&
5947                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5948                         s -= 3;
5949                         TOKEN(0);
5950                     }
5951                     Eop(OP_NCMP);
5952                 }
5953                 s--;
5954                 if (!PL_lex_allbrackets &&
5955                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5956                     s -= 2;
5957                     TOKEN(0);
5958                 }
5959                 Rop(OP_LE);
5960             }
5961         }
5962         s--;
5963         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5964             s--;
5965             TOKEN(0);
5966         }
5967         Rop(OP_LT);
5968     case '>':
5969         s++;
5970         {
5971             const char tmp = *s++;
5972             if (tmp == '>') {
5973                 if (*s == '=' && !PL_lex_allbrackets &&
5974                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5975                     s -= 2;
5976                     TOKEN(0);
5977                 }
5978                 SHop(OP_RIGHT_SHIFT);
5979             }
5980             else if (tmp == '=') {
5981                 if (!PL_lex_allbrackets &&
5982                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5983                     s -= 2;
5984                     TOKEN(0);
5985                 }
5986                 Rop(OP_GE);
5987             }
5988         }
5989         s--;
5990         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5991             s--;
5992             TOKEN(0);
5993         }
5994         Rop(OP_GT);
5995
5996     case '$':
5997         CLINE;
5998
5999         if (PL_expect == XOPERATOR) {
6000             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6001                 return deprecate_commaless_var_list();
6002             }
6003         }
6004
6005         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6006             PL_tokenbuf[0] = '@';
6007             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6008                            sizeof PL_tokenbuf - 1, FALSE);
6009             if (PL_expect == XOPERATOR)
6010                 no_op("Array length", s);
6011             if (!PL_tokenbuf[1])
6012                 PREREF(DOLSHARP);
6013             PL_expect = XOPERATOR;
6014             PL_pending_ident = '#';
6015             TOKEN(DOLSHARP);
6016         }
6017
6018         PL_tokenbuf[0] = '$';
6019         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6020                        sizeof PL_tokenbuf - 1, FALSE);
6021         if (PL_expect == XOPERATOR)
6022             no_op("Scalar", s);
6023         if (!PL_tokenbuf[1]) {
6024             if (s == PL_bufend)
6025                 yyerror("Final $ should be \\$ or $name");
6026             PREREF('$');
6027         }
6028
6029         d = s;
6030         {
6031             const char tmp = *s;
6032             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6033                 s = SKIPSPACE1(s);
6034
6035             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6036                 && intuit_more(s)) {
6037                 if (*s == '[') {
6038                     PL_tokenbuf[0] = '@';
6039                     if (ckWARN(WARN_SYNTAX)) {
6040                         char *t = s+1;
6041
6042                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6043                             t++;
6044                         if (*t++ == ',') {
6045                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6046                             while (t < PL_bufend && *t != ']')
6047                                 t++;
6048                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6049                                         "Multidimensional syntax %.*s not supported",
6050                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6051                         }
6052                     }
6053                 }
6054                 else if (*s == '{') {
6055                     char *t;
6056                     PL_tokenbuf[0] = '%';
6057                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6058                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6059                         {
6060                             char tmpbuf[sizeof PL_tokenbuf];
6061                             do {
6062                                 t++;
6063                             } while (isSPACE(*t));
6064                             if (isIDFIRST_lazy_if(t,UTF)) {
6065                                 STRLEN len;
6066                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6067                                               &len);
6068                                 while (isSPACE(*t))
6069                                     t++;
6070                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6071                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6072                                                 "You need to quote \"%s\"",
6073                                                 tmpbuf);
6074                             }
6075                         }
6076                 }
6077             }
6078
6079             PL_expect = XOPERATOR;
6080             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6081                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6082                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6083                     PL_expect = XOPERATOR;
6084                 else if (strchr("$@\"'`q", *s))
6085                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6086                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6087                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6088                 else if (isIDFIRST_lazy_if(s,UTF)) {
6089                     char tmpbuf[sizeof PL_tokenbuf];
6090                     int t2;
6091                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6092                     if ((t2 = keyword(tmpbuf, len, 0))) {
6093                         /* binary operators exclude handle interpretations */
6094                         switch (t2) {
6095                         case -KEY_x:
6096                         case -KEY_eq:
6097                         case -KEY_ne:
6098                         case -KEY_gt:
6099                         case -KEY_lt:
6100                         case -KEY_ge:
6101                         case -KEY_le:
6102                         case -KEY_cmp:
6103                             break;
6104                         default:
6105                             PL_expect = XTERM;  /* e.g. print $fh length() */
6106                             break;
6107                         }
6108                     }
6109                     else {
6110                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6111                     }
6112                 }
6113                 else if (isDIGIT(*s))
6114                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6115                 else if (*s == '.' && isDIGIT(s[1]))
6116                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6117                 else if ((*s == '?' || *s == '-' || *s == '+')
6118                          && !isSPACE(s[1]) && s[1] != '=')
6119                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6120                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6121                          && s[1] != '/')
6122                     PL_expect = XTERM;          /* e.g. print $fh /.../
6123                                                    XXX except DORDOR operator
6124                                                 */
6125                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6126                          && s[2] != '=')
6127                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6128             }
6129         }
6130         PL_pending_ident = '$';
6131         TOKEN('$');
6132
6133     case '@':
6134         if (PL_expect == XOPERATOR)
6135             no_op("Array", s);
6136         PL_tokenbuf[0] = '@';
6137         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6138         if (!PL_tokenbuf[1]) {
6139             PREREF('@');
6140         }
6141         if (PL_lex_state == LEX_NORMAL)
6142             s = SKIPSPACE1(s);
6143         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6144             if (*s == '{')
6145                 PL_tokenbuf[0] = '%';
6146
6147             /* Warn about @ where they meant $. */
6148             if (*s == '[' || *s == '{') {
6149                 if (ckWARN(WARN_SYNTAX)) {
6150                     const char *t = s + 1;
6151                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6152                         t++;
6153                     if (*t == '}' || *t == ']') {
6154                         t++;
6155                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6156                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6157                             "Scalar value %.*s better written as $%.*s",
6158                             (int)(t-PL_bufptr), PL_bufptr,
6159                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6160                     }
6161                 }
6162             }
6163         }
6164         PL_pending_ident = '@';
6165         TERM('@');
6166
6167      case '/':                  /* may be division, defined-or, or pattern */
6168         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6169             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6170                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6171                 TOKEN(0);
6172             s += 2;
6173             AOPERATOR(DORDOR);
6174         }
6175      case '?':                  /* may either be conditional or pattern */
6176         if (PL_expect == XOPERATOR) {
6177              char tmp = *s++;
6178              if(tmp == '?') {
6179                 if (!PL_lex_allbrackets &&
6180                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6181                     s--;
6182                     TOKEN(0);
6183                 }
6184                 PL_lex_allbrackets++;
6185                 OPERATOR('?');
6186              }
6187              else {
6188                  tmp = *s++;
6189                  if(tmp == '/') {
6190                      /* A // operator. */
6191                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6192                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6193                                             LEX_FAKEEOF_LOGIC)) {
6194                         s -= 2;
6195                         TOKEN(0);
6196                     }
6197                     AOPERATOR(DORDOR);
6198                  }
6199                  else {
6200                      s--;
6201                      if (*s == '=' && !PL_lex_allbrackets &&
6202                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6203                          s--;
6204                          TOKEN(0);
6205                      }
6206                      Mop(OP_DIVIDE);
6207                  }
6208              }
6209          }
6210          else {
6211              /* Disable warning on "study /blah/" */
6212              if (PL_oldoldbufptr == PL_last_uni
6213               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6214                   || memNE(PL_last_uni, "study", 5)
6215                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6216               ))
6217                  check_uni();
6218              if (*s == '?')
6219                  deprecate("?PATTERN? without explicit operator");
6220              s = scan_pat(s,OP_MATCH);
6221              TERM(sublex_start());
6222          }
6223
6224     case '.':
6225         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6226 #ifdef PERL_STRICT_CR
6227             && s[1] == '\n'
6228 #else
6229             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6230 #endif
6231             && (s == PL_linestart || s[-1] == '\n') )
6232         {
6233             PL_lex_formbrack = 0;
6234             PL_expect = XSTATE;
6235             goto rightbracket;
6236         }
6237         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6238             s += 3;
6239             OPERATOR(YADAYADA);
6240         }
6241         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6242             char tmp = *s++;
6243             if (*s == tmp) {
6244                 if (!PL_lex_allbrackets &&
6245                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6246                     s--;
6247                     TOKEN(0);
6248                 }
6249                 s++;
6250                 if (*s == tmp) {
6251                     s++;
6252                     pl_yylval.ival = OPf_SPECIAL;
6253                 }
6254                 else
6255                     pl_yylval.ival = 0;
6256                 OPERATOR(DOTDOT);
6257             }
6258             if (*s == '=' && !PL_lex_allbrackets &&
6259                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6260                 s--;
6261                 TOKEN(0);
6262             }
6263             Aop(OP_CONCAT);
6264         }
6265         /* FALL THROUGH */
6266     case '0': case '1': case '2': case '3': case '4':
6267     case '5': case '6': case '7': case '8': case '9':
6268         s = scan_num(s, &pl_yylval);
6269         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6270         if (PL_expect == XOPERATOR)
6271             no_op("Number",s);
6272         TERM(THING);
6273
6274     case '\'':
6275         s = scan_str(s,!!PL_madskills,FALSE);
6276         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6277         if (PL_expect == XOPERATOR) {
6278             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6279                 return deprecate_commaless_var_list();
6280             }
6281             else
6282                 no_op("String",s);
6283         }
6284         if (!s)
6285             missingterm(NULL);
6286         pl_yylval.ival = OP_CONST;
6287         TERM(sublex_start());
6288
6289     case '"':
6290         s = scan_str(s,!!PL_madskills,FALSE);
6291         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6292         if (PL_expect == XOPERATOR) {
6293             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6294                 return deprecate_commaless_var_list();
6295             }
6296             else
6297                 no_op("String",s);
6298         }
6299         if (!s)
6300             missingterm(NULL);
6301         pl_yylval.ival = OP_CONST;
6302         /* FIXME. I think that this can be const if char *d is replaced by
6303            more localised variables.  */
6304         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6305             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6306                 pl_yylval.ival = OP_STRINGIFY;
6307                 break;
6308             }
6309         }
6310         TERM(sublex_start());
6311
6312     case '`':
6313         s = scan_str(s,!!PL_madskills,FALSE);
6314         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6315         if (PL_expect == XOPERATOR)
6316             no_op("Backticks",s);
6317         if (!s)
6318             missingterm(NULL);
6319         readpipe_override();
6320         TERM(sublex_start());
6321
6322     case '\\':
6323         s++;
6324         if (PL_lex_inwhat && isDIGIT(*s))
6325             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6326                            *s, *s);
6327         if (PL_expect == XOPERATOR)
6328             no_op("Backslash",s);
6329         OPERATOR(REFGEN);
6330
6331     case 'v':
6332         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6333             char *start = s + 2;
6334             while (isDIGIT(*start) || *start == '_')
6335                 start++;
6336             if (*start == '.' && isDIGIT(start[1])) {
6337                 s = scan_num(s, &pl_yylval);
6338                 TERM(THING);
6339             }
6340             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6341             else if (!isALPHA(*start) && (PL_expect == XTERM
6342                         || PL_expect == XREF || PL_expect == XSTATE
6343                         || PL_expect == XTERMORDORDOR)) {
6344                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6345                 if (!gv) {
6346                     s = scan_num(s, &pl_yylval);
6347                     TERM(THING);
6348                 }
6349             }
6350         }
6351         goto keylookup;
6352     case 'x':
6353         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6354             s++;
6355             Mop(OP_REPEAT);
6356         }
6357         goto keylookup;
6358
6359     case '_':
6360     case 'a': case 'A':
6361     case 'b': case 'B':
6362     case 'c': case 'C':
6363     case 'd': case 'D':
6364     case 'e': case 'E':
6365     case 'f': case 'F':
6366     case 'g': case 'G':
6367     case 'h': case 'H':
6368     case 'i': case 'I':
6369     case 'j': case 'J':
6370     case 'k': case 'K':
6371     case 'l': case 'L':
6372     case 'm': case 'M':
6373     case 'n': case 'N':
6374     case 'o': case 'O':
6375     case 'p': case 'P':
6376     case 'q': case 'Q':
6377     case 'r': case 'R':
6378     case 's': case 'S':
6379     case 't': case 'T':
6380     case 'u': case 'U':
6381               case 'V':
6382     case 'w': case 'W':
6383               case 'X':
6384     case 'y': case 'Y':
6385     case 'z': case 'Z':
6386
6387       keylookup: {
6388         bool anydelim;
6389         I32 tmp;
6390
6391         orig_keyword = 0;
6392         gv = NULL;
6393         gvp = NULL;
6394
6395         PL_bufptr = s;
6396         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6397
6398         /* Some keywords can be followed by any delimiter, including ':' */
6399         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6400
6401         /* x::* is just a word, unless x is "CORE" */
6402         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6403             goto just_a_word;
6404
6405         d = s;
6406         while (d < PL_bufend && isSPACE(*d))
6407                 d++;    /* no comments skipped here, or s### is misparsed */
6408
6409         /* Is this a word before a => operator? */
6410         if (*d == '=' && d[1] == '>') {
6411             CLINE;
6412             pl_yylval.opval
6413                 = (OP*)newSVOP(OP_CONST, 0,
6414                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6415             pl_yylval.opval->op_private = OPpCONST_BARE;
6416             TERM(WORD);
6417         }
6418
6419         /* Check for plugged-in keyword */
6420         {
6421             OP *o;
6422             int result;
6423             char *saved_bufptr = PL_bufptr;
6424             PL_bufptr = s;
6425             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6426             s = PL_bufptr;
6427             if (result == KEYWORD_PLUGIN_DECLINE) {
6428                 /* not a plugged-in keyword */
6429                 PL_bufptr = saved_bufptr;
6430             } else if (result == KEYWORD_PLUGIN_STMT) {
6431                 pl_yylval.opval = o;
6432                 CLINE;
6433                 PL_expect = XSTATE;
6434                 return REPORT(PLUGSTMT);
6435             } else if (result == KEYWORD_PLUGIN_EXPR) {
6436                 pl_yylval.opval = o;
6437                 CLINE;
6438                 PL_expect = XOPERATOR;
6439                 return REPORT(PLUGEXPR);
6440             } else {
6441                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6442                                         PL_tokenbuf);
6443             }
6444         }
6445
6446         /* Check for built-in keyword */
6447         tmp = keyword(PL_tokenbuf, len, 0);
6448
6449         /* Is this a label? */
6450         if (!anydelim && PL_expect == XSTATE
6451               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6452             s = d + 1;
6453             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6454             CLINE;
6455             TOKEN(LABEL);
6456         }
6457
6458         if (tmp < 0) {                  /* second-class keyword? */
6459             GV *ogv = NULL;     /* override (winner) */
6460             GV *hgv = NULL;     /* hidden (loser) */
6461             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6462                 CV *cv;
6463                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6464                     (cv = GvCVu(gv)))
6465                 {
6466                     if (GvIMPORTED_CV(gv))
6467                         ogv = gv;
6468                     else if (! CvMETHOD(cv))
6469                         hgv = gv;
6470                 }
6471                 if (!ogv &&
6472                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6473                     (gv = *gvp) && isGV_with_GP(gv) &&
6474                     GvCVu(gv) && GvIMPORTED_CV(gv))
6475                 {
6476                     ogv = gv;
6477                 }
6478             }
6479             if (ogv) {
6480                 orig_keyword = tmp;
6481                 tmp = 0;                /* overridden by import or by GLOBAL */
6482             }
6483             else if (gv && !gvp
6484                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6485                      && GvCVu(gv))
6486             {
6487                 tmp = 0;                /* any sub overrides "weak" keyword */
6488             }
6489             else {                      /* no override */
6490                 tmp = -tmp;
6491                 if (tmp == KEY_dump) {
6492                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6493                                    "dump() better written as CORE::dump()");
6494                 }
6495                 gv = NULL;
6496                 gvp = 0;
6497                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6498                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6499                                    "Ambiguous call resolved as CORE::%s(), "
6500                                    "qualify as such or use &",
6501                                    GvENAME(hgv));
6502             }
6503         }
6504
6505       reserved_word:
6506         switch (tmp) {
6507
6508         default:                        /* not a keyword */
6509             /* Trade off - by using this evil construction we can pull the
6510                variable gv into the block labelled keylookup. If not, then
6511                we have to give it function scope so that the goto from the
6512                earlier ':' case doesn't bypass the initialisation.  */
6513             if (0) {
6514             just_a_word_zero_gv:
6515                 gv = NULL;
6516                 gvp = NULL;
6517                 orig_keyword = 0;
6518             }
6519           just_a_word: {
6520                 SV *sv;
6521                 int pkgname = 0;
6522                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6523                 OP *rv2cv_op;
6524                 CV *cv;
6525 #ifdef PERL_MAD
6526                 SV *nextPL_nextwhite = 0;
6527 #endif
6528
6529
6530                 /* Get the rest if it looks like a package qualifier */
6531
6532                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6533                     STRLEN morelen;
6534                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6535                                   TRUE, &morelen);
6536                     if (!morelen)
6537                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6538                                 *s == '\'' ? "'" : "::");
6539                     len += morelen;
6540                     pkgname = 1;
6541                 }
6542
6543                 if (PL_expect == XOPERATOR) {
6544                     if (PL_bufptr == PL_linestart) {
6545                         CopLINE_dec(PL_curcop);
6546                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6547                         CopLINE_inc(PL_curcop);
6548                     }
6549                     else
6550                         no_op("Bareword",s);
6551                 }
6552
6553                 /* Look for a subroutine with this name in current package,
6554                    unless name is "Foo::", in which case Foo is a bareword
6555                    (and a package name). */
6556
6557                 if (len > 2 && !PL_madskills &&
6558                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6559                 {
6560                     if (ckWARN(WARN_BAREWORD)
6561                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6562                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6563                             "Bareword \"%s\" refers to nonexistent package",
6564                              PL_tokenbuf);
6565                     len -= 2;
6566                     PL_tokenbuf[len] = '\0';
6567                     gv = NULL;
6568                     gvp = 0;
6569                 }
6570                 else {
6571                     if (!gv) {
6572                         /* Mustn't actually add anything to a symbol table.
6573                            But also don't want to "initialise" any placeholder
6574                            constants that might already be there into full
6575                            blown PVGVs with attached PVCV.  */
6576                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6577                                                GV_NOADD_NOINIT, SVt_PVCV);
6578                     }
6579                     len = 0;
6580                 }
6581
6582                 /* if we saw a global override before, get the right name */
6583
6584                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6585                     len ? len : strlen(PL_tokenbuf));
6586                 if (gvp) {
6587                     SV * const tmp_sv = sv;
6588                     sv = newSVpvs("CORE::GLOBAL::");
6589                     sv_catsv(sv, tmp_sv);
6590                     SvREFCNT_dec(tmp_sv);
6591                 }
6592
6593 #ifdef PERL_MAD
6594                 if (PL_madskills && !PL_thistoken) {
6595                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6596                     PL_thistoken = newSVpvn(start,s - start);
6597                     PL_realtokenstart = s - SvPVX(PL_linestr);
6598                 }
6599 #endif
6600
6601                 /* Presume this is going to be a bareword of some sort. */
6602                 CLINE;
6603                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6604                 pl_yylval.opval->op_private = OPpCONST_BARE;
6605
6606                 /* And if "Foo::", then that's what it certainly is. */
6607                 if (len)
6608                     goto safe_bareword;
6609
6610                 {
6611                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6612                     const_op->op_private = OPpCONST_BARE;
6613                     rv2cv_op = newCVREF(0, const_op);
6614                 }
6615                 cv = rv2cv_op_cv(rv2cv_op, 0);
6616
6617                 /* See if it's the indirect object for a list operator. */
6618
6619                 if (PL_oldoldbufptr &&
6620                     PL_oldoldbufptr < PL_bufptr &&
6621                     (PL_oldoldbufptr == PL_last_lop
6622                      || PL_oldoldbufptr == PL_last_uni) &&
6623                     /* NO SKIPSPACE BEFORE HERE! */
6624                     (PL_expect == XREF ||
6625                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6626                 {
6627                     bool immediate_paren = *s == '(';
6628
6629                     /* (Now we can afford to cross potential line boundary.) */
6630                     s = SKIPSPACE2(s,nextPL_nextwhite);
6631 #ifdef PERL_MAD
6632                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6633 #endif
6634
6635                     /* Two barewords in a row may indicate method call. */
6636
6637                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6638                         (tmp = intuit_method(s, gv, cv))) {
6639                         op_free(rv2cv_op);
6640                         if (tmp == METHOD && !PL_lex_allbrackets &&
6641                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6642                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6643                         return REPORT(tmp);
6644                     }
6645
6646                     /* If not a declared subroutine, it's an indirect object. */
6647                     /* (But it's an indir obj regardless for sort.) */
6648                     /* Also, if "_" follows a filetest operator, it's a bareword */
6649
6650                     if (
6651                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6652                          (!cv &&
6653                         (PL_last_lop_op != OP_MAPSTART &&
6654                          PL_last_lop_op != OP_GREPSTART))))
6655                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6656                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6657                        )
6658                     {
6659                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6660                         goto bareword;
6661                     }
6662                 }
6663
6664                 PL_expect = XOPERATOR;
6665 #ifdef PERL_MAD
6666                 if (isSPACE(*s))
6667                     s = SKIPSPACE2(s,nextPL_nextwhite);
6668                 PL_nextwhite = nextPL_nextwhite;
6669 #else
6670                 s = skipspace(s);
6671 #endif
6672
6673                 /* Is this a word before a => operator? */
6674                 if (*s == '=' && s[1] == '>' && !pkgname) {
6675                     op_free(rv2cv_op);
6676                     CLINE;
6677                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6678                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6679                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6680                     TERM(WORD);
6681                 }
6682
6683                 /* If followed by a paren, it's certainly a subroutine. */
6684                 if (*s == '(') {
6685                     CLINE;
6686                     if (cv) {
6687                         d = s + 1;
6688                         while (SPACE_OR_TAB(*d))
6689                             d++;
6690                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6691                             s = d + 1;
6692                             goto its_constant;
6693                         }
6694                     }
6695 #ifdef PERL_MAD
6696                     if (PL_madskills) {
6697                         PL_nextwhite = PL_thiswhite;
6698                         PL_thiswhite = 0;
6699                     }
6700                     start_force(PL_curforce);
6701 #endif
6702                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6703                     PL_expect = XOPERATOR;
6704 #ifdef PERL_MAD
6705                     if (PL_madskills) {
6706                         PL_nextwhite = nextPL_nextwhite;
6707                         curmad('X', PL_thistoken);
6708                         PL_thistoken = newSVpvs("");
6709                     }
6710 #endif
6711                     op_free(rv2cv_op);
6712                     force_next(WORD);
6713                     pl_yylval.ival = 0;
6714                     TOKEN('&');
6715                 }
6716
6717                 /* If followed by var or block, call it a method (unless sub) */
6718
6719                 if ((*s == '$' || *s == '{') && !cv) {
6720                     op_free(rv2cv_op);
6721                     PL_last_lop = PL_oldbufptr;
6722                     PL_last_lop_op = OP_METHOD;
6723                     if (!PL_lex_allbrackets &&
6724                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6725                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6726                     PREBLOCK(METHOD);
6727                 }
6728
6729                 /* If followed by a bareword, see if it looks like indir obj. */
6730
6731                 if (!orig_keyword
6732                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6733                         && (tmp = intuit_method(s, gv, cv))) {
6734                     op_free(rv2cv_op);
6735                     if (tmp == METHOD && !PL_lex_allbrackets &&
6736                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6737                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6738                     return REPORT(tmp);
6739                 }
6740
6741                 /* Not a method, so call it a subroutine (if defined) */
6742
6743                 if (cv) {
6744                     if (lastchar == '-')
6745                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6746                                          "Ambiguous use of -%s resolved as -&%s()",
6747                                          PL_tokenbuf, PL_tokenbuf);
6748                     /* Check for a constant sub */
6749                     if ((sv = cv_const_sv(cv))) {
6750                   its_constant:
6751                         op_free(rv2cv_op);
6752                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6753                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6754                         pl_yylval.opval->op_private = 0;
6755                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6756                         TOKEN(WORD);
6757                     }
6758
6759                     op_free(pl_yylval.opval);
6760                     pl_yylval.opval = rv2cv_op;
6761                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6762                     PL_last_lop = PL_oldbufptr;
6763                     PL_last_lop_op = OP_ENTERSUB;
6764                     /* Is there a prototype? */
6765                     if (
6766 #ifdef PERL_MAD
6767                         cv &&
6768 #endif
6769                         SvPOK(cv))
6770                     {
6771                         STRLEN protolen;
6772                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6773                         if (!protolen)
6774                             TERM(FUNC0SUB);
6775                         while (*proto == ';')
6776                             proto++;
6777                         if (
6778                             (
6779                                 (
6780                                     *proto == '$' || *proto == '_'
6781                                  || *proto == '*' || *proto == '+'
6782                                 )
6783                              && proto[1] == '\0'
6784                             )
6785                          || (
6786                              *proto == '\\' && proto[1] && proto[2] == '\0'
6787                             )
6788                         )
6789                             OPERATOR(UNIOPSUB);
6790                         if (*proto == '\\' && proto[1] == '[') {
6791                             const char *p = proto + 2;
6792                             while(*p && *p != ']')
6793                                 ++p;
6794                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6795                         }
6796                         if (*proto == '&' && *s == '{') {
6797                             if (PL_curstash)
6798                                 sv_setpvs(PL_subname, "__ANON__");
6799                             else
6800                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6801                             if (!PL_lex_allbrackets &&
6802                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6803                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6804                             PREBLOCK(LSTOPSUB);
6805                         }
6806                     }
6807 #ifdef PERL_MAD
6808                     {
6809                         if (PL_madskills) {
6810                             PL_nextwhite = PL_thiswhite;
6811                             PL_thiswhite = 0;
6812                         }
6813                         start_force(PL_curforce);
6814                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6815                         PL_expect = XTERM;
6816                         if (PL_madskills) {
6817                             PL_nextwhite = nextPL_nextwhite;
6818                             curmad('X', PL_thistoken);
6819                             PL_thistoken = newSVpvs("");
6820                         }
6821                         force_next(WORD);
6822                         if (!PL_lex_allbrackets &&
6823                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6824                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6825                         TOKEN(NOAMP);
6826                     }
6827                 }
6828
6829                 /* Guess harder when madskills require "best effort". */
6830                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6831                     int probable_sub = 0;
6832                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6833                         probable_sub = 1;
6834                     else if (isALPHA(*s)) {
6835                         char tmpbuf[1024];
6836                         STRLEN tmplen;
6837                         d = s;
6838                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6839                         if (!keyword(tmpbuf, tmplen, 0))
6840                             probable_sub = 1;
6841                         else {
6842                             while (d < PL_bufend && isSPACE(*d))
6843                                 d++;
6844                             if (*d == '=' && d[1] == '>')
6845                                 probable_sub = 1;
6846                         }
6847                     }
6848                     if (probable_sub) {
6849                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6850                         op_free(pl_yylval.opval);
6851                         pl_yylval.opval = rv2cv_op;
6852                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6853                         PL_last_lop = PL_oldbufptr;
6854                         PL_last_lop_op = OP_ENTERSUB;
6855                         PL_nextwhite = PL_thiswhite;
6856                         PL_thiswhite = 0;
6857                         start_force(PL_curforce);
6858                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6859                         PL_expect = XTERM;
6860                         PL_nextwhite = nextPL_nextwhite;
6861                         curmad('X', PL_thistoken);
6862                         PL_thistoken = newSVpvs("");
6863                         force_next(WORD);
6864                         if (!PL_lex_allbrackets &&
6865                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6866                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6867                         TOKEN(NOAMP);
6868                     }
6869 #else
6870                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6871                     PL_expect = XTERM;
6872                     force_next(WORD);
6873                     if (!PL_lex_allbrackets &&
6874                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6875                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6876                     TOKEN(NOAMP);
6877 #endif
6878                 }
6879
6880                 /* Call it a bare word */
6881
6882                 if (PL_hints & HINT_STRICT_SUBS)
6883                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6884                 else {
6885                 bareword:
6886                     /* after "print" and similar functions (corresponding to
6887                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6888                      * a filehandle should be subject to "strict subs".
6889                      * Likewise for the optional indirect-object argument to system
6890                      * or exec, which can't be a bareword */
6891                     if ((PL_last_lop_op == OP_PRINT
6892                             || PL_last_lop_op == OP_PRTF
6893                             || PL_last_lop_op == OP_SAY
6894                             || PL_last_lop_op == OP_SYSTEM
6895                             || PL_last_lop_op == OP_EXEC)
6896                             && (PL_hints & HINT_STRICT_SUBS))
6897                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6898                     if (lastchar != '-') {
6899                         if (ckWARN(WARN_RESERVED)) {
6900                             d = PL_tokenbuf;
6901                             while (isLOWER(*d))
6902                                 d++;
6903                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6904                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6905                                        PL_tokenbuf);
6906                         }
6907                     }
6908                 }
6909                 op_free(rv2cv_op);
6910
6911             safe_bareword:
6912                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6913                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6914                                      "Operator or semicolon missing before %c%s",
6915                                      lastchar, PL_tokenbuf);
6916                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6917                                      "Ambiguous use of %c resolved as operator %c",
6918                                      lastchar, lastchar);
6919                 }
6920                 TOKEN(WORD);
6921             }
6922
6923         case KEY___FILE__:
6924             FUN0OP(
6925                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6926             );
6927
6928         case KEY___LINE__:
6929             FUN0OP(
6930                 (OP*)newSVOP(OP_CONST, 0,
6931                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6932             );
6933
6934         case KEY___PACKAGE__:
6935             FUN0OP(
6936                 (OP*)newSVOP(OP_CONST, 0,
6937                                         (PL_curstash
6938                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6939                                          : &PL_sv_undef))
6940             );
6941
6942         case KEY___DATA__:
6943         case KEY___END__: {
6944             GV *gv;
6945             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6946                 const char *pname = "main";
6947                 if (PL_tokenbuf[2] == 'D')
6948                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6949                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6950                                 SVt_PVIO);
6951                 GvMULTI_on(gv);
6952                 if (!GvIO(gv))
6953                     GvIOp(gv) = newIO();
6954                 IoIFP(GvIOp(gv)) = PL_rsfp;
6955 #if defined(HAS_FCNTL) && defined(F_SETFD)
6956                 {
6957                     const int fd = PerlIO_fileno(PL_rsfp);
6958                     fcntl(fd,F_SETFD,fd >= 3);
6959                 }
6960 #endif
6961                 /* Mark this internal pseudo-handle as clean */
6962                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6963                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6964                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6965                 else
6966                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6967 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6968                 /* if the script was opened in binmode, we need to revert
6969                  * it to text mode for compatibility; but only iff it has CRs
6970                  * XXX this is a questionable hack at best. */
6971                 if (PL_bufend-PL_bufptr > 2
6972                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6973                 {
6974                     Off_t loc = 0;
6975                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6976                         loc = PerlIO_tell(PL_rsfp);
6977                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6978                     }
6979 #ifdef NETWARE
6980                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6981 #else
6982                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6983 #endif  /* NETWARE */
6984                         if (loc > 0)
6985                             PerlIO_seek(PL_rsfp, loc, 0);
6986                     }
6987                 }
6988 #endif
6989 #ifdef PERLIO_LAYERS
6990                 if (!IN_BYTES) {
6991                     if (UTF)
6992                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6993                     else if (PL_encoding) {
6994                         SV *name;
6995                         dSP;
6996                         ENTER;
6997                         SAVETMPS;
6998                         PUSHMARK(sp);
6999                         EXTEND(SP, 1);
7000                         XPUSHs(PL_encoding);
7001                         PUTBACK;
7002                         call_method("name", G_SCALAR);
7003                         SPAGAIN;
7004                         name = POPs;
7005                         PUTBACK;
7006                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7007                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7008                                                       SVfARG(name)));
7009                         FREETMPS;
7010                         LEAVE;
7011                     }
7012                 }
7013 #endif
7014 #ifdef PERL_MAD
7015                 if (PL_madskills) {
7016                     if (PL_realtokenstart >= 0) {
7017                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7018                         if (!PL_endwhite)
7019                             PL_endwhite = newSVpvs("");
7020                         sv_catsv(PL_endwhite, PL_thiswhite);
7021                         PL_thiswhite = 0;
7022                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7023                         PL_realtokenstart = -1;
7024                     }
7025                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7026                            != NULL) ;
7027                 }
7028 #endif
7029                 PL_rsfp = NULL;
7030             }
7031             goto fake_eof;
7032         }
7033
7034         case KEY_AUTOLOAD:
7035         case KEY_DESTROY:
7036         case KEY_BEGIN:
7037         case KEY_UNITCHECK:
7038         case KEY_CHECK:
7039         case KEY_INIT:
7040         case KEY_END:
7041             if (PL_expect == XSTATE) {
7042                 s = PL_bufptr;
7043                 goto really_sub;
7044             }
7045             goto just_a_word;
7046
7047         case KEY_CORE:
7048             if (*s == ':' && s[1] == ':') {
7049                 s += 2;
7050                 d = s;
7051                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7052                 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7053                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7054                 if (tmp < 0)
7055                     tmp = -tmp;
7056                 else if (tmp == KEY_require || tmp == KEY_do)
7057                     /* that's a way to remember we saw "CORE::" */
7058                     orig_keyword = tmp;
7059                 goto reserved_word;
7060             }
7061             goto just_a_word;
7062
7063         case KEY_abs:
7064             UNI(OP_ABS);
7065
7066         case KEY_alarm:
7067             UNI(OP_ALARM);
7068
7069         case KEY_accept:
7070             LOP(OP_ACCEPT,XTERM);
7071
7072         case KEY_and:
7073             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7074                 return REPORT(0);
7075             OPERATOR(ANDOP);
7076
7077         case KEY_atan2:
7078             LOP(OP_ATAN2,XTERM);
7079
7080         case KEY_bind:
7081             LOP(OP_BIND,XTERM);
7082
7083         case KEY_binmode:
7084             LOP(OP_BINMODE,XTERM);
7085
7086         case KEY_bless:
7087             LOP(OP_BLESS,XTERM);
7088
7089         case KEY_break:
7090             FUN0(OP_BREAK);
7091
7092         case KEY_chop:
7093             UNI(OP_CHOP);
7094
7095         case KEY_continue:
7096                     /* We have to disambiguate the two senses of
7097                       "continue". If the next token is a '{' then
7098                       treat it as the start of a continue block;
7099                       otherwise treat it as a control operator.
7100                      */
7101                     s = skipspace(s);
7102                     if (*s == '{')
7103             PREBLOCK(CONTINUE);
7104                     else
7105                         FUN0(OP_CONTINUE);
7106
7107         case KEY_chdir:
7108             /* may use HOME */
7109             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7110             UNI(OP_CHDIR);
7111
7112         case KEY_close:
7113             UNI(OP_CLOSE);
7114
7115         case KEY_closedir:
7116             UNI(OP_CLOSEDIR);
7117
7118         case KEY_cmp:
7119             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7120                 return REPORT(0);
7121             Eop(OP_SCMP);
7122
7123         case KEY_caller:
7124             UNI(OP_CALLER);
7125
7126         case KEY_crypt:
7127 #ifdef FCRYPT
7128             if (!PL_cryptseen) {
7129                 PL_cryptseen = TRUE;
7130                 init_des();
7131             }
7132 #endif
7133             LOP(OP_CRYPT,XTERM);
7134
7135         case KEY_chmod:
7136             LOP(OP_CHMOD,XTERM);
7137
7138         case KEY_chown:
7139             LOP(OP_CHOWN,XTERM);
7140
7141         case KEY_connect:
7142             LOP(OP_CONNECT,XTERM);
7143
7144         case KEY_chr:
7145             UNI(OP_CHR);
7146
7147         case KEY_cos:
7148             UNI(OP_COS);
7149
7150         case KEY_chroot:
7151             UNI(OP_CHROOT);
7152
7153         case KEY_default:
7154             PREBLOCK(DEFAULT);
7155
7156         case KEY_do:
7157             s = SKIPSPACE1(s);
7158             if (*s == '{')
7159                 PRETERMBLOCK(DO);
7160             if (*s != '\'')
7161                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7162             if (orig_keyword == KEY_do) {
7163                 orig_keyword = 0;
7164                 pl_yylval.ival = 1;
7165             }
7166             else
7167                 pl_yylval.ival = 0;
7168             OPERATOR(DO);
7169
7170         case KEY_die:
7171             PL_hints |= HINT_BLOCK_SCOPE;
7172             LOP(OP_DIE,XTERM);
7173
7174         case KEY_defined:
7175             UNI(OP_DEFINED);
7176
7177         case KEY_delete:
7178             UNI(OP_DELETE);
7179
7180         case KEY_dbmopen:
7181             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7182                               STR_WITH_LEN("NDBM_File::"),
7183                               STR_WITH_LEN("DB_File::"),
7184                               STR_WITH_LEN("GDBM_File::"),
7185                               STR_WITH_LEN("SDBM_File::"),
7186                               STR_WITH_LEN("ODBM_File::"),
7187                               NULL);
7188             LOP(OP_DBMOPEN,XTERM);
7189
7190         case KEY_dbmclose:
7191             UNI(OP_DBMCLOSE);
7192
7193         case KEY_dump:
7194             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7195             LOOPX(OP_DUMP);
7196
7197         case KEY_else:
7198             PREBLOCK(ELSE);
7199
7200         case KEY_elsif:
7201             pl_yylval.ival = CopLINE(PL_curcop);
7202             OPERATOR(ELSIF);
7203
7204         case KEY_eq:
7205             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7206                 return REPORT(0);
7207             Eop(OP_SEQ);
7208
7209         case KEY_exists:
7210             UNI(OP_EXISTS);
7211
7212         case KEY_exit:
7213             if (PL_madskills)
7214                 UNI(OP_INT);
7215             UNI(OP_EXIT);
7216
7217         case KEY_eval:
7218             s = SKIPSPACE1(s);
7219             if (*s == '{') { /* block eval */
7220                 PL_expect = XTERMBLOCK;
7221                 UNIBRACK(OP_ENTERTRY);
7222             }
7223             else { /* string eval */
7224                 PL_expect = XTERM;
7225                 UNIBRACK(OP_ENTEREVAL);
7226             }
7227
7228         case KEY_eof:
7229             UNI(OP_EOF);
7230
7231         case KEY_exp:
7232             UNI(OP_EXP);
7233
7234         case KEY_each:
7235             UNI(OP_EACH);
7236
7237         case KEY_exec:
7238             LOP(OP_EXEC,XREF);
7239
7240         case KEY_endhostent:
7241             FUN0(OP_EHOSTENT);
7242
7243         case KEY_endnetent:
7244             FUN0(OP_ENETENT);
7245
7246         case KEY_endservent:
7247             FUN0(OP_ESERVENT);
7248
7249         case KEY_endprotoent:
7250             FUN0(OP_EPROTOENT);
7251
7252         case KEY_endpwent:
7253             FUN0(OP_EPWENT);
7254
7255         case KEY_endgrent:
7256             FUN0(OP_EGRENT);
7257
7258         case KEY_for:
7259         case KEY_foreach:
7260             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7261                 return REPORT(0);
7262             pl_yylval.ival = CopLINE(PL_curcop);
7263             s = SKIPSPACE1(s);
7264             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7265                 char *p = s;
7266 #ifdef PERL_MAD
7267                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7268 #endif
7269
7270                 if ((PL_bufend - p) >= 3 &&
7271                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7272                     p += 2;
7273                 else if ((PL_bufend - p) >= 4 &&
7274                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7275                     p += 3;
7276                 p = PEEKSPACE(p);
7277                 if (isIDFIRST_lazy_if(p,UTF)) {
7278                     p = scan_ident(p, PL_bufend,
7279                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7280                     p = PEEKSPACE(p);
7281                 }
7282                 if (*p != '$')
7283                     Perl_croak(aTHX_ "Missing $ on loop variable");
7284 #ifdef PERL_MAD
7285                 s = SvPVX(PL_linestr) + soff;
7286 #endif
7287             }
7288             OPERATOR(FOR);
7289
7290         case KEY_formline:
7291             LOP(OP_FORMLINE,XTERM);
7292
7293         case KEY_fork:
7294             FUN0(OP_FORK);
7295
7296         case KEY_fcntl:
7297             LOP(OP_FCNTL,XTERM);
7298
7299         case KEY_fileno:
7300             UNI(OP_FILENO);
7301
7302         case KEY_flock:
7303             LOP(OP_FLOCK,XTERM);
7304
7305         case KEY_gt:
7306             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7307                 return REPORT(0);
7308             Rop(OP_SGT);
7309
7310         case KEY_ge:
7311             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7312                 return REPORT(0);
7313             Rop(OP_SGE);
7314
7315         case KEY_grep:
7316             LOP(OP_GREPSTART, XREF);
7317
7318         case KEY_goto:
7319             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7320             LOOPX(OP_GOTO);
7321
7322         case KEY_gmtime:
7323             UNI(OP_GMTIME);
7324
7325         case KEY_getc:
7326             UNIDOR(OP_GETC);
7327
7328         case KEY_getppid:
7329             FUN0(OP_GETPPID);
7330
7331         case KEY_getpgrp:
7332             UNI(OP_GETPGRP);
7333
7334         case KEY_getpriority:
7335             LOP(OP_GETPRIORITY,XTERM);
7336
7337         case KEY_getprotobyname:
7338             UNI(OP_GPBYNAME);
7339
7340         case KEY_getprotobynumber:
7341             LOP(OP_GPBYNUMBER,XTERM);
7342
7343         case KEY_getprotoent:
7344             FUN0(OP_GPROTOENT);
7345
7346         case KEY_getpwent:
7347             FUN0(OP_GPWENT);
7348
7349         case KEY_getpwnam:
7350             UNI(OP_GPWNAM);
7351
7352         case KEY_getpwuid:
7353             UNI(OP_GPWUID);
7354
7355         case KEY_getpeername:
7356             UNI(OP_GETPEERNAME);
7357
7358         case KEY_gethostbyname:
7359             UNI(OP_GHBYNAME);
7360
7361         case KEY_gethostbyaddr:
7362             LOP(OP_GHBYADDR,XTERM);
7363
7364         case KEY_gethostent:
7365             FUN0(OP_GHOSTENT);
7366
7367         case KEY_getnetbyname:
7368             UNI(OP_GNBYNAME);
7369
7370         case KEY_getnetbyaddr:
7371             LOP(OP_GNBYADDR,XTERM);
7372
7373         case KEY_getnetent:
7374             FUN0(OP_GNETENT);
7375
7376         case KEY_getservbyname:
7377             LOP(OP_GSBYNAME,XTERM);
7378
7379         case KEY_getservbyport:
7380             LOP(OP_GSBYPORT,XTERM);
7381
7382         case KEY_getservent:
7383             FUN0(OP_GSERVENT);
7384
7385         case KEY_getsockname:
7386             UNI(OP_GETSOCKNAME);
7387
7388         case KEY_getsockopt:
7389             LOP(OP_GSOCKOPT,XTERM);
7390
7391         case KEY_getgrent:
7392             FUN0(OP_GGRENT);
7393
7394         case KEY_getgrnam:
7395             UNI(OP_GGRNAM);
7396
7397         case KEY_getgrgid:
7398             UNI(OP_GGRGID);
7399
7400         case KEY_getlogin:
7401             FUN0(OP_GETLOGIN);
7402
7403         case KEY_given:
7404             pl_yylval.ival = CopLINE(PL_curcop);
7405             OPERATOR(GIVEN);
7406
7407         case KEY_glob:
7408             LOP(OP_GLOB,XTERM);
7409
7410         case KEY_hex:
7411             UNI(OP_HEX);
7412
7413         case KEY_if:
7414             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7415                 return REPORT(0);
7416             pl_yylval.ival = CopLINE(PL_curcop);
7417             OPERATOR(IF);
7418
7419         case KEY_index:
7420             LOP(OP_INDEX,XTERM);
7421
7422         case KEY_int:
7423             UNI(OP_INT);
7424
7425         case KEY_ioctl:
7426             LOP(OP_IOCTL,XTERM);
7427
7428         case KEY_join:
7429             LOP(OP_JOIN,XTERM);
7430
7431         case KEY_keys:
7432             UNI(OP_KEYS);
7433
7434         case KEY_kill:
7435             LOP(OP_KILL,XTERM);
7436
7437         case KEY_last:
7438             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7439             LOOPX(OP_LAST);
7440
7441         case KEY_lc:
7442             UNI(OP_LC);
7443
7444         case KEY_lcfirst:
7445             UNI(OP_LCFIRST);
7446
7447         case KEY_local:
7448             pl_yylval.ival = 0;
7449             OPERATOR(LOCAL);
7450
7451         case KEY_length:
7452             UNI(OP_LENGTH);
7453
7454         case KEY_lt:
7455             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7456                 return REPORT(0);
7457             Rop(OP_SLT);
7458
7459         case KEY_le:
7460             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7461                 return REPORT(0);
7462             Rop(OP_SLE);
7463
7464         case KEY_localtime:
7465             UNI(OP_LOCALTIME);
7466
7467         case KEY_log:
7468             UNI(OP_LOG);
7469
7470         case KEY_link:
7471             LOP(OP_LINK,XTERM);
7472
7473         case KEY_listen:
7474             LOP(OP_LISTEN,XTERM);
7475
7476         case KEY_lock:
7477             UNI(OP_LOCK);
7478
7479         case KEY_lstat:
7480             UNI(OP_LSTAT);
7481
7482         case KEY_m:
7483             s = scan_pat(s,OP_MATCH);
7484             TERM(sublex_start());
7485
7486         case KEY_map:
7487             LOP(OP_MAPSTART, XREF);
7488
7489         case KEY_mkdir:
7490             LOP(OP_MKDIR,XTERM);
7491
7492         case KEY_msgctl:
7493             LOP(OP_MSGCTL,XTERM);
7494
7495         case KEY_msgget:
7496             LOP(OP_MSGGET,XTERM);
7497
7498         case KEY_msgrcv:
7499             LOP(OP_MSGRCV,XTERM);
7500
7501         case KEY_msgsnd:
7502             LOP(OP_MSGSND,XTERM);
7503
7504         case KEY_our:
7505         case KEY_my:
7506         case KEY_state:
7507             PL_in_my = (U16)tmp;
7508             s = SKIPSPACE1(s);
7509             if (isIDFIRST_lazy_if(s,UTF)) {
7510 #ifdef PERL_MAD
7511                 char* start = s;
7512 #endif
7513                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7514                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7515                     goto really_sub;
7516                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7517                 if (!PL_in_my_stash) {
7518                     char tmpbuf[1024];
7519                     PL_bufptr = s;
7520                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7521                     yyerror(tmpbuf);
7522                 }
7523 #ifdef PERL_MAD
7524                 if (PL_madskills) {     /* just add type to declarator token */
7525                     sv_catsv(PL_thistoken, PL_nextwhite);
7526                     PL_nextwhite = 0;
7527                     sv_catpvn(PL_thistoken, start, s - start);
7528                 }
7529 #endif
7530             }
7531             pl_yylval.ival = 1;
7532             OPERATOR(MY);
7533
7534         case KEY_next:
7535             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7536             LOOPX(OP_NEXT);
7537
7538         case KEY_ne:
7539             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7540                 return REPORT(0);
7541             Eop(OP_SNE);
7542
7543         case KEY_no:
7544             s = tokenize_use(0, s);
7545             OPERATOR(USE);
7546
7547         case KEY_not:
7548             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7549                 FUN1(OP_NOT);
7550             else {
7551                 if (!PL_lex_allbrackets &&
7552                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7553                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7554                 OPERATOR(NOTOP);
7555             }
7556
7557         case KEY_open:
7558             s = SKIPSPACE1(s);
7559             if (isIDFIRST_lazy_if(s,UTF)) {
7560                 const char *t;
7561                 for (d = s; isALNUM_lazy_if(d,UTF);)
7562                     d++;
7563                 for (t=d; isSPACE(*t);)
7564                     t++;
7565                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7566                     /* [perl #16184] */
7567                     && !(t[0] == '=' && t[1] == '>')
7568                 ) {
7569                     int parms_len = (int)(d-s);
7570                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7571                            "Precedence problem: open %.*s should be open(%.*s)",
7572                             parms_len, s, parms_len, s);
7573                 }
7574             }
7575             LOP(OP_OPEN,XTERM);
7576
7577         case KEY_or:
7578             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7579                 return REPORT(0);
7580             pl_yylval.ival = OP_OR;
7581             OPERATOR(OROP);
7582
7583         case KEY_ord:
7584             UNI(OP_ORD);
7585
7586         case KEY_oct:
7587             UNI(OP_OCT);
7588
7589         case KEY_opendir:
7590             LOP(OP_OPEN_DIR,XTERM);
7591
7592         case KEY_print:
7593             checkcomma(s,PL_tokenbuf,"filehandle");
7594             LOP(OP_PRINT,XREF);
7595
7596         case KEY_printf:
7597             checkcomma(s,PL_tokenbuf,"filehandle");
7598             LOP(OP_PRTF,XREF);
7599
7600         case KEY_prototype:
7601             UNI(OP_PROTOTYPE);
7602
7603         case KEY_push:
7604             LOP(OP_PUSH,XTERM);
7605
7606         case KEY_pop:
7607             UNIDOR(OP_POP);
7608
7609         case KEY_pos:
7610             UNIDOR(OP_POS);
7611
7612         case KEY_pack:
7613             LOP(OP_PACK,XTERM);
7614
7615         case KEY_package:
7616             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7617             s = SKIPSPACE1(s);
7618             s = force_strict_version(s);
7619             PL_lex_expect = XBLOCK;
7620             OPERATOR(PACKAGE);
7621
7622         case KEY_pipe:
7623             LOP(OP_PIPE_OP,XTERM);
7624
7625         case KEY_q:
7626             s = scan_str(s,!!PL_madskills,FALSE);
7627             if (!s)
7628                 missingterm(NULL);
7629             pl_yylval.ival = OP_CONST;
7630             TERM(sublex_start());
7631
7632         case KEY_quotemeta:
7633             UNI(OP_QUOTEMETA);
7634
7635         case KEY_qw: {
7636             OP *words = NULL;
7637             s = scan_str(s,!!PL_madskills,FALSE);
7638             if (!s)
7639                 missingterm(NULL);
7640             PL_expect = XOPERATOR;
7641             if (SvCUR(PL_lex_stuff)) {
7642                 int warned_comma = !ckWARN(WARN_QW);
7643                 int warned_comment = warned_comma;
7644                 d = SvPV_force(PL_lex_stuff, len);
7645                 while (len) {
7646                     for (; isSPACE(*d) && len; --len, ++d)
7647                         /**/;
7648                     if (len) {
7649                         SV *sv;
7650                         const char *b = d;
7651                         if (!warned_comma || !warned_comment) {
7652                             for (; !isSPACE(*d) && len; --len, ++d) {
7653                                 if (!warned_comma && *d == ',') {
7654                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7655                                         "Possible attempt to separate words with commas");
7656                                     ++warned_comma;
7657                                 }
7658                                 else if (!warned_comment && *d == '#') {
7659                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7660                                         "Possible attempt to put comments in qw() list");
7661                                     ++warned_comment;
7662                                 }
7663                             }
7664                         }
7665                         else {
7666                             for (; !isSPACE(*d) && len; --len, ++d)
7667                                 /**/;
7668                         }
7669                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7670                         words = op_append_elem(OP_LIST, words,
7671                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7672                     }
7673                 }
7674             }
7675             if (!words)
7676                 words = newNULLLIST();
7677             if (PL_lex_stuff) {
7678                 SvREFCNT_dec(PL_lex_stuff);
7679                 PL_lex_stuff = NULL;
7680             }
7681             PL_expect = XOPERATOR;
7682             pl_yylval.opval = sawparens(words);
7683             TOKEN(QWLIST);
7684         }
7685
7686         case KEY_qq:
7687             s = scan_str(s,!!PL_madskills,FALSE);
7688             if (!s)
7689                 missingterm(NULL);
7690             pl_yylval.ival = OP_STRINGIFY;
7691             if (SvIVX(PL_lex_stuff) == '\'')
7692                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7693             TERM(sublex_start());
7694
7695         case KEY_qr:
7696             s = scan_pat(s,OP_QR);
7697             TERM(sublex_start());
7698
7699         case KEY_qx:
7700             s = scan_str(s,!!PL_madskills,FALSE);
7701             if (!s)
7702                 missingterm(NULL);
7703             readpipe_override();
7704             TERM(sublex_start());
7705
7706         case KEY_return:
7707             OLDLOP(OP_RETURN);
7708
7709         case KEY_require:
7710             s = SKIPSPACE1(s);
7711             if (isDIGIT(*s)) {
7712                 s = force_version(s, FALSE);
7713             }
7714             else if (*s != 'v' || !isDIGIT(s[1])
7715                     || (s = force_version(s, TRUE), *s == 'v'))
7716             {
7717                 *PL_tokenbuf = '\0';
7718                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7719                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7720                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7721                 else if (*s == '<')
7722                     yyerror("<> should be quotes");
7723             }
7724             if (orig_keyword == KEY_require) {
7725                 orig_keyword = 0;
7726                 pl_yylval.ival = 1;
7727             }
7728             else
7729                 pl_yylval.ival = 0;
7730             PL_expect = XTERM;
7731             PL_bufptr = s;
7732             PL_last_uni = PL_oldbufptr;
7733             PL_last_lop_op = OP_REQUIRE;
7734             s = skipspace(s);
7735             return REPORT( (int)REQUIRE );
7736
7737         case KEY_reset:
7738             UNI(OP_RESET);
7739
7740         case KEY_redo:
7741             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7742             LOOPX(OP_REDO);
7743
7744         case KEY_rename:
7745             LOP(OP_RENAME,XTERM);
7746
7747         case KEY_rand:
7748             UNI(OP_RAND);
7749
7750         case KEY_rmdir:
7751             UNI(OP_RMDIR);
7752
7753         case KEY_rindex:
7754             LOP(OP_RINDEX,XTERM);
7755
7756         case KEY_read:
7757             LOP(OP_READ,XTERM);
7758
7759         case KEY_readdir:
7760             UNI(OP_READDIR);
7761
7762         case KEY_readline:
7763             UNIDOR(OP_READLINE);
7764
7765         case KEY_readpipe:
7766             UNIDOR(OP_BACKTICK);
7767
7768         case KEY_rewinddir:
7769             UNI(OP_REWINDDIR);
7770
7771         case KEY_recv:
7772             LOP(OP_RECV,XTERM);
7773
7774         case KEY_reverse:
7775             LOP(OP_REVERSE,XTERM);
7776
7777         case KEY_readlink:
7778             UNIDOR(OP_READLINK);
7779
7780         case KEY_ref:
7781             UNI(OP_REF);
7782
7783         case KEY_s:
7784             s = scan_subst(s);
7785             if (pl_yylval.opval)
7786                 TERM(sublex_start());
7787             else
7788                 TOKEN(1);       /* force error */
7789
7790         case KEY_say:
7791             checkcomma(s,PL_tokenbuf,"filehandle");
7792             LOP(OP_SAY,XREF);
7793
7794         case KEY_chomp:
7795             UNI(OP_CHOMP);
7796
7797         case KEY_scalar:
7798             UNI(OP_SCALAR);
7799
7800         case KEY_select:
7801             LOP(OP_SELECT,XTERM);
7802
7803         case KEY_seek:
7804             LOP(OP_SEEK,XTERM);
7805
7806         case KEY_semctl:
7807             LOP(OP_SEMCTL,XTERM);
7808
7809         case KEY_semget:
7810             LOP(OP_SEMGET,XTERM);
7811
7812         case KEY_semop:
7813             LOP(OP_SEMOP,XTERM);
7814
7815         case KEY_send:
7816             LOP(OP_SEND,XTERM);
7817
7818         case KEY_setpgrp:
7819             LOP(OP_SETPGRP,XTERM);
7820
7821         case KEY_setpriority:
7822             LOP(OP_SETPRIORITY,XTERM);
7823
7824         case KEY_sethostent:
7825             UNI(OP_SHOSTENT);
7826
7827         case KEY_setnetent:
7828             UNI(OP_SNETENT);
7829
7830         case KEY_setservent:
7831             UNI(OP_SSERVENT);
7832
7833         case KEY_setprotoent:
7834             UNI(OP_SPROTOENT);
7835
7836         case KEY_setpwent:
7837             FUN0(OP_SPWENT);
7838
7839         case KEY_setgrent:
7840             FUN0(OP_SGRENT);
7841
7842         case KEY_seekdir:
7843             LOP(OP_SEEKDIR,XTERM);
7844
7845         case KEY_setsockopt:
7846             LOP(OP_SSOCKOPT,XTERM);
7847
7848         case KEY_shift:
7849             UNIDOR(OP_SHIFT);
7850
7851         case KEY_shmctl:
7852             LOP(OP_SHMCTL,XTERM);
7853
7854         case KEY_shmget:
7855             LOP(OP_SHMGET,XTERM);
7856
7857         case KEY_shmread:
7858             LOP(OP_SHMREAD,XTERM);
7859
7860         case KEY_shmwrite:
7861             LOP(OP_SHMWRITE,XTERM);
7862
7863         case KEY_shutdown:
7864             LOP(OP_SHUTDOWN,XTERM);
7865
7866         case KEY_sin:
7867             UNI(OP_SIN);
7868
7869         case KEY_sleep:
7870             UNI(OP_SLEEP);
7871
7872         case KEY_socket:
7873             LOP(OP_SOCKET,XTERM);
7874
7875         case KEY_socketpair:
7876             LOP(OP_SOCKPAIR,XTERM);
7877
7878         case KEY_sort:
7879             checkcomma(s,PL_tokenbuf,"subroutine name");
7880             s = SKIPSPACE1(s);
7881             if (*s == ';' || *s == ')')         /* probably a close */
7882                 Perl_croak(aTHX_ "sort is now a reserved word");
7883             PL_expect = XTERM;
7884             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7885             LOP(OP_SORT,XREF);
7886
7887         case KEY_split:
7888             LOP(OP_SPLIT,XTERM);
7889
7890         case KEY_sprintf:
7891             LOP(OP_SPRINTF,XTERM);
7892
7893         case KEY_splice:
7894             LOP(OP_SPLICE,XTERM);
7895
7896         case KEY_sqrt:
7897             UNI(OP_SQRT);
7898
7899         case KEY_srand:
7900             UNI(OP_SRAND);
7901
7902         case KEY_stat:
7903             UNI(OP_STAT);
7904
7905         case KEY_study:
7906             UNI(OP_STUDY);
7907
7908         case KEY_substr:
7909             LOP(OP_SUBSTR,XTERM);
7910
7911         case KEY_format:
7912         case KEY_sub:
7913           really_sub:
7914             {
7915                 char tmpbuf[sizeof PL_tokenbuf];
7916                 SSize_t tboffset = 0;
7917                 expectation attrful;
7918                 bool have_name, have_proto;
7919                 const int key = tmp;
7920
7921 #ifdef PERL_MAD
7922                 SV *tmpwhite = 0;
7923
7924                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7925                 SV *subtoken = newSVpvn(tstart, s - tstart);
7926                 PL_thistoken = 0;
7927
7928                 d = s;
7929                 s = SKIPSPACE2(s,tmpwhite);
7930 #else
7931                 s = skipspace(s);
7932 #endif
7933
7934                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7935                     (*s == ':' && s[1] == ':'))
7936                 {
7937 #ifdef PERL_MAD
7938                     SV *nametoke = NULL;
7939 #endif
7940
7941                     PL_expect = XBLOCK;
7942                     attrful = XATTRBLOCK;
7943                     /* remember buffer pos'n for later force_word */
7944                     tboffset = s - PL_oldbufptr;
7945                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7946 #ifdef PERL_MAD
7947                     if (PL_madskills)
7948                         nametoke = newSVpvn(s, d - s);
7949 #endif
7950                     if (memchr(tmpbuf, ':', len))
7951                         sv_setpvn(PL_subname, tmpbuf, len);
7952                     else {
7953                         sv_setsv(PL_subname,PL_curstname);
7954                         sv_catpvs(PL_subname,"::");
7955                         sv_catpvn(PL_subname,tmpbuf,len);
7956                     }
7957                     have_name = TRUE;
7958
7959 #ifdef PERL_MAD
7960
7961                     start_force(0);
7962                     CURMAD('X', nametoke);
7963                     CURMAD('_', tmpwhite);
7964                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7965                                       FALSE, TRUE, TRUE);
7966
7967                     s = SKIPSPACE2(d,tmpwhite);
7968 #else
7969                     s = skipspace(d);
7970 #endif
7971                 }
7972                 else {
7973                     if (key == KEY_my)
7974                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7975                     PL_expect = XTERMBLOCK;
7976                     attrful = XATTRTERM;
7977                     sv_setpvs(PL_subname,"?");
7978                     have_name = FALSE;
7979                 }
7980
7981                 if (key == KEY_format) {
7982                     if (*s == '=')
7983                         PL_lex_formbrack = PL_lex_brackets + 1;
7984 #ifdef PERL_MAD
7985                     PL_thistoken = subtoken;
7986                     s = d;
7987 #else
7988                     if (have_name)
7989                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7990                                           FALSE, TRUE, TRUE);
7991 #endif
7992                     OPERATOR(FORMAT);
7993                 }
7994
7995                 /* Look for a prototype */
7996                 if (*s == '(') {
7997                     char *p;
7998                     bool bad_proto = FALSE;
7999                     bool in_brackets = FALSE;
8000                     char greedy_proto = ' ';
8001                     bool proto_after_greedy_proto = FALSE;
8002                     bool must_be_last = FALSE;
8003                     bool underscore = FALSE;
8004                     bool seen_underscore = FALSE;
8005                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8006
8007                     s = scan_str(s,!!PL_madskills,FALSE);
8008                     if (!s)
8009                         Perl_croak(aTHX_ "Prototype not terminated");
8010                     /* strip spaces and check for bad characters */
8011                     d = SvPVX(PL_lex_stuff);
8012                     tmp = 0;
8013                     for (p = d; *p; ++p) {
8014                         if (!isSPACE(*p)) {
8015                             d[tmp++] = *p;
8016
8017                             if (warnillegalproto) {
8018                                 if (must_be_last)
8019                                     proto_after_greedy_proto = TRUE;
8020                                 if (!strchr("$@%*;[]&\\_+", *p)) {
8021                                     bad_proto = TRUE;
8022                                 }
8023                                 else {
8024                                     if ( underscore ) {
8025                                         if ( *p != ';' )
8026                                             bad_proto = TRUE;
8027                                         underscore = FALSE;
8028                                     }
8029                                     if ( *p == '[' ) {
8030                                         in_brackets = TRUE;
8031                                     }
8032                                     else if ( *p == ']' ) {
8033                                         in_brackets = FALSE;
8034                                     }
8035                                     else if ( (*p == '@' || *p == '%') &&
8036                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8037                                          !in_brackets ) {
8038                                         must_be_last = TRUE;
8039                                         greedy_proto = *p;
8040                                     }
8041                                     else if ( *p == '_' ) {
8042                                         underscore = seen_underscore = TRUE;
8043                                     }
8044                                 }
8045                             }
8046                         }
8047                     }
8048                     d[tmp] = '\0';
8049                     if (proto_after_greedy_proto)
8050                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8051                                     "Prototype after '%c' for %"SVf" : %s",
8052                                     greedy_proto, SVfARG(PL_subname), d);
8053                     if (bad_proto)
8054                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8055                                     "Illegal character %sin prototype for %"SVf" : %s",
8056                                     seen_underscore ? "after '_' " : "",
8057                                     SVfARG(PL_subname), d);
8058                     SvCUR_set(PL_lex_stuff, tmp);
8059                     have_proto = TRUE;
8060
8061 #ifdef PERL_MAD
8062                     start_force(0);
8063                     CURMAD('q', PL_thisopen);
8064                     CURMAD('_', tmpwhite);
8065                     CURMAD('=', PL_thisstuff);
8066                     CURMAD('Q', PL_thisclose);
8067                     NEXTVAL_NEXTTOKE.opval =
8068                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8069                     PL_lex_stuff = NULL;
8070                     force_next(THING);
8071
8072                     s = SKIPSPACE2(s,tmpwhite);
8073 #else
8074                     s = skipspace(s);
8075 #endif
8076                 }
8077                 else
8078                     have_proto = FALSE;
8079
8080                 if (*s == ':' && s[1] != ':')
8081                     PL_expect = attrful;
8082                 else if (*s != '{' && key == KEY_sub) {
8083                     if (!have_name)
8084                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8085                     else if (*s != ';' && *s != '}')
8086                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8087                 }
8088
8089 #ifdef PERL_MAD
8090                 start_force(0);
8091                 if (tmpwhite) {
8092                     if (PL_madskills)
8093                         curmad('^', newSVpvs(""));
8094                     CURMAD('_', tmpwhite);
8095                 }
8096                 force_next(0);
8097
8098                 PL_thistoken = subtoken;
8099 #else
8100                 if (have_proto) {
8101                     NEXTVAL_NEXTTOKE.opval =
8102                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8103                     PL_lex_stuff = NULL;
8104                     force_next(THING);
8105                 }
8106 #endif
8107                 if (!have_name) {
8108                     if (PL_curstash)
8109                         sv_setpvs(PL_subname, "__ANON__");
8110                     else
8111                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8112                     TOKEN(ANONSUB);
8113                 }
8114 #ifndef PERL_MAD
8115                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8116                                   FALSE, TRUE, TRUE);
8117 #endif
8118                 if (key == KEY_my)
8119                     TOKEN(MYSUB);
8120                 TOKEN(SUB);
8121             }
8122
8123         case KEY_system:
8124             LOP(OP_SYSTEM,XREF);
8125
8126         case KEY_symlink:
8127             LOP(OP_SYMLINK,XTERM);
8128
8129         case KEY_syscall:
8130             LOP(OP_SYSCALL,XTERM);
8131
8132         case KEY_sysopen:
8133             LOP(OP_SYSOPEN,XTERM);
8134
8135         case KEY_sysseek:
8136             LOP(OP_SYSSEEK,XTERM);
8137
8138         case KEY_sysread:
8139             LOP(OP_SYSREAD,XTERM);
8140
8141         case KEY_syswrite:
8142             LOP(OP_SYSWRITE,XTERM);
8143
8144         case KEY_tr:
8145             s = scan_trans(s);
8146             TERM(sublex_start());
8147
8148         case KEY_tell:
8149             UNI(OP_TELL);
8150
8151         case KEY_telldir:
8152             UNI(OP_TELLDIR);
8153
8154         case KEY_tie:
8155             LOP(OP_TIE,XTERM);
8156
8157         case KEY_tied:
8158             UNI(OP_TIED);
8159
8160         case KEY_time:
8161             FUN0(OP_TIME);
8162
8163         case KEY_times:
8164             FUN0(OP_TMS);
8165
8166         case KEY_truncate:
8167             LOP(OP_TRUNCATE,XTERM);
8168
8169         case KEY_uc:
8170             UNI(OP_UC);
8171
8172         case KEY_ucfirst:
8173             UNI(OP_UCFIRST);
8174
8175         case KEY_untie:
8176             UNI(OP_UNTIE);
8177
8178         case KEY_until:
8179             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8180                 return REPORT(0);
8181             pl_yylval.ival = CopLINE(PL_curcop);
8182             OPERATOR(UNTIL);
8183
8184         case KEY_unless:
8185             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8186                 return REPORT(0);
8187             pl_yylval.ival = CopLINE(PL_curcop);
8188             OPERATOR(UNLESS);
8189
8190         case KEY_unlink:
8191             LOP(OP_UNLINK,XTERM);
8192
8193         case KEY_undef:
8194             UNIDOR(OP_UNDEF);
8195
8196         case KEY_unpack:
8197             LOP(OP_UNPACK,XTERM);
8198
8199         case KEY_utime:
8200             LOP(OP_UTIME,XTERM);
8201
8202         case KEY_umask:
8203             UNIDOR(OP_UMASK);
8204
8205         case KEY_unshift:
8206             LOP(OP_UNSHIFT,XTERM);
8207
8208         case KEY_use:
8209             s = tokenize_use(1, s);
8210             OPERATOR(USE);
8211
8212         case KEY_values:
8213             UNI(OP_VALUES);
8214
8215         case KEY_vec:
8216             LOP(OP_VEC,XTERM);
8217
8218         case KEY_when:
8219             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8220                 return REPORT(0);
8221             pl_yylval.ival = CopLINE(PL_curcop);
8222             OPERATOR(WHEN);
8223
8224         case KEY_while:
8225             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8226                 return REPORT(0);
8227             pl_yylval.ival = CopLINE(PL_curcop);
8228             OPERATOR(WHILE);
8229
8230         case KEY_warn:
8231             PL_hints |= HINT_BLOCK_SCOPE;
8232             LOP(OP_WARN,XTERM);
8233
8234         case KEY_wait:
8235             FUN0(OP_WAIT);
8236
8237         case KEY_waitpid:
8238             LOP(OP_WAITPID,XTERM);
8239
8240         case KEY_wantarray:
8241             FUN0(OP_WANTARRAY);
8242
8243         case KEY_write:
8244 #ifdef EBCDIC
8245         {
8246             char ctl_l[2];
8247             ctl_l[0] = toCTRL('L');
8248             ctl_l[1] = '\0';
8249             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8250         }
8251 #else
8252             /* Make sure $^L is defined */
8253             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8254 #endif
8255             UNI(OP_ENTERWRITE);
8256
8257         case KEY_x:
8258             if (PL_expect == XOPERATOR) {
8259                 if (*s == '=' && !PL_lex_allbrackets &&
8260                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8261                     return REPORT(0);
8262                 Mop(OP_REPEAT);
8263             }
8264             check_uni();
8265             goto just_a_word;
8266
8267         case KEY_xor:
8268             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8269                 return REPORT(0);
8270             pl_yylval.ival = OP_XOR;
8271             OPERATOR(OROP);
8272
8273         case KEY_y:
8274             s = scan_trans(s);
8275             TERM(sublex_start());
8276         }
8277     }}
8278 }
8279 #ifdef __SC__
8280 #pragma segment Main
8281 #endif
8282
8283 static int
8284 S_pending_ident(pTHX)
8285 {
8286     dVAR;
8287     register char *d;
8288     PADOFFSET tmp = 0;
8289     /* pit holds the identifier we read and pending_ident is reset */
8290     char pit = PL_pending_ident;
8291     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8292     /* All routes through this function want to know if there is a colon.  */
8293     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8294     PL_pending_ident = 0;
8295
8296     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8297     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8298           "### Pending identifier '%s'\n", PL_tokenbuf); });
8299
8300     /* if we're in a my(), we can't allow dynamics here.
8301        $foo'bar has already been turned into $foo::bar, so
8302        just check for colons.
8303
8304        if it's a legal name, the OP is a PADANY.
8305     */
8306     if (PL_in_my) {
8307         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8308             if (has_colon)
8309                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8310                                   "variable %s in \"our\"",
8311                                   PL_tokenbuf));
8312             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8313         }
8314         else {
8315             if (has_colon)
8316                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8317                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8318
8319             pl_yylval.opval = newOP(OP_PADANY, 0);
8320             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8321                                                         UTF ? SVf_UTF8 : 0);
8322             return PRIVATEREF;
8323         }
8324     }
8325
8326     /*
8327        build the ops for accesses to a my() variable.
8328
8329        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8330        then used in a comparison.  This catches most, but not
8331        all cases.  For instance, it catches
8332            sort { my($a); $a <=> $b }
8333        but not
8334            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8335        (although why you'd do that is anyone's guess).
8336     */
8337
8338     if (!has_colon) {
8339         if (!PL_in_my)
8340             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8341                                     UTF ? SVf_UTF8 : 0);
8342         if (tmp != NOT_IN_PAD) {
8343             /* might be an "our" variable" */
8344             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8345                 /* build ops for a bareword */
8346                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8347                 HEK * const stashname = HvNAME_HEK(stash);
8348                 SV *  const sym = newSVhek(stashname);
8349                 sv_catpvs(sym, "::");
8350                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8351                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8352                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8353                 gv_fetchsv(sym,
8354                     (PL_in_eval
8355                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8356                         : GV_ADDMULTI
8357                     ),
8358                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8359                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8360                      : SVt_PVHV));
8361                 return WORD;
8362             }
8363
8364             /* if it's a sort block and they're naming $a or $b */
8365             if (PL_last_lop_op == OP_SORT &&
8366                 PL_tokenbuf[0] == '$' &&
8367                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8368                 && !PL_tokenbuf[2])
8369             {
8370                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8371                      d < PL_bufend && *d != '\n';
8372                      d++)
8373                 {
8374                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8375                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8376                               PL_tokenbuf);
8377                     }
8378                 }
8379             }
8380
8381             pl_yylval.opval = newOP(OP_PADANY, 0);
8382             pl_yylval.opval->op_targ = tmp;
8383             return PRIVATEREF;
8384         }
8385     }
8386
8387     /*
8388        Whine if they've said @foo in a doublequoted string,
8389        and @foo isn't a variable we can find in the symbol
8390        table.
8391     */
8392     if (ckWARN(WARN_AMBIGUOUS) &&
8393         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8394         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8395                                          SVt_PVAV);
8396         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8397                 /* DO NOT warn for @- and @+ */
8398                 && !( PL_tokenbuf[2] == '\0' &&
8399                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8400            )
8401         {
8402             /* Downgraded from fatal to warning 20000522 mjd */
8403             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8404                         "Possible unintended interpolation of %s in string",
8405                         PL_tokenbuf);
8406         }
8407     }
8408
8409     /* build ops for a bareword */
8410     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8411                                                       tokenbuf_len - 1));
8412     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8413     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8414                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8415                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8416                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8417                       : SVt_PVHV));
8418     return WORD;
8419 }
8420
8421 STATIC void
8422 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8423 {
8424     dVAR;
8425
8426     PERL_ARGS_ASSERT_CHECKCOMMA;
8427
8428     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8429         if (ckWARN(WARN_SYNTAX)) {
8430             int level = 1;
8431             const char *w;
8432             for (w = s+2; *w && level; w++) {
8433                 if (*w == '(')
8434                     ++level;
8435                 else if (*w == ')')
8436                     --level;
8437             }
8438             while (isSPACE(*w))
8439                 ++w;
8440             /* the list of chars below is for end of statements or
8441              * block / parens, boolean operators (&&, ||, //) and branch
8442              * constructs (or, and, if, until, unless, while, err, for).
8443              * Not a very solid hack... */
8444             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8445                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8446                             "%s (...) interpreted as function",name);
8447         }
8448     }
8449     while (s < PL_bufend && isSPACE(*s))
8450         s++;
8451     if (*s == '(')
8452         s++;
8453     while (s < PL_bufend && isSPACE(*s))
8454         s++;
8455     if (isIDFIRST_lazy_if(s,UTF)) {
8456         const char * const w = s++;
8457         while (isALNUM_lazy_if(s,UTF))
8458             s++;
8459         while (s < PL_bufend && isSPACE(*s))
8460             s++;
8461         if (*s == ',') {
8462             GV* gv;
8463             if (keyword(w, s - w, 0))
8464                 return;
8465
8466             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8467             if (gv && GvCVu(gv))
8468                 return;
8469             Perl_croak(aTHX_ "No comma allowed after %s", what);
8470         }
8471     }
8472 }
8473
8474 /* Either returns sv, or mortalizes sv and returns a new SV*.
8475    Best used as sv=new_constant(..., sv, ...).
8476    If s, pv are NULL, calls subroutine with one argument,
8477    and type is used with error messages only. */
8478
8479 STATIC SV *
8480 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8481                SV *sv, SV *pv, const char *type, STRLEN typelen)
8482 {
8483     dVAR; dSP;
8484     HV * const table = GvHV(PL_hintgv);          /* ^H */
8485     SV *res;
8486     SV **cvp;
8487     SV *cv, *typesv;
8488     const char *why1 = "", *why2 = "", *why3 = "";
8489
8490     PERL_ARGS_ASSERT_NEW_CONSTANT;
8491
8492     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8493         SV *msg;
8494
8495         why2 = (const char *)
8496             (strEQ(key,"charnames")
8497              ? "(possibly a missing \"use charnames ...\")"
8498              : "");
8499         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8500                             (type ? type: "undef"), why2);
8501
8502         /* This is convoluted and evil ("goto considered harmful")
8503          * but I do not understand the intricacies of all the different
8504          * failure modes of %^H in here.  The goal here is to make
8505          * the most probable error message user-friendly. --jhi */
8506
8507         goto msgdone;
8508
8509     report:
8510         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8511                             (type ? type: "undef"), why1, why2, why3);
8512     msgdone:
8513         yyerror(SvPVX_const(msg));
8514         SvREFCNT_dec(msg);
8515         return sv;
8516     }
8517
8518     /* charnames doesn't work well if there have been errors found */
8519     if (PL_error_count > 0 && strEQ(key,"charnames"))
8520         return &PL_sv_undef;
8521
8522     cvp = hv_fetch(table, key, keylen, FALSE);
8523     if (!cvp || !SvOK(*cvp)) {
8524         why1 = "$^H{";
8525         why2 = key;
8526         why3 = "} is not defined";
8527         goto report;
8528     }
8529     sv_2mortal(sv);                     /* Parent created it permanently */
8530     cv = *cvp;
8531     if (!pv && s)
8532         pv = newSVpvn_flags(s, len, SVs_TEMP);
8533     if (type && pv)
8534         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8535     else
8536         typesv = &PL_sv_undef;
8537
8538     PUSHSTACKi(PERLSI_OVERLOAD);
8539     ENTER ;
8540     SAVETMPS;
8541
8542     PUSHMARK(SP) ;
8543     EXTEND(sp, 3);
8544     if (pv)
8545         PUSHs(pv);
8546     PUSHs(sv);
8547     if (pv)
8548         PUSHs(typesv);
8549     PUTBACK;
8550     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8551
8552     SPAGAIN ;
8553
8554     /* Check the eval first */
8555     if (!PL_in_eval && SvTRUE(ERRSV)) {
8556         sv_catpvs(ERRSV, "Propagated");
8557         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8558         (void)POPs;
8559         res = SvREFCNT_inc_simple(sv);
8560     }
8561     else {
8562         res = POPs;
8563         SvREFCNT_inc_simple_void(res);
8564     }
8565
8566     PUTBACK ;
8567     FREETMPS ;
8568     LEAVE ;
8569     POPSTACK;
8570
8571     if (!SvOK(res)) {
8572         why1 = "Call to &{$^H{";
8573         why2 = key;
8574         why3 = "}} did not return a defined value";
8575         sv = res;
8576         goto report;
8577     }
8578
8579     return res;
8580 }
8581
8582 /* Returns a NUL terminated string, with the length of the string written to
8583    *slp
8584    */
8585 STATIC char *
8586 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8587 {
8588     dVAR;
8589     register char *d = dest;
8590     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8591
8592     PERL_ARGS_ASSERT_SCAN_WORD;
8593
8594     for (;;) {
8595         if (d >= e)
8596             Perl_croak(aTHX_ ident_too_long);
8597         if (isALNUM(*s))        /* UTF handled below */
8598             *d++ = *s++;
8599         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8600             *d++ = ':';
8601             *d++ = ':';
8602             s++;
8603         }
8604         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8605             *d++ = *s++;
8606             *d++ = *s++;
8607         }
8608         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8609             char *t = s + UTF8SKIP(s);
8610             size_t len;
8611             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8612                 t += UTF8SKIP(t);
8613             len = t - s;
8614             if (d + len > e)
8615                 Perl_croak(aTHX_ ident_too_long);
8616             Copy(s, d, len, char);
8617             d += len;
8618             s = t;
8619         }
8620         else {
8621             *d = '\0';
8622             *slp = d - dest;
8623             return s;
8624         }
8625     }
8626 }
8627
8628 STATIC char *
8629 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8630 {
8631     dVAR;
8632     char *bracket = NULL;
8633     char funny = *s++;
8634     register char *d = dest;
8635     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8636
8637     PERL_ARGS_ASSERT_SCAN_IDENT;
8638
8639     if (isSPACE(*s))
8640         s = PEEKSPACE(s);
8641     if (isDIGIT(*s)) {
8642         while (isDIGIT(*s)) {
8643             if (d >= e)
8644                 Perl_croak(aTHX_ ident_too_long);
8645             *d++ = *s++;
8646         }
8647     }
8648     else {
8649         for (;;) {
8650             if (d >= e)
8651                 Perl_croak(aTHX_ ident_too_long);
8652             if (isALNUM(*s))    /* UTF handled below */
8653                 *d++ = *s++;
8654             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8655                 *d++ = ':';
8656                 *d++ = ':';
8657                 s++;
8658             }
8659             else if (*s == ':' && s[1] == ':') {
8660                 *d++ = *s++;
8661                 *d++ = *s++;
8662             }
8663             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8664                 char *t = s + UTF8SKIP(s);
8665                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8666                     t += UTF8SKIP(t);
8667                 if (d + (t - s) > e)
8668                     Perl_croak(aTHX_ ident_too_long);
8669                 Copy(s, d, t - s, char);
8670                 d += t - s;
8671                 s = t;
8672             }
8673             else
8674                 break;
8675         }
8676     }
8677     *d = '\0';
8678     d = dest;
8679     if (*d) {
8680         if (PL_lex_state != LEX_NORMAL)
8681             PL_lex_state = LEX_INTERPENDMAYBE;
8682         return s;
8683     }
8684     if (*s == '$' && s[1] &&
8685         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8686     {
8687         return s;
8688     }
8689     if (*s == '{') {
8690         bracket = s;
8691         s++;
8692     }
8693     else if (ck_uni)
8694         check_uni();
8695     if (s < send)
8696         *d = *s++;
8697     d[1] = '\0';
8698     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8699         *d = toCTRL(*s);
8700         s++;
8701     }
8702     if (bracket) {
8703         if (isSPACE(s[-1])) {
8704             while (s < send) {
8705                 const char ch = *s++;
8706                 if (!SPACE_OR_TAB(ch)) {
8707                     *d = ch;
8708                     break;
8709                 }
8710             }
8711         }
8712         if (isIDFIRST_lazy_if(d,UTF)) {
8713             d++;
8714             if (UTF) {
8715                 char *end = s;
8716                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8717                     end += UTF8SKIP(end);
8718                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8719                         end += UTF8SKIP(end);
8720                 }
8721                 Copy(s, d, end - s, char);
8722                 d += end - s;
8723                 s = end;
8724             }
8725             else {
8726                 while ((isALNUM(*s) || *s == ':') && d < e)
8727                     *d++ = *s++;
8728                 if (d >= e)
8729                     Perl_croak(aTHX_ ident_too_long);
8730             }
8731             *d = '\0';
8732             while (s < send && SPACE_OR_TAB(*s))
8733                 s++;
8734             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8735                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8736                     const char * const brack =
8737                         (const char *)
8738                         ((*s == '[') ? "[...]" : "{...}");
8739    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8740                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8741                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8742                         funny, dest, brack, funny, dest, brack);
8743                 }
8744                 bracket++;
8745                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8746                 PL_lex_allbrackets++;
8747                 return s;
8748             }
8749         }
8750         /* Handle extended ${^Foo} variables
8751          * 1999-02-27 mjd-perl-patch@plover.com */
8752         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8753                  && isALNUM(*s))
8754         {
8755             d++;
8756             while (isALNUM(*s) && d < e) {
8757                 *d++ = *s++;
8758             }
8759             if (d >= e)
8760                 Perl_croak(aTHX_ ident_too_long);
8761             *d = '\0';
8762         }
8763         if (*s == '}') {
8764             s++;
8765             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8766                 PL_lex_state = LEX_INTERPEND;
8767                 PL_expect = XREF;
8768             }
8769             if (PL_lex_state == LEX_NORMAL) {
8770                 if (ckWARN(WARN_AMBIGUOUS) &&
8771                     (keyword(dest, d - dest, 0)
8772                      || get_cvn_flags(dest, d - dest, 0)))
8773                 {
8774                     if (funny == '#')
8775                         funny = '@';
8776                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8777                         "Ambiguous use of %c{%s} resolved to %c%s",
8778                         funny, dest, funny, dest);
8779                 }
8780             }
8781         }
8782         else {
8783             s = bracket;                /* let the parser handle it */
8784             *dest = '\0';
8785         }
8786     }
8787     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8788         PL_lex_state = LEX_INTERPEND;
8789     return s;
8790 }
8791
8792 static bool
8793 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8794
8795     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8796      * the parse starting at 's', based on the subset that are valid in this
8797      * context input to this routine in 'valid_flags'. Advances s.  Returns
8798      * TRUE if the input was a valid flag, so the next char may be as well;
8799      * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8800      * current regex.  This routine will set it to any charset modifier found.
8801      * The caller shouldn't change it.  This way, another charset modifier
8802      * encountered in the parse can be detected as an error, as we have decided
8803      * allow only one */
8804
8805     const char c = **s;
8806
8807     if (! strchr(valid_flags, c)) {
8808         if (isALNUM(c)) {
8809             goto deprecate;
8810         }
8811         return FALSE;
8812     }
8813
8814     switch (c) {
8815
8816         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8817         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8818         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8819         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8820         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8821         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8822         case LOCALE_PAT_MOD:
8823
8824             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8825              * can't be regex modifiers.
8826              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
8827              * much as easily done.  s///lei, for example, has to mean regex
8828              * modifiers if it's not an error (as does any word character
8829              * following the 'e').  Otherwise, we resolve to the backwards-
8830              * compatible, but less likely 's/// le ...', i.e. as meaning
8831              * less-than-or-equal.  The reason it's not likely is that s//
8832              * returns a number for code in the field (/r returns a string, but
8833              * that wasn't added until the 5.13 series), and so '<=' should be
8834              * used for comparing, not 'le'. */
8835             if (*((*s) + 1) == 't') {
8836                 goto deprecate;
8837             }
8838             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8839
8840                 /* 'e' is valid only for substitutes, s///e.  If it is not
8841                  * valid in the current context, then 'm//le' must mean the
8842                  * comparison operator, so use the regular deprecation message.
8843                  */
8844                 if (! strchr(valid_flags, 'e')) {
8845                     goto deprecate;
8846                 }
8847                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8848                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
8849                 return FALSE;
8850             }
8851             if (*charset) {
8852                 goto multiple_charsets;
8853             }
8854             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8855             *charset = c;
8856             break;
8857         case UNICODE_PAT_MOD:
8858             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8859              * 'n' means they can't be regex modifiers */
8860             if (*((*s) + 1) == 'n') {
8861                 goto deprecate;
8862             }
8863             if (*charset) {
8864                 goto multiple_charsets;
8865             }
8866             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8867             *charset = c;
8868             break;
8869         case ASCII_RESTRICT_PAT_MOD:
8870             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8871              * can't be regex modifiers */
8872             if (*((*s) + 1) == 'n') {
8873                 goto deprecate;
8874             }
8875
8876             if (! *charset) {
8877                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8878             }
8879             else {
8880
8881                 /* Error if previous modifier wasn't an 'a', but if it was, see
8882                  * if, and accept, a second occurrence (only) */
8883                 if (*charset != 'a'
8884                     || get_regex_charset(*pmfl)
8885                         != REGEX_ASCII_RESTRICTED_CHARSET)
8886                 {
8887                         goto multiple_charsets;
8888                 }
8889                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8890             }
8891             *charset = c;
8892             break;
8893         case DEPENDS_PAT_MOD:
8894             if (*charset) {
8895                 goto multiple_charsets;
8896             }
8897             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8898             *charset = c;
8899             break;
8900     }
8901
8902     (*s)++;
8903     return TRUE;
8904
8905     deprecate:
8906         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8907             "Having no space between pattern and following word is deprecated");
8908         return FALSE;
8909
8910     multiple_charsets:
8911         if (*charset != c) {
8912             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8913         }
8914         else if (c == 'a') {
8915             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8916         }
8917         else {
8918             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8919         }
8920
8921         /* Pretend that it worked, so will continue processing before dieing */
8922         (*s)++;
8923         return TRUE;
8924 }
8925
8926 STATIC char *
8927 S_scan_pat(pTHX_ char *start, I32 type)
8928 {
8929     dVAR;
8930     PMOP *pm;
8931     char *s = scan_str(start,!!PL_madskills,FALSE);
8932     const char * const valid_flags =
8933         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8934     char charset = '\0';    /* character set modifier */
8935 #ifdef PERL_MAD
8936     char *modstart;
8937 #endif
8938
8939     PERL_ARGS_ASSERT_SCAN_PAT;
8940
8941     if (!s) {
8942         const char * const delimiter = skipspace(start);
8943         Perl_croak(aTHX_
8944                    (const char *)
8945                    (*delimiter == '?'
8946                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
8947                     : "Search pattern not terminated" ));
8948     }
8949
8950     pm = (PMOP*)newPMOP(type, 0);
8951     if (PL_multi_open == '?') {
8952         /* This is the only point in the code that sets PMf_ONCE:  */
8953         pm->op_pmflags |= PMf_ONCE;
8954
8955         /* Hence it's safe to do this bit of PMOP book-keeping here, which
8956            allows us to restrict the list needed by reset to just the ??
8957            matches.  */
8958         assert(type != OP_TRANS);
8959         if (PL_curstash) {
8960             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8961             U32 elements;
8962             if (!mg) {
8963                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8964                                  0);
8965             }
8966             elements = mg->mg_len / sizeof(PMOP**);
8967             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8968             ((PMOP**)mg->mg_ptr) [elements++] = pm;
8969             mg->mg_len = elements * sizeof(PMOP**);
8970             PmopSTASH_set(pm,PL_curstash);
8971         }
8972     }
8973 #ifdef PERL_MAD
8974     modstart = s;
8975 #endif
8976     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
8977 #ifdef PERL_MAD
8978     if (PL_madskills && modstart != s) {
8979         SV* tmptoken = newSVpvn(modstart, s - modstart);
8980         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8981     }
8982 #endif
8983     /* issue a warning if /c is specified,but /g is not */
8984     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8985     {
8986         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8987                        "Use of /c modifier is meaningless without /g" );
8988     }
8989
8990     PL_lex_op = (OP*)pm;
8991     pl_yylval.ival = OP_MATCH;
8992     return s;
8993 }
8994
8995 STATIC char *
8996 S_scan_subst(pTHX_ char *start)
8997 {
8998     dVAR;
8999     char *s;
9000     register PMOP *pm;
9001     I32 first_start;
9002     I32 es = 0;
9003     char charset = '\0';    /* character set modifier */
9004 #ifdef PERL_MAD
9005     char *modstart;
9006 #endif
9007
9008     PERL_ARGS_ASSERT_SCAN_SUBST;
9009
9010     pl_yylval.ival = OP_NULL;
9011
9012     s = scan_str(start,!!PL_madskills,FALSE);
9013
9014     if (!s)
9015         Perl_croak(aTHX_ "Substitution pattern not terminated");
9016
9017     if (s[-1] == PL_multi_open)
9018         s--;
9019 #ifdef PERL_MAD
9020     if (PL_madskills) {
9021         CURMAD('q', PL_thisopen);
9022         CURMAD('_', PL_thiswhite);
9023         CURMAD('E', PL_thisstuff);
9024         CURMAD('Q', PL_thisclose);
9025         PL_realtokenstart = s - SvPVX(PL_linestr);
9026     }
9027 #endif
9028
9029     first_start = PL_multi_start;
9030     s = scan_str(s,!!PL_madskills,FALSE);
9031     if (!s) {
9032         if (PL_lex_stuff) {
9033             SvREFCNT_dec(PL_lex_stuff);
9034             PL_lex_stuff = NULL;
9035         }
9036         Perl_croak(aTHX_ "Substitution replacement not terminated");
9037     }
9038     PL_multi_start = first_start;       /* so whole substitution is taken together */
9039
9040     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9041
9042 #ifdef PERL_MAD
9043     if (PL_madskills) {
9044         CURMAD('z', PL_thisopen);
9045         CURMAD('R', PL_thisstuff);
9046         CURMAD('Z', PL_thisclose);
9047     }
9048     modstart = s;
9049 #endif
9050
9051     while (*s) {
9052         if (*s == EXEC_PAT_MOD) {
9053             s++;
9054             es++;
9055         }
9056         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9057         {
9058             break;
9059         }
9060     }
9061
9062 #ifdef PERL_MAD
9063     if (PL_madskills) {
9064         if (modstart != s)
9065             curmad('m', newSVpvn(modstart, s - modstart));
9066         append_madprops(PL_thismad, (OP*)pm, 0);
9067         PL_thismad = 0;
9068     }
9069 #endif
9070     if ((pm->op_pmflags & PMf_CONTINUE)) {
9071         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9072     }
9073
9074     if (es) {
9075         SV * const repl = newSVpvs("");
9076
9077         PL_sublex_info.super_bufptr = s;
9078         PL_sublex_info.super_bufend = PL_bufend;
9079         PL_multi_end = 0;
9080         pm->op_pmflags |= PMf_EVAL;
9081         while (es-- > 0) {
9082             if (es)
9083                 sv_catpvs(repl, "eval ");
9084             else
9085                 sv_catpvs(repl, "do ");
9086         }
9087         sv_catpvs(repl, "{");
9088         sv_catsv(repl, PL_lex_repl);
9089         if (strchr(SvPVX(PL_lex_repl), '#'))
9090             sv_catpvs(repl, "\n");
9091         sv_catpvs(repl, "}");
9092         SvEVALED_on(repl);
9093         SvREFCNT_dec(PL_lex_repl);
9094         PL_lex_repl = repl;
9095     }
9096
9097     PL_lex_op = (OP*)pm;
9098     pl_yylval.ival = OP_SUBST;
9099     return s;
9100 }
9101
9102 STATIC char *
9103 S_scan_trans(pTHX_ char *start)
9104 {
9105     dVAR;
9106     register char* s;
9107     OP *o;
9108     short *tbl;
9109     U8 squash;
9110     U8 del;
9111     U8 complement;
9112     bool nondestruct = 0;
9113 #ifdef PERL_MAD
9114     char *modstart;
9115 #endif
9116
9117     PERL_ARGS_ASSERT_SCAN_TRANS;
9118
9119     pl_yylval.ival = OP_NULL;
9120
9121     s = scan_str(start,!!PL_madskills,FALSE);
9122     if (!s)
9123         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9124
9125     if (s[-1] == PL_multi_open)
9126         s--;
9127 #ifdef PERL_MAD
9128     if (PL_madskills) {
9129         CURMAD('q', PL_thisopen);
9130         CURMAD('_', PL_thiswhite);
9131         CURMAD('E', PL_thisstuff);
9132         CURMAD('Q', PL_thisclose);
9133         PL_realtokenstart = s - SvPVX(PL_linestr);
9134     }
9135 #endif
9136
9137     s = scan_str(s,!!PL_madskills,FALSE);
9138     if (!s) {
9139         if (PL_lex_stuff) {
9140             SvREFCNT_dec(PL_lex_stuff);
9141             PL_lex_stuff = NULL;
9142         }
9143         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9144     }
9145     if (PL_madskills) {
9146         CURMAD('z', PL_thisopen);
9147         CURMAD('R', PL_thisstuff);
9148         CURMAD('Z', PL_thisclose);
9149     }
9150
9151     complement = del = squash = 0;
9152 #ifdef PERL_MAD
9153     modstart = s;
9154 #endif
9155     while (1) {
9156         switch (*s) {
9157         case 'c':
9158             complement = OPpTRANS_COMPLEMENT;
9159             break;
9160         case 'd':
9161             del = OPpTRANS_DELETE;
9162             break;
9163         case 's':
9164             squash = OPpTRANS_SQUASH;
9165             break;
9166         case 'r':
9167             nondestruct = 1;
9168             break;
9169         default:
9170             goto no_more;
9171         }
9172         s++;
9173     }
9174   no_more:
9175
9176     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9177     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9178     o->op_private &= ~OPpTRANS_ALL;
9179     o->op_private |= del|squash|complement|
9180       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9181       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9182
9183     PL_lex_op = o;
9184     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9185
9186 #ifdef PERL_MAD
9187     if (PL_madskills) {
9188         if (modstart != s)
9189             curmad('m', newSVpvn(modstart, s - modstart));
9190         append_madprops(PL_thismad, o, 0);
9191         PL_thismad = 0;
9192     }
9193 #endif
9194
9195     return s;
9196 }
9197
9198 STATIC char *
9199 S_scan_heredoc(pTHX_ register char *s)
9200 {
9201     dVAR;
9202     SV *herewas;
9203     I32 op_type = OP_SCALAR;
9204     I32 len;
9205     SV *tmpstr;
9206     char term;
9207     const char *found_newline;
9208     register char *d;
9209     register char *e;
9210     char *peek;
9211     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9212 #ifdef PERL_MAD
9213     I32 stuffstart = s - SvPVX(PL_linestr);
9214     char *tstart;
9215
9216     PL_realtokenstart = -1;
9217 #endif
9218
9219     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9220
9221     s += 2;
9222     d = PL_tokenbuf;
9223     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9224     if (!outer)
9225         *d++ = '\n';
9226     peek = s;
9227     while (SPACE_OR_TAB(*peek))
9228         peek++;
9229     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9230         s = peek;
9231         term = *s++;
9232         s = delimcpy(d, e, s, PL_bufend, term, &len);
9233         d += len;
9234         if (s < PL_bufend)
9235             s++;
9236     }
9237     else {
9238         if (*s == '\\')
9239             s++, term = '\'';
9240         else
9241             term = '"';
9242         if (!isALNUM_lazy_if(s,UTF))
9243             deprecate("bare << to mean <<\"\"");
9244         for (; isALNUM_lazy_if(s,UTF); s++) {
9245             if (d < e)
9246                 *d++ = *s;
9247         }
9248     }
9249     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9250         Perl_croak(aTHX_ "Delimiter for here document is too long");
9251     *d++ = '\n';
9252     *d = '\0';
9253     len = d - PL_tokenbuf;
9254
9255 #ifdef PERL_MAD
9256     if (PL_madskills) {
9257         tstart = PL_tokenbuf + !outer;
9258         PL_thisclose = newSVpvn(tstart, len - !outer);
9259         tstart = SvPVX(PL_linestr) + stuffstart;
9260         PL_thisopen = newSVpvn(tstart, s - tstart);
9261         stuffstart = s - SvPVX(PL_linestr);
9262     }
9263 #endif
9264 #ifndef PERL_STRICT_CR
9265     d = strchr(s, '\r');
9266     if (d) {
9267         char * const olds = s;
9268         s = d;
9269         while (s < PL_bufend) {
9270             if (*s == '\r') {
9271                 *d++ = '\n';
9272                 if (*++s == '\n')
9273                     s++;
9274             }
9275             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9276                 *d++ = *s++;
9277                 s++;
9278             }
9279             else
9280                 *d++ = *s++;
9281         }
9282         *d = '\0';
9283         PL_bufend = d;
9284         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9285         s = olds;
9286     }
9287 #endif
9288 #ifdef PERL_MAD
9289     found_newline = 0;
9290 #endif
9291     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9292         herewas = newSVpvn(s,PL_bufend-s);
9293     }
9294     else {
9295 #ifdef PERL_MAD
9296         herewas = newSVpvn(s-1,found_newline-s+1);
9297 #else
9298         s--;
9299         herewas = newSVpvn(s,found_newline-s);
9300 #endif
9301     }
9302 #ifdef PERL_MAD
9303     if (PL_madskills) {
9304         tstart = SvPVX(PL_linestr) + stuffstart;
9305         if (PL_thisstuff)
9306             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9307         else
9308             PL_thisstuff = newSVpvn(tstart, s - tstart);
9309     }
9310 #endif
9311     s += SvCUR(herewas);
9312
9313 #ifdef PERL_MAD
9314     stuffstart = s - SvPVX(PL_linestr);
9315
9316     if (found_newline)
9317         s--;
9318 #endif
9319
9320     tmpstr = newSV_type(SVt_PVIV);
9321     SvGROW(tmpstr, 80);
9322     if (term == '\'') {
9323         op_type = OP_CONST;
9324         SvIV_set(tmpstr, -1);
9325     }
9326     else if (term == '`') {
9327         op_type = OP_BACKTICK;
9328         SvIV_set(tmpstr, '\\');
9329     }
9330
9331     CLINE;
9332     PL_multi_start = CopLINE(PL_curcop);
9333     PL_multi_open = PL_multi_close = '<';
9334     term = *PL_tokenbuf;
9335     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9336         char * const bufptr = PL_sublex_info.super_bufptr;
9337         char * const bufend = PL_sublex_info.super_bufend;
9338         char * const olds = s - SvCUR(herewas);
9339         s = strchr(bufptr, '\n');
9340         if (!s)
9341             s = bufend;
9342         d = s;
9343         while (s < bufend &&
9344           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9345             if (*s++ == '\n')
9346                 CopLINE_inc(PL_curcop);
9347         }
9348         if (s >= bufend) {
9349             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9350             missingterm(PL_tokenbuf);
9351         }
9352         sv_setpvn(herewas,bufptr,d-bufptr+1);
9353         sv_setpvn(tmpstr,d+1,s-d);
9354         s += len - 1;
9355         sv_catpvn(herewas,s,bufend-s);
9356         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9357
9358         s = olds;
9359         goto retval;
9360     }
9361     else if (!outer) {
9362         d = s;
9363         while (s < PL_bufend &&
9364           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9365             if (*s++ == '\n')
9366                 CopLINE_inc(PL_curcop);
9367         }
9368         if (s >= PL_bufend) {
9369             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9370             missingterm(PL_tokenbuf);
9371         }
9372         sv_setpvn(tmpstr,d+1,s-d);
9373 #ifdef PERL_MAD
9374         if (PL_madskills) {
9375             if (PL_thisstuff)
9376                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9377             else
9378                 PL_thisstuff = newSVpvn(d + 1, s - d);
9379             stuffstart = s - SvPVX(PL_linestr);
9380         }
9381 #endif
9382         s += len - 1;
9383         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9384
9385         sv_catpvn(herewas,s,PL_bufend-s);
9386         sv_setsv(PL_linestr,herewas);
9387         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9388         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9389         PL_last_lop = PL_last_uni = NULL;
9390     }
9391     else
9392         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9393     while (s >= PL_bufend) {    /* multiple line string? */
9394 #ifdef PERL_MAD
9395         if (PL_madskills) {
9396             tstart = SvPVX(PL_linestr) + stuffstart;
9397             if (PL_thisstuff)
9398                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9399             else
9400                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9401         }
9402 #endif
9403         PL_bufptr = s;
9404         CopLINE_inc(PL_curcop);
9405         if (!outer || !lex_next_chunk(0)) {
9406             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9407             missingterm(PL_tokenbuf);
9408         }
9409         CopLINE_dec(PL_curcop);
9410         s = PL_bufptr;
9411 #ifdef PERL_MAD
9412         stuffstart = s - SvPVX(PL_linestr);
9413 #endif
9414         CopLINE_inc(PL_curcop);
9415         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9416         PL_last_lop = PL_last_uni = NULL;
9417 #ifndef PERL_STRICT_CR
9418         if (PL_bufend - PL_linestart >= 2) {
9419             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9420                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9421             {
9422                 PL_bufend[-2] = '\n';
9423                 PL_bufend--;
9424                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9425             }
9426             else if (PL_bufend[-1] == '\r')
9427                 PL_bufend[-1] = '\n';
9428         }
9429         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9430             PL_bufend[-1] = '\n';
9431 #endif
9432         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9433             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9434             *(SvPVX(PL_linestr) + off ) = ' ';
9435             lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9436             sv_catsv(PL_linestr,herewas);
9437             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9438             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9439         }
9440         else {
9441             s = PL_bufend;
9442             sv_catsv(tmpstr,PL_linestr);
9443         }
9444     }
9445     s++;
9446 retval:
9447     PL_multi_end = CopLINE(PL_curcop);
9448     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9449         SvPV_shrink_to_cur(tmpstr);
9450     }
9451     SvREFCNT_dec(herewas);
9452     if (!IN_BYTES) {
9453         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9454             SvUTF8_on(tmpstr);
9455         else if (PL_encoding)
9456             sv_recode_to_utf8(tmpstr, PL_encoding);
9457     }
9458     PL_lex_stuff = tmpstr;
9459     pl_yylval.ival = op_type;
9460     return s;
9461 }
9462
9463 /* scan_inputsymbol
9464    takes: current position in input buffer
9465    returns: new position in input buffer
9466    side-effects: pl_yylval and lex_op are set.
9467
9468    This code handles:
9469
9470    <>           read from ARGV
9471    <FH>         read from filehandle
9472    <pkg::FH>    read from package qualified filehandle
9473    <pkg'FH>     read from package qualified filehandle
9474    <$fh>        read from filehandle in $fh
9475    <*.h>        filename glob
9476
9477 */
9478
9479 STATIC char *
9480 S_scan_inputsymbol(pTHX_ char *start)
9481 {
9482     dVAR;
9483     register char *s = start;           /* current position in buffer */
9484     char *end;
9485     I32 len;
9486     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9487     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9488
9489     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9490
9491     end = strchr(s, '\n');
9492     if (!end)
9493         end = PL_bufend;
9494     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9495
9496     /* die if we didn't have space for the contents of the <>,
9497        or if it didn't end, or if we see a newline
9498     */
9499
9500     if (len >= (I32)sizeof PL_tokenbuf)
9501         Perl_croak(aTHX_ "Excessively long <> operator");
9502     if (s >= end)
9503         Perl_croak(aTHX_ "Unterminated <> operator");
9504
9505     s++;
9506
9507     /* check for <$fh>
9508        Remember, only scalar variables are interpreted as filehandles by
9509        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9510        treated as a glob() call.
9511        This code makes use of the fact that except for the $ at the front,
9512        a scalar variable and a filehandle look the same.
9513     */
9514     if (*d == '$' && d[1]) d++;
9515
9516     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9517     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9518         d++;
9519
9520     /* If we've tried to read what we allow filehandles to look like, and
9521        there's still text left, then it must be a glob() and not a getline.
9522        Use scan_str to pull out the stuff between the <> and treat it
9523        as nothing more than a string.
9524     */
9525
9526     if (d - PL_tokenbuf != len) {
9527         pl_yylval.ival = OP_GLOB;
9528         s = scan_str(start,!!PL_madskills,FALSE);
9529         if (!s)
9530            Perl_croak(aTHX_ "Glob not terminated");
9531         return s;
9532     }
9533     else {
9534         bool readline_overriden = FALSE;
9535         GV *gv_readline;
9536         GV **gvp;
9537         /* we're in a filehandle read situation */
9538         d = PL_tokenbuf;
9539
9540         /* turn <> into <ARGV> */
9541         if (!len)
9542             Copy("ARGV",d,5,char);
9543
9544         /* Check whether readline() is overriden */
9545         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9546         if ((gv_readline
9547                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9548                 ||
9549                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9550                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9551                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9552             readline_overriden = TRUE;
9553
9554         /* if <$fh>, create the ops to turn the variable into a
9555            filehandle
9556         */
9557         if (*d == '$') {
9558             /* try to find it in the pad for this block, otherwise find
9559                add symbol table ops
9560             */
9561             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9562             if (tmp != NOT_IN_PAD) {
9563                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9564                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9565                     HEK * const stashname = HvNAME_HEK(stash);
9566                     SV * const sym = sv_2mortal(newSVhek(stashname));
9567                     sv_catpvs(sym, "::");
9568                     sv_catpv(sym, d+1);
9569                     d = SvPVX(sym);
9570                     goto intro_sym;
9571                 }
9572                 else {
9573                     OP * const o = newOP(OP_PADSV, 0);
9574                     o->op_targ = tmp;
9575                     PL_lex_op = readline_overriden
9576                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9577                                 op_append_elem(OP_LIST, o,
9578                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9579                         : (OP*)newUNOP(OP_READLINE, 0, o);
9580                 }
9581             }
9582             else {
9583                 GV *gv;
9584                 ++d;
9585 intro_sym:
9586                 gv = gv_fetchpv(d,
9587                                 (PL_in_eval
9588                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9589                                  : GV_ADDMULTI),
9590                                 SVt_PV);
9591                 PL_lex_op = readline_overriden
9592                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9593                             op_append_elem(OP_LIST,
9594                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9595                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9596                     : (OP*)newUNOP(OP_READLINE, 0,
9597                             newUNOP(OP_RV2SV, 0,
9598                                 newGVOP(OP_GV, 0, gv)));
9599             }
9600             if (!readline_overriden)
9601                 PL_lex_op->op_flags |= OPf_SPECIAL;
9602             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9603             pl_yylval.ival = OP_NULL;
9604         }
9605
9606         /* If it's none of the above, it must be a literal filehandle
9607            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9608         else {
9609             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9610             PL_lex_op = readline_overriden
9611                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9612                         op_append_elem(OP_LIST,
9613                             newGVOP(OP_GV, 0, gv),
9614                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9615                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9616             pl_yylval.ival = OP_NULL;
9617         }
9618     }
9619
9620     return s;
9621 }
9622
9623
9624 /* scan_str
9625    takes: start position in buffer
9626           keep_quoted preserve \ on the embedded delimiter(s)
9627           keep_delims preserve the delimiters around the string
9628    returns: position to continue reading from buffer
9629    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9630         updates the read buffer.
9631
9632    This subroutine pulls a string out of the input.  It is called for:
9633         q               single quotes           q(literal text)
9634         '               single quotes           'literal text'
9635         qq              double quotes           qq(interpolate $here please)
9636         "               double quotes           "interpolate $here please"
9637         qx              backticks               qx(/bin/ls -l)
9638         `               backticks               `/bin/ls -l`
9639         qw              quote words             @EXPORT_OK = qw( func() $spam )
9640         m//             regexp match            m/this/
9641         s///            regexp substitute       s/this/that/
9642         tr///           string transliterate    tr/this/that/
9643         y///            string transliterate    y/this/that/
9644         ($*@)           sub prototypes          sub foo ($)
9645         (stuff)         sub attr parameters     sub foo : attr(stuff)
9646         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9647
9648    In most of these cases (all but <>, patterns and transliterate)
9649    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9650    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9651    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9652    calls scan_str().
9653
9654    It skips whitespace before the string starts, and treats the first
9655    character as the delimiter.  If the delimiter is one of ([{< then
9656    the corresponding "close" character )]}> is used as the closing
9657    delimiter.  It allows quoting of delimiters, and if the string has
9658    balanced delimiters ([{<>}]) it allows nesting.
9659
9660    On success, the SV with the resulting string is put into lex_stuff or,
9661    if that is already non-NULL, into lex_repl. The second case occurs only
9662    when parsing the RHS of the special constructs s/// and tr/// (y///).
9663    For convenience, the terminating delimiter character is stuffed into
9664    SvIVX of the SV.
9665 */
9666
9667 STATIC char *
9668 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9669 {
9670     dVAR;
9671     SV *sv;                             /* scalar value: string */
9672     const char *tmps;                   /* temp string, used for delimiter matching */
9673     register char *s = start;           /* current position in the buffer */
9674     register char term;                 /* terminating character */
9675     register char *to;                  /* current position in the sv's data */
9676     I32 brackets = 1;                   /* bracket nesting level */
9677     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9678     I32 termcode;                       /* terminating char. code */
9679     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9680     STRLEN termlen;                     /* length of terminating string */
9681     int last_off = 0;                   /* last position for nesting bracket */
9682 #ifdef PERL_MAD
9683     int stuffstart;
9684     char *tstart;
9685 #endif
9686
9687     PERL_ARGS_ASSERT_SCAN_STR;
9688
9689     /* skip space before the delimiter */
9690     if (isSPACE(*s)) {
9691         s = PEEKSPACE(s);
9692     }
9693
9694 #ifdef PERL_MAD
9695     if (PL_realtokenstart >= 0) {
9696         stuffstart = PL_realtokenstart;
9697         PL_realtokenstart = -1;
9698     }
9699     else
9700         stuffstart = start - SvPVX(PL_linestr);
9701 #endif
9702     /* mark where we are, in case we need to report errors */
9703     CLINE;
9704
9705     /* after skipping whitespace, the next character is the terminator */
9706     term = *s;
9707     if (!UTF) {
9708         termcode = termstr[0] = term;
9709         termlen = 1;
9710     }
9711     else {
9712         termcode = utf8_to_uvchr((U8*)s, &termlen);
9713         Copy(s, termstr, termlen, U8);
9714         if (!UTF8_IS_INVARIANT(term))
9715             has_utf8 = TRUE;
9716     }
9717
9718     /* mark where we are */
9719     PL_multi_start = CopLINE(PL_curcop);
9720     PL_multi_open = term;
9721
9722     /* find corresponding closing delimiter */
9723     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9724         termcode = termstr[0] = term = tmps[5];
9725
9726     PL_multi_close = term;
9727
9728     /* create a new SV to hold the contents.  79 is the SV's initial length.
9729        What a random number. */
9730     sv = newSV_type(SVt_PVIV);
9731     SvGROW(sv, 80);
9732     SvIV_set(sv, termcode);
9733     (void)SvPOK_only(sv);               /* validate pointer */
9734
9735     /* move past delimiter and try to read a complete string */
9736     if (keep_delims)
9737         sv_catpvn(sv, s, termlen);
9738     s += termlen;
9739 #ifdef PERL_MAD
9740     tstart = SvPVX(PL_linestr) + stuffstart;
9741     if (!PL_thisopen && !keep_delims) {
9742         PL_thisopen = newSVpvn(tstart, s - tstart);
9743         stuffstart = s - SvPVX(PL_linestr);
9744     }
9745 #endif
9746     for (;;) {
9747         if (PL_encoding && !UTF) {
9748             bool cont = TRUE;
9749
9750             while (cont) {
9751                 int offset = s - SvPVX_const(PL_linestr);
9752                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9753                                            &offset, (char*)termstr, termlen);
9754                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9755                 char * const svlast = SvEND(sv) - 1;
9756
9757                 for (; s < ns; s++) {
9758                     if (*s == '\n' && !PL_rsfp)
9759                         CopLINE_inc(PL_curcop);
9760                 }
9761                 if (!found)
9762                     goto read_more_line;
9763                 else {
9764                     /* handle quoted delimiters */
9765                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9766                         const char *t;
9767                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9768                             t--;
9769                         if ((svlast-1 - t) % 2) {
9770                             if (!keep_quoted) {
9771                                 *(svlast-1) = term;
9772                                 *svlast = '\0';
9773                                 SvCUR_set(sv, SvCUR(sv) - 1);
9774                             }
9775                             continue;
9776                         }
9777                     }
9778                     if (PL_multi_open == PL_multi_close) {
9779                         cont = FALSE;
9780                     }
9781                     else {
9782                         const char *t;
9783                         char *w;
9784                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9785                             /* At here, all closes are "was quoted" one,
9786                                so we don't check PL_multi_close. */
9787                             if (*t == '\\') {
9788                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9789                                     t++;
9790                                 else
9791                                     *w++ = *t++;
9792                             }
9793                             else if (*t == PL_multi_open)
9794                                 brackets++;
9795
9796                             *w = *t;
9797                         }
9798                         if (w < t) {
9799                             *w++ = term;
9800                             *w = '\0';
9801                             SvCUR_set(sv, w - SvPVX_const(sv));
9802                         }
9803                         last_off = w - SvPVX(sv);
9804                         if (--brackets <= 0)
9805                             cont = FALSE;
9806                     }
9807                 }
9808             }
9809             if (!keep_delims) {
9810                 SvCUR_set(sv, SvCUR(sv) - 1);
9811                 *SvEND(sv) = '\0';
9812             }
9813             break;
9814         }
9815
9816         /* extend sv if need be */
9817         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9818         /* set 'to' to the next character in the sv's string */
9819         to = SvPVX(sv)+SvCUR(sv);
9820
9821         /* if open delimiter is the close delimiter read unbridle */
9822         if (PL_multi_open == PL_multi_close) {
9823             for (; s < PL_bufend; s++,to++) {
9824                 /* embedded newlines increment the current line number */
9825                 if (*s == '\n' && !PL_rsfp)
9826                     CopLINE_inc(PL_curcop);
9827                 /* handle quoted delimiters */
9828                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9829                     if (!keep_quoted && s[1] == term)
9830                         s++;
9831                 /* any other quotes are simply copied straight through */
9832                     else
9833                         *to++ = *s++;
9834                 }
9835                 /* terminate when run out of buffer (the for() condition), or
9836                    have found the terminator */
9837                 else if (*s == term) {
9838                     if (termlen == 1)
9839                         break;
9840                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9841                         break;
9842                 }
9843                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9844                     has_utf8 = TRUE;
9845                 *to = *s;
9846             }
9847         }
9848
9849         /* if the terminator isn't the same as the start character (e.g.,
9850            matched brackets), we have to allow more in the quoting, and
9851            be prepared for nested brackets.
9852         */
9853         else {
9854             /* read until we run out of string, or we find the terminator */
9855             for (; s < PL_bufend; s++,to++) {
9856                 /* embedded newlines increment the line count */
9857                 if (*s == '\n' && !PL_rsfp)
9858                     CopLINE_inc(PL_curcop);
9859                 /* backslashes can escape the open or closing characters */
9860                 if (*s == '\\' && s+1 < PL_bufend) {
9861                     if (!keep_quoted &&
9862                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9863                         s++;
9864                     else
9865                         *to++ = *s++;
9866                 }
9867                 /* allow nested opens and closes */
9868                 else if (*s == PL_multi_close && --brackets <= 0)
9869                     break;
9870                 else if (*s == PL_multi_open)
9871                     brackets++;
9872                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9873                     has_utf8 = TRUE;
9874                 *to = *s;
9875             }
9876         }
9877         /* terminate the copied string and update the sv's end-of-string */
9878         *to = '\0';
9879         SvCUR_set(sv, to - SvPVX_const(sv));
9880
9881         /*
9882          * this next chunk reads more into the buffer if we're not done yet
9883          */
9884
9885         if (s < PL_bufend)
9886             break;              /* handle case where we are done yet :-) */
9887
9888 #ifndef PERL_STRICT_CR
9889         if (to - SvPVX_const(sv) >= 2) {
9890             if ((to[-2] == '\r' && to[-1] == '\n') ||
9891                 (to[-2] == '\n' && to[-1] == '\r'))
9892             {
9893                 to[-2] = '\n';
9894                 to--;
9895                 SvCUR_set(sv, to - SvPVX_const(sv));
9896             }
9897             else if (to[-1] == '\r')
9898                 to[-1] = '\n';
9899         }
9900         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9901             to[-1] = '\n';
9902 #endif
9903
9904      read_more_line:
9905         /* if we're out of file, or a read fails, bail and reset the current
9906            line marker so we can report where the unterminated string began
9907         */
9908 #ifdef PERL_MAD
9909         if (PL_madskills) {
9910             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9911             if (PL_thisstuff)
9912                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9913             else
9914                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9915         }
9916 #endif
9917         CopLINE_inc(PL_curcop);
9918         PL_bufptr = PL_bufend;
9919         if (!lex_next_chunk(0)) {
9920             sv_free(sv);
9921             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9922             return NULL;
9923         }
9924         s = PL_bufptr;
9925 #ifdef PERL_MAD
9926         stuffstart = 0;
9927 #endif
9928     }
9929
9930     /* at this point, we have successfully read the delimited string */
9931
9932     if (!PL_encoding || UTF) {
9933 #ifdef PERL_MAD
9934         if (PL_madskills) {
9935             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9936             const int len = s - tstart;
9937             if (PL_thisstuff)
9938                 sv_catpvn(PL_thisstuff, tstart, len);
9939             else
9940                 PL_thisstuff = newSVpvn(tstart, len);
9941             if (!PL_thisclose && !keep_delims)
9942                 PL_thisclose = newSVpvn(s,termlen);
9943         }
9944 #endif
9945
9946         if (keep_delims)
9947             sv_catpvn(sv, s, termlen);
9948         s += termlen;
9949     }
9950 #ifdef PERL_MAD
9951     else {
9952         if (PL_madskills) {
9953             char * const tstart = SvPVX(PL_linestr) + stuffstart;
9954             const int len = s - tstart - termlen;
9955             if (PL_thisstuff)
9956                 sv_catpvn(PL_thisstuff, tstart, len);
9957             else
9958                 PL_thisstuff = newSVpvn(tstart, len);
9959             if (!PL_thisclose && !keep_delims)
9960                 PL_thisclose = newSVpvn(s - termlen,termlen);
9961         }
9962     }
9963 #endif
9964     if (has_utf8 || PL_encoding)
9965         SvUTF8_on(sv);
9966
9967     PL_multi_end = CopLINE(PL_curcop);
9968
9969     /* if we allocated too much space, give some back */
9970     if (SvCUR(sv) + 5 < SvLEN(sv)) {
9971         SvLEN_set(sv, SvCUR(sv) + 1);
9972         SvPV_renew(sv, SvLEN(sv));
9973     }
9974
9975     /* decide whether this is the first or second quoted string we've read
9976        for this op
9977     */
9978
9979     if (PL_lex_stuff)
9980         PL_lex_repl = sv;
9981     else
9982         PL_lex_stuff = sv;
9983     return s;
9984 }
9985
9986 /*
9987   scan_num
9988   takes: pointer to position in buffer
9989   returns: pointer to new position in buffer
9990   side-effects: builds ops for the constant in pl_yylval.op
9991
9992   Read a number in any of the formats that Perl accepts:
9993
9994   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
9995   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
9996   0b[01](_?[01])*
9997   0[0-7](_?[0-7])*
9998   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
9999
10000   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10001   thing it reads.
10002
10003   If it reads a number without a decimal point or an exponent, it will
10004   try converting the number to an integer and see if it can do so
10005   without loss of precision.
10006 */
10007
10008 char *
10009 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10010 {
10011     dVAR;
10012     register const char *s = start;     /* current position in buffer */
10013     register char *d;                   /* destination in temp buffer */
10014     register char *e;                   /* end of temp buffer */
10015     NV nv;                              /* number read, as a double */
10016     SV *sv = NULL;                      /* place to put the converted number */
10017     bool floatit;                       /* boolean: int or float? */
10018     const char *lastub = NULL;          /* position of last underbar */
10019     static char const number_too_long[] = "Number too long";
10020
10021     PERL_ARGS_ASSERT_SCAN_NUM;
10022
10023     /* We use the first character to decide what type of number this is */
10024
10025     switch (*s) {
10026     default:
10027       Perl_croak(aTHX_ "panic: scan_num");
10028
10029     /* if it starts with a 0, it could be an octal number, a decimal in
10030        0.13 disguise, or a hexadecimal number, or a binary number. */
10031     case '0':
10032         {
10033           /* variables:
10034              u          holds the "number so far"
10035              shift      the power of 2 of the base
10036                         (hex == 4, octal == 3, binary == 1)
10037              overflowed was the number more than we can hold?
10038
10039              Shift is used when we add a digit.  It also serves as an "are
10040              we in octal/hex/binary?" indicator to disallow hex characters
10041              when in octal mode.
10042            */
10043             NV n = 0.0;
10044             UV u = 0;
10045             I32 shift;
10046             bool overflowed = FALSE;
10047             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10048             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10049             static const char* const bases[5] =
10050               { "", "binary", "", "octal", "hexadecimal" };
10051             static const char* const Bases[5] =
10052               { "", "Binary", "", "Octal", "Hexadecimal" };
10053             static const char* const maxima[5] =
10054               { "",
10055                 "0b11111111111111111111111111111111",
10056                 "",
10057                 "037777777777",
10058                 "0xffffffff" };
10059             const char *base, *Base, *max;
10060
10061             /* check for hex */
10062             if (s[1] == 'x' || s[1] == 'X') {
10063                 shift = 4;
10064                 s += 2;
10065                 just_zero = FALSE;
10066             } else if (s[1] == 'b' || s[1] == 'B') {
10067                 shift = 1;
10068                 s += 2;
10069                 just_zero = FALSE;
10070             }
10071             /* check for a decimal in disguise */
10072             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10073                 goto decimal;
10074             /* so it must be octal */
10075             else {
10076                 shift = 3;
10077                 s++;
10078             }
10079
10080             if (*s == '_') {
10081                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10082                                "Misplaced _ in number");
10083                lastub = s++;
10084             }
10085
10086             base = bases[shift];
10087             Base = Bases[shift];
10088             max  = maxima[shift];
10089
10090             /* read the rest of the number */
10091             for (;;) {
10092                 /* x is used in the overflow test,
10093                    b is the digit we're adding on. */
10094                 UV x, b;
10095
10096                 switch (*s) {
10097
10098                 /* if we don't mention it, we're done */
10099                 default:
10100                     goto out;
10101
10102                 /* _ are ignored -- but warned about if consecutive */
10103                 case '_':
10104                     if (lastub && s == lastub + 1)
10105                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10106                                        "Misplaced _ in number");
10107                     lastub = s++;
10108                     break;
10109
10110                 /* 8 and 9 are not octal */
10111                 case '8': case '9':
10112                     if (shift == 3)
10113                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10114                     /* FALL THROUGH */
10115
10116                 /* octal digits */
10117                 case '2': case '3': case '4':
10118                 case '5': case '6': case '7':
10119                     if (shift == 1)
10120                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10121                     /* FALL THROUGH */
10122
10123                 case '0': case '1':
10124                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10125                     goto digit;
10126
10127                 /* hex digits */
10128                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10129                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10130                     /* make sure they said 0x */
10131                     if (shift != 4)
10132                         goto out;
10133                     b = (*s++ & 7) + 9;
10134
10135                     /* Prepare to put the digit we have onto the end
10136                        of the number so far.  We check for overflows.
10137                     */
10138
10139                   digit:
10140                     just_zero = FALSE;
10141                     if (!overflowed) {
10142                         x = u << shift; /* make room for the digit */
10143
10144                         if ((x >> shift) != u
10145                             && !(PL_hints & HINT_NEW_BINARY)) {
10146                             overflowed = TRUE;
10147                             n = (NV) u;
10148                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10149                                              "Integer overflow in %s number",
10150                                              base);
10151                         } else
10152                             u = x | b;          /* add the digit to the end */
10153                     }
10154                     if (overflowed) {
10155                         n *= nvshift[shift];
10156                         /* If an NV has not enough bits in its
10157                          * mantissa to represent an UV this summing of
10158                          * small low-order numbers is a waste of time
10159                          * (because the NV cannot preserve the
10160                          * low-order bits anyway): we could just
10161                          * remember when did we overflow and in the
10162                          * end just multiply n by the right
10163                          * amount. */
10164                         n += (NV) b;
10165                     }
10166                     break;
10167                 }
10168             }
10169
10170           /* if we get here, we had success: make a scalar value from
10171              the number.
10172           */
10173           out:
10174
10175             /* final misplaced underbar check */
10176             if (s[-1] == '_') {
10177                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10178             }
10179
10180             if (overflowed) {
10181                 if (n > 4294967295.0)
10182                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10183                                    "%s number > %s non-portable",
10184                                    Base, max);
10185                 sv = newSVnv(n);
10186             }
10187             else {
10188 #if UVSIZE > 4
10189                 if (u > 0xffffffff)
10190                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10191                                    "%s number > %s non-portable",
10192                                    Base, max);
10193 #endif
10194                 sv = newSVuv(u);
10195             }
10196             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10197                 sv = new_constant(start, s - start, "integer",
10198                                   sv, NULL, NULL, 0);
10199             else if (PL_hints & HINT_NEW_BINARY)
10200                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10201         }
10202         break;
10203
10204     /*
10205       handle decimal numbers.
10206       we're also sent here when we read a 0 as the first digit
10207     */
10208     case '1': case '2': case '3': case '4': case '5':
10209     case '6': case '7': case '8': case '9': case '.':
10210       decimal:
10211         d = PL_tokenbuf;
10212         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10213         floatit = FALSE;
10214
10215         /* read next group of digits and _ and copy into d */
10216         while (isDIGIT(*s) || *s == '_') {
10217             /* skip underscores, checking for misplaced ones
10218                if -w is on
10219             */
10220             if (*s == '_') {
10221                 if (lastub && s == lastub + 1)
10222                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10223                                    "Misplaced _ in number");
10224                 lastub = s++;
10225             }
10226             else {
10227                 /* check for end of fixed-length buffer */
10228                 if (d >= e)
10229                     Perl_croak(aTHX_ number_too_long);
10230                 /* if we're ok, copy the character */
10231                 *d++ = *s++;
10232             }
10233         }
10234
10235         /* final misplaced underbar check */
10236         if (lastub && s == lastub + 1) {
10237             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10238         }
10239
10240         /* read a decimal portion if there is one.  avoid
10241            3..5 being interpreted as the number 3. followed
10242            by .5
10243         */
10244         if (*s == '.' && s[1] != '.') {
10245             floatit = TRUE;
10246             *d++ = *s++;
10247
10248             if (*s == '_') {
10249                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10250                                "Misplaced _ in number");
10251                 lastub = s;
10252             }
10253
10254             /* copy, ignoring underbars, until we run out of digits.
10255             */
10256             for (; isDIGIT(*s) || *s == '_'; s++) {
10257                 /* fixed length buffer check */
10258                 if (d >= e)
10259                     Perl_croak(aTHX_ number_too_long);
10260                 if (*s == '_') {
10261                    if (lastub && s == lastub + 1)
10262                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10263                                       "Misplaced _ in number");
10264                    lastub = s;
10265                 }
10266                 else
10267                     *d++ = *s;
10268             }
10269             /* fractional part ending in underbar? */
10270             if (s[-1] == '_') {
10271                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10272                                "Misplaced _ in number");
10273             }
10274             if (*s == '.' && isDIGIT(s[1])) {
10275                 /* oops, it's really a v-string, but without the "v" */
10276                 s = start;
10277                 goto vstring;
10278             }
10279         }
10280
10281         /* read exponent part, if present */
10282         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10283             floatit = TRUE;
10284             s++;
10285
10286             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10287             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10288
10289             /* stray preinitial _ */
10290             if (*s == '_') {
10291                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10292                                "Misplaced _ in number");
10293                 lastub = s++;
10294             }
10295
10296             /* allow positive or negative exponent */
10297             if (*s == '+' || *s == '-')
10298                 *d++ = *s++;
10299
10300             /* stray initial _ */
10301             if (*s == '_') {
10302                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10303                                "Misplaced _ in number");
10304                 lastub = s++;
10305             }
10306
10307             /* read digits of exponent */
10308             while (isDIGIT(*s) || *s == '_') {
10309                 if (isDIGIT(*s)) {
10310                     if (d >= e)
10311                         Perl_croak(aTHX_ number_too_long);
10312                     *d++ = *s++;
10313                 }
10314                 else {
10315                    if (((lastub && s == lastub + 1) ||
10316                         (!isDIGIT(s[1]) && s[1] != '_')))
10317                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10318                                       "Misplaced _ in number");
10319                    lastub = s++;
10320                 }
10321             }
10322         }
10323
10324
10325         /*
10326            We try to do an integer conversion first if no characters
10327            indicating "float" have been found.
10328          */
10329
10330         if (!floatit) {
10331             UV uv;
10332             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10333
10334             if (flags == IS_NUMBER_IN_UV) {
10335               if (uv <= IV_MAX)
10336                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10337               else
10338                 sv = newSVuv(uv);
10339             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10340               if (uv <= (UV) IV_MIN)
10341                 sv = newSViv(-(IV)uv);
10342               else
10343                 floatit = TRUE;
10344             } else
10345               floatit = TRUE;
10346         }
10347         if (floatit) {
10348             /* terminate the string */
10349             *d = '\0';
10350             nv = Atof(PL_tokenbuf);
10351             sv = newSVnv(nv);
10352         }
10353
10354         if ( floatit
10355              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10356             const char *const key = floatit ? "float" : "integer";
10357             const STRLEN keylen = floatit ? 5 : 7;
10358             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10359                                 key, keylen, sv, NULL, NULL, 0);
10360         }
10361         break;
10362
10363     /* if it starts with a v, it could be a v-string */
10364     case 'v':
10365 vstring:
10366                 sv = newSV(5); /* preallocate storage space */
10367                 s = scan_vstring(s, PL_bufend, sv);
10368         break;
10369     }
10370
10371     /* make the op for the constant and return */
10372
10373     if (sv)
10374         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10375     else
10376         lvalp->opval = NULL;
10377
10378     return (char *)s;
10379 }
10380
10381 STATIC char *
10382 S_scan_formline(pTHX_ register char *s)
10383 {
10384     dVAR;
10385     register char *eol;
10386     register char *t;
10387     SV * const stuff = newSVpvs("");
10388     bool needargs = FALSE;
10389     bool eofmt = FALSE;
10390 #ifdef PERL_MAD
10391     char *tokenstart = s;
10392     SV* savewhite = NULL;
10393
10394     if (PL_madskills) {
10395         savewhite = PL_thiswhite;
10396         PL_thiswhite = 0;
10397     }
10398 #endif
10399
10400     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10401
10402     while (!needargs) {
10403         if (*s == '.') {
10404             t = s+1;
10405 #ifdef PERL_STRICT_CR
10406             while (SPACE_OR_TAB(*t))
10407                 t++;
10408 #else
10409             while (SPACE_OR_TAB(*t) || *t == '\r')
10410                 t++;
10411 #endif
10412             if (*t == '\n' || t == PL_bufend) {
10413                 eofmt = TRUE;
10414                 break;
10415             }
10416         }
10417         if (PL_in_eval && !PL_rsfp) {
10418             eol = (char *) memchr(s,'\n',PL_bufend-s);
10419             if (!eol++)
10420                 eol = PL_bufend;
10421         }
10422         else
10423             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10424         if (*s != '#') {
10425             for (t = s; t < eol; t++) {
10426                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10427                     needargs = FALSE;
10428                     goto enough;        /* ~~ must be first line in formline */
10429                 }
10430                 if (*t == '@' || *t == '^')
10431                     needargs = TRUE;
10432             }
10433             if (eol > s) {
10434                 sv_catpvn(stuff, s, eol-s);
10435 #ifndef PERL_STRICT_CR
10436                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10437                     char *end = SvPVX(stuff) + SvCUR(stuff);
10438                     end[-2] = '\n';
10439                     end[-1] = '\0';
10440                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10441                 }
10442 #endif
10443             }
10444             else
10445               break;
10446         }
10447         s = (char*)eol;
10448         if (PL_rsfp) {
10449             bool got_some;
10450 #ifdef PERL_MAD
10451             if (PL_madskills) {
10452                 if (PL_thistoken)
10453                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10454                 else
10455                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10456             }
10457 #endif
10458             PL_bufptr = PL_bufend;
10459             CopLINE_inc(PL_curcop);
10460             got_some = lex_next_chunk(0);
10461             CopLINE_dec(PL_curcop);
10462             s = PL_bufptr;
10463 #ifdef PERL_MAD
10464             tokenstart = PL_bufptr;
10465 #endif
10466             if (!got_some)
10467                 break;
10468         }
10469         incline(s);
10470     }
10471   enough:
10472     if (SvCUR(stuff)) {
10473         PL_expect = XTERM;
10474         if (needargs) {
10475             PL_lex_state = LEX_NORMAL;
10476             start_force(PL_curforce);
10477             NEXTVAL_NEXTTOKE.ival = 0;
10478             force_next(',');
10479         }
10480         else
10481             PL_lex_state = LEX_FORMLINE;
10482         if (!IN_BYTES) {
10483             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10484                 SvUTF8_on(stuff);
10485             else if (PL_encoding)
10486                 sv_recode_to_utf8(stuff, PL_encoding);
10487         }
10488         start_force(PL_curforce);
10489         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10490         force_next(THING);
10491         start_force(PL_curforce);
10492         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10493         force_next(LSTOP);
10494     }
10495     else {
10496         SvREFCNT_dec(stuff);
10497         if (eofmt)
10498             PL_lex_formbrack = 0;
10499         PL_bufptr = s;
10500     }
10501 #ifdef PERL_MAD
10502     if (PL_madskills) {
10503         if (PL_thistoken)
10504             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10505         else
10506             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10507         PL_thiswhite = savewhite;
10508     }
10509 #endif
10510     return s;
10511 }
10512
10513 I32
10514 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10515 {
10516     dVAR;
10517     const I32 oldsavestack_ix = PL_savestack_ix;
10518     CV* const outsidecv = PL_compcv;
10519
10520     if (PL_compcv) {
10521         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10522     }
10523     SAVEI32(PL_subline);
10524     save_item(PL_subname);
10525     SAVESPTR(PL_compcv);
10526
10527     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10528     CvFLAGS(PL_compcv) |= flags;
10529
10530     PL_subline = CopLINE(PL_curcop);
10531     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10532     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10533     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10534
10535     return oldsavestack_ix;
10536 }
10537
10538 #ifdef __SC__
10539 #pragma segment Perl_yylex
10540 #endif
10541 static int
10542 S_yywarn(pTHX_ const char *const s)
10543 {
10544     dVAR;
10545
10546     PERL_ARGS_ASSERT_YYWARN;
10547
10548     PL_in_eval |= EVAL_WARNONLY;
10549     yyerror(s);
10550     PL_in_eval &= ~EVAL_WARNONLY;
10551     return 0;
10552 }
10553
10554 int
10555 Perl_yyerror(pTHX_ const char *const s)
10556 {
10557     dVAR;
10558     const char *where = NULL;
10559     const char *context = NULL;
10560     int contlen = -1;
10561     SV *msg;
10562     int yychar  = PL_parser->yychar;
10563
10564     PERL_ARGS_ASSERT_YYERROR;
10565
10566     if (!yychar || (yychar == ';' && !PL_rsfp))
10567         where = "at EOF";
10568     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10569       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10570       PL_oldbufptr != PL_bufptr) {
10571         /*
10572                 Only for NetWare:
10573                 The code below is removed for NetWare because it abends/crashes on NetWare
10574                 when the script has error such as not having the closing quotes like:
10575                     if ($var eq "value)
10576                 Checking of white spaces is anyway done in NetWare code.
10577         */
10578 #ifndef NETWARE
10579         while (isSPACE(*PL_oldoldbufptr))
10580             PL_oldoldbufptr++;
10581 #endif
10582         context = PL_oldoldbufptr;
10583         contlen = PL_bufptr - PL_oldoldbufptr;
10584     }
10585     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10586       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10587         /*
10588                 Only for NetWare:
10589                 The code below is removed for NetWare because it abends/crashes on NetWare
10590                 when the script has error such as not having the closing quotes like:
10591                     if ($var eq "value)
10592                 Checking of white spaces is anyway done in NetWare code.
10593         */
10594 #ifndef NETWARE
10595         while (isSPACE(*PL_oldbufptr))
10596             PL_oldbufptr++;
10597 #endif
10598         context = PL_oldbufptr;
10599         contlen = PL_bufptr - PL_oldbufptr;
10600     }
10601     else if (yychar > 255)
10602         where = "next token ???";
10603     else if (yychar == -2) { /* YYEMPTY */
10604         if (PL_lex_state == LEX_NORMAL ||
10605            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10606             where = "at end of line";
10607         else if (PL_lex_inpat)
10608             where = "within pattern";
10609         else
10610             where = "within string";
10611     }
10612     else {
10613         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10614         if (yychar < 32)
10615             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10616         else if (isPRINT_LC(yychar)) {
10617             const char string = yychar;
10618             sv_catpvn(where_sv, &string, 1);
10619         }
10620         else
10621             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10622         where = SvPVX_const(where_sv);
10623     }
10624     msg = sv_2mortal(newSVpv(s, 0));
10625     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10626         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10627     if (context)
10628         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10629     else
10630         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10631     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10632         Perl_sv_catpvf(aTHX_ msg,
10633         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10634                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10635         PL_multi_end = 0;
10636     }
10637     if (PL_in_eval & EVAL_WARNONLY) {
10638         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10639     }
10640     else
10641         qerror(msg);
10642     if (PL_error_count >= 10) {
10643         if (PL_in_eval && SvCUR(ERRSV))
10644             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10645                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10646         else
10647             Perl_croak(aTHX_ "%s has too many errors.\n",
10648             OutCopFILE(PL_curcop));
10649     }
10650     PL_in_my = 0;
10651     PL_in_my_stash = NULL;
10652     return 0;
10653 }
10654 #ifdef __SC__
10655 #pragma segment Main
10656 #endif
10657
10658 STATIC char*
10659 S_swallow_bom(pTHX_ U8 *s)
10660 {
10661     dVAR;
10662     const STRLEN slen = SvCUR(PL_linestr);
10663
10664     PERL_ARGS_ASSERT_SWALLOW_BOM;
10665
10666     switch (s[0]) {
10667     case 0xFF:
10668         if (s[1] == 0xFE) {
10669             /* UTF-16 little-endian? (or UTF-32LE?) */
10670             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10671                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10672 #ifndef PERL_NO_UTF16_FILTER
10673             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10674             s += 2;
10675             if (PL_bufend > (char*)s) {
10676                 s = add_utf16_textfilter(s, TRUE);
10677             }
10678 #else
10679             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10680 #endif
10681         }
10682         break;
10683     case 0xFE:
10684         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10685 #ifndef PERL_NO_UTF16_FILTER
10686             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10687             s += 2;
10688             if (PL_bufend > (char *)s) {
10689                 s = add_utf16_textfilter(s, FALSE);
10690             }
10691 #else
10692             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10693 #endif
10694         }
10695         break;
10696     case 0xEF:
10697         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10698             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10699             s += 3;                      /* UTF-8 */
10700         }
10701         break;
10702     case 0:
10703         if (slen > 3) {
10704              if (s[1] == 0) {
10705                   if (s[2] == 0xFE && s[3] == 0xFF) {
10706                        /* UTF-32 big-endian */
10707                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10708                   }
10709              }
10710              else if (s[2] == 0 && s[3] != 0) {
10711                   /* Leading bytes
10712                    * 00 xx 00 xx
10713                    * are a good indicator of UTF-16BE. */
10714 #ifndef PERL_NO_UTF16_FILTER
10715                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10716                   s = add_utf16_textfilter(s, FALSE);
10717 #else
10718                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10719 #endif
10720              }
10721         }
10722 #ifdef EBCDIC
10723     case 0xDD:
10724         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10725             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10726             s += 4;                      /* UTF-8 */
10727         }
10728         break;
10729 #endif
10730
10731     default:
10732          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10733                   /* Leading bytes
10734                    * xx 00 xx 00
10735                    * are a good indicator of UTF-16LE. */
10736 #ifndef PERL_NO_UTF16_FILTER
10737               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10738               s = add_utf16_textfilter(s, TRUE);
10739 #else
10740               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10741 #endif
10742          }
10743     }
10744     return (char*)s;
10745 }
10746
10747
10748 #ifndef PERL_NO_UTF16_FILTER
10749 static I32
10750 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10751 {
10752     dVAR;
10753     SV *const filter = FILTER_DATA(idx);
10754     /* We re-use this each time round, throwing the contents away before we
10755        return.  */
10756     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10757     SV *const utf8_buffer = filter;
10758     IV status = IoPAGE(filter);
10759     const bool reverse = cBOOL(IoLINES(filter));
10760     I32 retval;
10761
10762     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10763
10764     /* As we're automatically added, at the lowest level, and hence only called
10765        from this file, we can be sure that we're not called in block mode. Hence
10766        don't bother writing code to deal with block mode.  */
10767     if (maxlen) {
10768         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10769     }
10770     if (status < 0) {
10771         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10772     }
10773     DEBUG_P(PerlIO_printf(Perl_debug_log,
10774                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10775                           FPTR2DPTR(void *, S_utf16_textfilter),
10776                           reverse ? 'l' : 'b', idx, maxlen, status,
10777                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10778
10779     while (1) {
10780         STRLEN chars;
10781         STRLEN have;
10782         I32 newlen;
10783         U8 *end;
10784         /* First, look in our buffer of existing UTF-8 data:  */
10785         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10786
10787         if (nl) {
10788             ++nl;
10789         } else if (status == 0) {
10790             /* EOF */
10791             IoPAGE(filter) = 0;
10792             nl = SvEND(utf8_buffer);
10793         }
10794         if (nl) {
10795             STRLEN got = nl - SvPVX(utf8_buffer);
10796             /* Did we have anything to append?  */
10797             retval = got != 0;
10798             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10799             /* Everything else in this code works just fine if SVp_POK isn't
10800                set.  This, however, needs it, and we need it to work, else
10801                we loop infinitely because the buffer is never consumed.  */
10802             sv_chop(utf8_buffer, nl);
10803             break;
10804         }
10805
10806         /* OK, not a complete line there, so need to read some more UTF-16.
10807            Read an extra octect if the buffer currently has an odd number. */
10808         while (1) {
10809             if (status <= 0)
10810                 break;
10811             if (SvCUR(utf16_buffer) >= 2) {
10812                 /* Location of the high octet of the last complete code point.
10813                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10814                    *coupled* with all the benefits of partial reads and
10815                    endianness.  */
10816                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10817                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10818
10819                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10820                     break;
10821                 }
10822
10823                 /* We have the first half of a surrogate. Read more.  */
10824                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10825             }
10826
10827             status = FILTER_READ(idx + 1, utf16_buffer,
10828                                  160 + (SvCUR(utf16_buffer) & 1));
10829             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10830             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10831             if (status < 0) {
10832                 /* Error */
10833                 IoPAGE(filter) = status;
10834                 return status;
10835             }
10836         }
10837
10838         chars = SvCUR(utf16_buffer) >> 1;
10839         have = SvCUR(utf8_buffer);
10840         SvGROW(utf8_buffer, have + chars * 3 + 1);
10841
10842         if (reverse) {
10843             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10844                                          (U8*)SvPVX_const(utf8_buffer) + have,
10845                                          chars * 2, &newlen);
10846         } else {
10847             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10848                                 (U8*)SvPVX_const(utf8_buffer) + have,
10849                                 chars * 2, &newlen);
10850         }
10851         SvCUR_set(utf8_buffer, have + newlen);
10852         *end = '\0';
10853
10854         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10855            it's private to us, and utf16_to_utf8{,reversed} take a
10856            (pointer,length) pair, rather than a NUL-terminated string.  */
10857         if(SvCUR(utf16_buffer) & 1) {
10858             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10859             SvCUR_set(utf16_buffer, 1);
10860         } else {
10861             SvCUR_set(utf16_buffer, 0);
10862         }
10863     }
10864     DEBUG_P(PerlIO_printf(Perl_debug_log,
10865                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10866                           status,
10867                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10868     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10869     return retval;
10870 }
10871
10872 static U8 *
10873 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10874 {
10875     SV *filter = filter_add(S_utf16_textfilter, NULL);
10876
10877     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10878
10879     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10880     sv_setpvs(filter, "");
10881     IoLINES(filter) = reversed;
10882     IoPAGE(filter) = 1; /* Not EOF */
10883
10884     /* Sadly, we have to return a valid pointer, come what may, so we have to
10885        ignore any error return from this.  */
10886     SvCUR_set(PL_linestr, 0);
10887     if (FILTER_READ(0, PL_linestr, 0)) {
10888         SvUTF8_on(PL_linestr);
10889     } else {
10890         SvUTF8_on(PL_linestr);
10891     }
10892     PL_bufend = SvEND(PL_linestr);
10893     return (U8*)SvPVX(PL_linestr);
10894 }
10895 #endif
10896
10897 /*
10898 Returns a pointer to the next character after the parsed
10899 vstring, as well as updating the passed in sv.
10900
10901 Function must be called like
10902
10903         sv = newSV(5);
10904         s = scan_vstring(s,e,sv);
10905
10906 where s and e are the start and end of the string.
10907 The sv should already be large enough to store the vstring
10908 passed in, for performance reasons.
10909
10910 */
10911
10912 char *
10913 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
10914 {
10915     dVAR;
10916     const char *pos = s;
10917     const char *start = s;
10918
10919     PERL_ARGS_ASSERT_SCAN_VSTRING;
10920
10921     if (*pos == 'v') pos++;  /* get past 'v' */
10922     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10923         pos++;
10924     if ( *pos != '.') {
10925         /* this may not be a v-string if followed by => */
10926         const char *next = pos;
10927         while (next < e && isSPACE(*next))
10928             ++next;
10929         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
10930             /* return string not v-string */
10931             sv_setpvn(sv,(char *)s,pos-s);
10932             return (char *)pos;
10933         }
10934     }
10935
10936     if (!isALPHA(*pos)) {
10937         U8 tmpbuf[UTF8_MAXBYTES+1];
10938
10939         if (*s == 'v')
10940             s++;  /* get past 'v' */
10941
10942         sv_setpvs(sv, "");
10943
10944         for (;;) {
10945             /* this is atoi() that tolerates underscores */
10946             U8 *tmpend;
10947             UV rev = 0;
10948             const char *end = pos;
10949             UV mult = 1;
10950             while (--end >= s) {
10951                 if (*end != '_') {
10952                     const UV orev = rev;
10953                     rev += (*end - '0') * mult;
10954                     mult *= 10;
10955                     if (orev > rev)
10956                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10957                                          "Integer overflow in decimal number");
10958                 }
10959             }
10960 #ifdef EBCDIC
10961             if (rev > 0x7FFFFFFF)
10962                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10963 #endif
10964             /* Append native character for the rev point */
10965             tmpend = uvchr_to_utf8(tmpbuf, rev);
10966             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10967             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10968                  SvUTF8_on(sv);
10969             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
10970                  s = ++pos;
10971             else {
10972                  s = pos;
10973                  break;
10974             }
10975             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
10976                  pos++;
10977         }
10978         SvPOK_on(sv);
10979         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10980         SvRMAGICAL_on(sv);
10981     }
10982     return (char *)s;
10983 }
10984
10985 int
10986 Perl_keyword_plugin_standard(pTHX_
10987         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10988 {
10989     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10990     PERL_UNUSED_CONTEXT;
10991     PERL_UNUSED_ARG(keyword_ptr);
10992     PERL_UNUSED_ARG(keyword_len);
10993     PERL_UNUSED_ARG(op_ptr);
10994     return KEYWORD_PLUGIN_DECLINE;
10995 }
10996
10997 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
10998 static void
10999 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11000 {
11001     SAVEI32(PL_lex_brackets);
11002     if (PL_lex_brackets > 100)
11003         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11004     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11005     SAVEI32(PL_lex_allbrackets);
11006     PL_lex_allbrackets = 0;
11007     SAVEI8(PL_lex_fakeeof);
11008     PL_lex_fakeeof = (U8)fakeeof;
11009     if(yyparse(gramtype) && !PL_parser->error_count)
11010         qerror(Perl_mess(aTHX_ "Parse error"));
11011 }
11012
11013 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11014 static OP *
11015 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11016 {
11017     OP *o;
11018     ENTER;
11019     SAVEVPTR(PL_eval_root);
11020     PL_eval_root = NULL;
11021     parse_recdescent(gramtype, fakeeof);
11022     o = PL_eval_root;
11023     LEAVE;
11024     return o;
11025 }
11026
11027 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11028 static OP *
11029 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11030 {
11031     OP *exprop;
11032     if (flags & ~PARSE_OPTIONAL)
11033         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11034     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11035     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11036         if (!PL_parser->error_count)
11037             qerror(Perl_mess(aTHX_ "Parse error"));
11038         exprop = newOP(OP_NULL, 0);
11039     }
11040     return exprop;
11041 }
11042
11043 /*
11044 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11045
11046 Parse a Perl arithmetic expression.  This may contain operators of precedence
11047 down to the bit shift operators.  The expression must be followed (and thus
11048 terminated) either by a comparison or lower-precedence operator or by
11049 something that would normally terminate an expression such as semicolon.
11050 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11051 otherwise it is mandatory.  It is up to the caller to ensure that the
11052 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11053 the source of the code to be parsed and the lexical context for the
11054 expression.
11055
11056 The op tree representing the expression is returned.  If an optional
11057 expression is absent, a null pointer is returned, otherwise the pointer
11058 will be non-null.
11059
11060 If an error occurs in parsing or compilation, in most cases a valid op
11061 tree is returned anyway.  The error is reflected in the parser state,
11062 normally resulting in a single exception at the top level of parsing
11063 which covers all the compilation errors that occurred.  Some compilation
11064 errors, however, will throw an exception immediately.
11065
11066 =cut
11067 */
11068
11069 OP *
11070 Perl_parse_arithexpr(pTHX_ U32 flags)
11071 {
11072     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11073 }
11074
11075 /*
11076 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11077
11078 Parse a Perl term expression.  This may contain operators of precedence
11079 down to the assignment operators.  The expression must be followed (and thus
11080 terminated) either by a comma or lower-precedence operator or by
11081 something that would normally terminate an expression such as semicolon.
11082 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11083 otherwise it is mandatory.  It is up to the caller to ensure that the
11084 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11085 the source of the code to be parsed and the lexical context for the
11086 expression.
11087
11088 The op tree representing the expression is returned.  If an optional
11089 expression is absent, a null pointer is returned, otherwise the pointer
11090 will be non-null.
11091
11092 If an error occurs in parsing or compilation, in most cases a valid op
11093 tree is returned anyway.  The error is reflected in the parser state,
11094 normally resulting in a single exception at the top level of parsing
11095 which covers all the compilation errors that occurred.  Some compilation
11096 errors, however, will throw an exception immediately.
11097
11098 =cut
11099 */
11100
11101 OP *
11102 Perl_parse_termexpr(pTHX_ U32 flags)
11103 {
11104     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11105 }
11106
11107 /*
11108 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11109
11110 Parse a Perl list expression.  This may contain operators of precedence
11111 down to the comma operator.  The expression must be followed (and thus
11112 terminated) either by a low-precedence logic operator such as C<or> or by
11113 something that would normally terminate an expression such as semicolon.
11114 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11115 otherwise it is mandatory.  It is up to the caller to ensure that the
11116 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11117 the source of the code to be parsed and the lexical context for the
11118 expression.
11119
11120 The op tree representing the expression is returned.  If an optional
11121 expression is absent, a null pointer is returned, otherwise the pointer
11122 will be non-null.
11123
11124 If an error occurs in parsing or compilation, in most cases a valid op
11125 tree is returned anyway.  The error is reflected in the parser state,
11126 normally resulting in a single exception at the top level of parsing
11127 which covers all the compilation errors that occurred.  Some compilation
11128 errors, however, will throw an exception immediately.
11129
11130 =cut
11131 */
11132
11133 OP *
11134 Perl_parse_listexpr(pTHX_ U32 flags)
11135 {
11136     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11137 }
11138
11139 /*
11140 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11141
11142 Parse a single complete Perl expression.  This allows the full
11143 expression grammar, including the lowest-precedence operators such
11144 as C<or>.  The expression must be followed (and thus terminated) by a
11145 token that an expression would normally be terminated by: end-of-file,
11146 closing bracketing punctuation, semicolon, or one of the keywords that
11147 signals a postfix expression-statement modifier.  If I<flags> includes
11148 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11149 mandatory.  It is up to the caller to ensure that the dynamic parser
11150 state (L</PL_parser> et al) is correctly set to reflect the source of
11151 the code to be parsed and the lexical context for the expression.
11152
11153 The op tree representing the expression is returned.  If an optional
11154 expression is absent, a null pointer is returned, otherwise the pointer
11155 will be non-null.
11156
11157 If an error occurs in parsing or compilation, in most cases a valid op
11158 tree is returned anyway.  The error is reflected in the parser state,
11159 normally resulting in a single exception at the top level of parsing
11160 which covers all the compilation errors that occurred.  Some compilation
11161 errors, however, will throw an exception immediately.
11162
11163 =cut
11164 */
11165
11166 OP *
11167 Perl_parse_fullexpr(pTHX_ U32 flags)
11168 {
11169     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11170 }
11171
11172 /*
11173 =for apidoc Amx|OP *|parse_block|U32 flags
11174
11175 Parse a single complete Perl code block.  This consists of an opening
11176 brace, a sequence of statements, and a closing brace.  The block
11177 constitutes a lexical scope, so C<my> variables and various compile-time
11178 effects can be contained within it.  It is up to the caller to ensure
11179 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11180 reflect the source of the code to be parsed and the lexical context for
11181 the statement.
11182
11183 The op tree representing the code block is returned.  This is always a
11184 real op, never a null pointer.  It will normally be a C<lineseq> list,
11185 including C<nextstate> or equivalent ops.  No ops to construct any kind
11186 of runtime scope are included by virtue of it being a block.
11187
11188 If an error occurs in parsing or compilation, in most cases a valid op
11189 tree (most likely null) is returned anyway.  The error is reflected in
11190 the parser state, normally resulting in a single exception at the top
11191 level of parsing which covers all the compilation errors that occurred.
11192 Some compilation errors, however, will throw an exception immediately.
11193
11194 The I<flags> parameter is reserved for future use, and must always
11195 be zero.
11196
11197 =cut
11198 */
11199
11200 OP *
11201 Perl_parse_block(pTHX_ U32 flags)
11202 {
11203     if (flags)
11204         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11205     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11206 }
11207
11208 /*
11209 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11210
11211 Parse a single unadorned Perl statement.  This may be a normal imperative
11212 statement or a declaration that has compile-time effect.  It does not
11213 include any label or other affixture.  It is up to the caller to ensure
11214 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11215 reflect the source of the code to be parsed and the lexical context for
11216 the statement.
11217
11218 The op tree representing the statement is returned.  This may be a
11219 null pointer if the statement is null, for example if it was actually
11220 a subroutine definition (which has compile-time side effects).  If not
11221 null, it will be ops directly implementing the statement, suitable to
11222 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11223 equivalent op (except for those embedded in a scope contained entirely
11224 within the statement).
11225
11226 If an error occurs in parsing or compilation, in most cases a valid op
11227 tree (most likely null) is returned anyway.  The error is reflected in
11228 the parser state, normally resulting in a single exception at the top
11229 level of parsing which covers all the compilation errors that occurred.
11230 Some compilation errors, however, will throw an exception immediately.
11231
11232 The I<flags> parameter is reserved for future use, and must always
11233 be zero.
11234
11235 =cut
11236 */
11237
11238 OP *
11239 Perl_parse_barestmt(pTHX_ U32 flags)
11240 {
11241     if (flags)
11242         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11243     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11244 }
11245
11246 /*
11247 =for apidoc Amx|SV *|parse_label|U32 flags
11248
11249 Parse a single label, possibly optional, of the type that may prefix a
11250 Perl statement.  It is up to the caller to ensure that the dynamic parser
11251 state (L</PL_parser> et al) is correctly set to reflect the source of
11252 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11253 label is optional, otherwise it is mandatory.
11254
11255 The name of the label is returned in the form of a fresh scalar.  If an
11256 optional label is absent, a null pointer is returned.
11257
11258 If an error occurs in parsing, which can only occur if the label is
11259 mandatory, a valid label is returned anyway.  The error is reflected in
11260 the parser state, normally resulting in a single exception at the top
11261 level of parsing which covers all the compilation errors that occurred.
11262
11263 =cut
11264 */
11265
11266 SV *
11267 Perl_parse_label(pTHX_ U32 flags)
11268 {
11269     if (flags & ~PARSE_OPTIONAL)
11270         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11271     if (PL_lex_state == LEX_KNOWNEXT) {
11272         PL_parser->yychar = yylex();
11273         if (PL_parser->yychar == LABEL) {
11274             char *lpv = pl_yylval.pval;
11275             STRLEN llen = strlen(lpv);
11276             SV *lsv;
11277             PL_parser->yychar = YYEMPTY;
11278             lsv = newSV_type(SVt_PV);
11279             SvPV_set(lsv, lpv);
11280             SvCUR_set(lsv, llen);
11281             SvLEN_set(lsv, llen+1);
11282             SvPOK_on(lsv);
11283             return lsv;
11284         } else {
11285             yyunlex();
11286             goto no_label;
11287         }
11288     } else {
11289         char *s, *t;
11290         U8 c;
11291         STRLEN wlen, bufptr_pos;
11292         lex_read_space(0);
11293         t = s = PL_bufptr;
11294         c = (U8)*s;
11295         if (!isIDFIRST_A(c))
11296             goto no_label;
11297         do {
11298             c = (U8)*++t;
11299         } while(isWORDCHAR_A(c));
11300         wlen = t - s;
11301         if (word_takes_any_delimeter(s, wlen))
11302             goto no_label;
11303         bufptr_pos = s - SvPVX(PL_linestr);
11304         PL_bufptr = t;
11305         lex_read_space(LEX_KEEP_PREVIOUS);
11306         t = PL_bufptr;
11307         s = SvPVX(PL_linestr) + bufptr_pos;
11308         if (t[0] == ':' && t[1] != ':') {
11309             PL_oldoldbufptr = PL_oldbufptr;
11310             PL_oldbufptr = s;
11311             PL_bufptr = t+1;
11312             return newSVpvn(s, wlen);
11313         } else {
11314             PL_bufptr = s;
11315             no_label:
11316             if (flags & PARSE_OPTIONAL) {
11317                 return NULL;
11318             } else {
11319                 qerror(Perl_mess(aTHX_ "Parse error"));
11320                 return newSVpvs("x");
11321             }
11322         }
11323     }
11324 }
11325
11326 /*
11327 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11328
11329 Parse a single complete Perl statement.  This may be a normal imperative
11330 statement or a declaration that has compile-time effect, and may include
11331 optional labels.  It is up to the caller to ensure that the dynamic
11332 parser state (L</PL_parser> et al) is correctly set to reflect the source
11333 of the code to be parsed and the lexical context for the statement.
11334
11335 The op tree representing the statement is returned.  This may be a
11336 null pointer if the statement is null, for example if it was actually
11337 a subroutine definition (which has compile-time side effects).  If not
11338 null, it will be the result of a L</newSTATEOP> call, normally including
11339 a C<nextstate> or equivalent op.
11340
11341 If an error occurs in parsing or compilation, in most cases a valid op
11342 tree (most likely null) is returned anyway.  The error is reflected in
11343 the parser state, normally resulting in a single exception at the top
11344 level of parsing which covers all the compilation errors that occurred.
11345 Some compilation errors, however, will throw an exception immediately.
11346
11347 The I<flags> parameter is reserved for future use, and must always
11348 be zero.
11349
11350 =cut
11351 */
11352
11353 OP *
11354 Perl_parse_fullstmt(pTHX_ U32 flags)
11355 {
11356     if (flags)
11357         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11358     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11359 }
11360
11361 /*
11362 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11363
11364 Parse a sequence of zero or more Perl statements.  These may be normal
11365 imperative statements, including optional labels, or declarations
11366 that have compile-time effect, or any mixture thereof.  The statement
11367 sequence ends when a closing brace or end-of-file is encountered in a
11368 place where a new statement could have validly started.  It is up to
11369 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11370 is correctly set to reflect the source of the code to be parsed and the
11371 lexical context for the statements.
11372
11373 The op tree representing the statement sequence is returned.  This may
11374 be a null pointer if the statements were all null, for example if there
11375 were no statements or if there were only subroutine definitions (which
11376 have compile-time side effects).  If not null, it will be a C<lineseq>
11377 list, normally including C<nextstate> or equivalent ops.
11378
11379 If an error occurs in parsing or compilation, in most cases a valid op
11380 tree is returned anyway.  The error is reflected in the parser state,
11381 normally resulting in a single exception at the top level of parsing
11382 which covers all the compilation errors that occurred.  Some compilation
11383 errors, however, will throw an exception immediately.
11384
11385 The I<flags> parameter is reserved for future use, and must always
11386 be zero.
11387
11388 =cut
11389 */
11390
11391 OP *
11392 Perl_parse_stmtseq(pTHX_ U32 flags)
11393 {
11394     OP *stmtseqop;
11395     I32 c;
11396     if (flags)
11397         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11398     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11399     c = lex_peek_unichar(0);
11400     if (c != -1 && c != /*{*/'}')
11401         qerror(Perl_mess(aTHX_ "Parse error"));
11402     return stmtseqop;
11403 }
11404
11405 void
11406 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11407 {
11408     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11409     deprecate("qw(...) as parentheses");
11410     force_next((4<<24)|')');
11411     if (qwlist->op_type == OP_STUB) {
11412         op_free(qwlist);
11413     }
11414     else {
11415         start_force(PL_curforce);
11416         NEXTVAL_NEXTTOKE.opval = qwlist;
11417         force_next(THING);
11418     }
11419     force_next((2<<24)|'(');
11420 }
11421
11422 /*
11423  * Local variables:
11424  * c-indentation-style: bsd
11425  * c-basic-offset: 4
11426  * indent-tabs-mode: t
11427  * End:
11428  *
11429  * ex: set ts=8 sts=4 sw=4 noet:
11430  */