toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN0OP       : zero-argument function, with its op created in this file
 228  * FUN1         : not used, except for not, which isn't a UNIOP
 229  * BOop         : bitwise or or xor
 230  * BAop         : bitwise and
 231  * SHop         : shift operator
 232  * PWop         : power operator
 233  * PMop         : pattern-matching operator
 234  * Aop          : addition-level operator
 235  * Mop          : multiplication-level operator
 236  * Eop          : equality-testing operator
 237  * Rop          : relational operator <= != gt
 238  *
 239  * Also see LOP and lop() below.
 240  */
 241
 242 #ifdef DEBUGGING /* Serve -DT. */
 243 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 244 #else
 245 #   define REPORT(retval) (retval)
 246 #endif
 247
 248 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 249 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 250 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 251 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 253 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 254 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 255 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 256 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 257 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 258 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 259 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 260 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 261 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 262 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 263 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 264 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 265 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 266 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 267 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 268 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 269
 270 /* This bit of chicanery makes a unary function followed by
 271  * a parenthesis into a function with one argument, highest precedence.
 272  * The UNIDOR macro is for unary functions that can be followed by the //
 273  * operator (such as C<shift // 0>).
 274  */
 275 #define UNI2(f,x) { \
 276         pl_yylval.ival = f; \
 277         PL_expect = x; \
 278         PL_bufptr = s; \
 279         PL_last_uni = PL_oldbufptr; \
 280         PL_last_lop_op = f; \
 281         if (*s == '(') \
 282             return REPORT( (int)FUNC1 ); \
 283         s = PEEKSPACE(s); \
 284         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 285         }
 286 #define UNI(f)    UNI2(f,XTERM)
 287 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 288
 289 #define UNIBRACK(f) { \
 290         pl_yylval.ival = f; \
 291         PL_bufptr = s; \
 292         PL_last_uni = PL_oldbufptr; \
 293         if (*s == '(') \
 294             return REPORT( (int)FUNC1 ); \
 295         s = PEEKSPACE(s); \
 296         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 297         }
 298
 299 /* grandfather return to old style */
 300 #define OLDLOP(f) \
 301         do { \
 302             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 303                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 304             pl_yylval.ival = (f); \
 305             PL_expect = XTERM; \
 306             PL_bufptr = s; \
 307             return (int)LSTOP; \
 308         } while(0)
 309
 310 #ifdef DEBUGGING
 311
 312 /* how to interpret the pl_yylval associated with the token */
 313 enum token_type {
 314     TOKENTYPE_NONE,
 315     TOKENTYPE_IVAL,
 316     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 317     TOKENTYPE_PVAL,
 318     TOKENTYPE_OPVAL,
 319     TOKENTYPE_GVVAL
 320 };
 321
 322 static struct debug_tokens {
 323     const int token;
 324     enum token_type type;
 325     const char *name;
 326 } const debug_tokens[] =
 327 {
 328     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 329     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 330     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 331     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 332     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 333     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 334     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 335     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 336     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 337     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 338     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 339     { DO,               TOKENTYPE_NONE,         "DO" },
 340     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 341     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 342     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 343     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 344     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 345     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 346     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 347     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 348     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 349     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 350     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 351     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 352     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 353     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 354     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 355     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 356     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 357     { IF,               TOKENTYPE_IVAL,         "IF" },
 358     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 359     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 360     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 361     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 362     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 363     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 364     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 365     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 366     { MY,               TOKENTYPE_IVAL,         "MY" },
 367     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 368     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 369     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 370     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 371     { OROR,             TOKENTYPE_NONE,         "OROR" },
 372     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 373     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 374     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 375     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 376     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 377     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 378     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 379     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 380     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 381     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 382     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 383     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 384     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 385     { SUB,              TOKENTYPE_NONE,         "SUB" },
 386     { THING,            TOKENTYPE_OPVAL,        "THING" },
 387     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 388     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 389     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 390     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 391     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 392     { USE,              TOKENTYPE_IVAL,         "USE" },
 393     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 394     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 395     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 396     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 397     { 0,                TOKENTYPE_NONE,         NULL }
 398 };
 399
 400 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 401
 402 STATIC int
 403 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 404 {
 405     dVAR;
 406
 407     PERL_ARGS_ASSERT_TOKEREPORT;
 408
 409     if (DEBUG_T_TEST) {
 410         const char *name = NULL;
 411         enum token_type type = TOKENTYPE_NONE;
 412         const struct debug_tokens *p;
 413         SV* const report = newSVpvs("<== ");
 414
 415         for (p = debug_tokens; p->token; p++) {
 416             if (p->token == (int)rv) {
 417                 name = p->name;
 418                 type = p->type;
 419                 break;
 420             }
 421         }
 422         if (name)
 423             Perl_sv_catpv(aTHX_ report, name);
 424         else if ((char)rv > ' ' && (char)rv < '~')
 425             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 426         else if (!rv)
 427             sv_catpvs(report, "EOF");
 428         else
 429             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 430         switch (type) {
 431         case TOKENTYPE_NONE:
 432         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 433             break;
 434         case TOKENTYPE_IVAL:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 436             break;
 437         case TOKENTYPE_OPNUM:
 438             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 439                                     PL_op_name[lvalp->ival]);
 440             break;
 441         case TOKENTYPE_PVAL:
 442             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 443             break;
 444         case TOKENTYPE_OPVAL:
 445             if (lvalp->opval) {
 446                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 447                                     PL_op_name[lvalp->opval->op_type]);
 448                 if (lvalp->opval->op_type == OP_CONST) {
 449                     Perl_sv_catpvf(aTHX_ report, " %s",
 450                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 451                 }
 452
 453             }
 454             else
 455                 sv_catpvs(report, "(opval=null)");
 456             break;
 457         }
 458         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 459     };
 460     return (int)rv;
 461 }
 462
 463
 464 /* print the buffer with suitable escapes */
 465
 466 STATIC void
 467 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 468 {
 469     SV* const tmp = newSVpvs("");
 470
 471     PERL_ARGS_ASSERT_PRINTBUF;
 472
 473     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 474     SvREFCNT_dec(tmp);
 475 }
 476
 477 #endif
 478
 479 static int
 480 S_deprecate_commaless_var_list(pTHX) {
 481     PL_expect = XTERM;
 482     deprecate("comma-less variable list");
 483     return REPORT(','); /* grandfather non-comma-format format */
 484 }
 485
 486 /*
 487  * S_ao
 488  *
 489  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 490  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 491  */
 492
 493 STATIC int
 494 S_ao(pTHX_ int toketype)
 495 {
 496     dVAR;
 497     if (*PL_bufptr == '=') {
 498         PL_bufptr++;
 499         if (toketype == ANDAND)
 500             pl_yylval.ival = OP_ANDASSIGN;
 501         else if (toketype == OROR)
 502             pl_yylval.ival = OP_ORASSIGN;
 503         else if (toketype == DORDOR)
 504             pl_yylval.ival = OP_DORASSIGN;
 505         toketype = ASSIGNOP;
 506     }
 507     return toketype;
 508 }
 509
 510 /*
 511  * S_no_op
 512  * When Perl expects an operator and finds something else, no_op
 513  * prints the warning.  It always prints "<something> found where
 514  * operator expected.  It prints "Missing semicolon on previous line?"
 515  * if the surprise occurs at the start of the line.  "do you need to
 516  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 517  * where the compiler doesn't know if foo is a method call or a function.
 518  * It prints "Missing operator before end of line" if there's nothing
 519  * after the missing operator, or "... before <...>" if there is something
 520  * after the missing operator.
 521  */
 522
 523 STATIC void
 524 S_no_op(pTHX_ const char *const what, char *s)
 525 {
 526     dVAR;
 527     char * const oldbp = PL_bufptr;
 528     const bool is_first = (PL_oldbufptr == PL_linestart);
 529
 530     PERL_ARGS_ASSERT_NO_OP;
 531
 532     if (!s)
 533         s = oldbp;
 534     else
 535         PL_bufptr = s;
 536     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 537     if (ckWARN_d(WARN_SYNTAX)) {
 538         if (is_first)
 539             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 540                     "\t(Missing semicolon on previous line?)\n");
 541         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 542             const char *t;
 543             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 544                 NOOP;
 545             if (t < PL_bufptr && isSPACE(*t))
 546                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 547                         "\t(Do you need to predeclare %.*s?)\n",
 548                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 549         }
 550         else {
 551             assert(s >= oldbp);
 552             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 553                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 554         }
 555     }
 556     PL_bufptr = oldbp;
 557 }
 558
 559 /*
 560  * S_missingterm
 561  * Complain about missing quote/regexp/heredoc terminator.
 562  * If it's called with NULL then it cauterizes the line buffer.
 563  * If we're in a delimited string and the delimiter is a control
 564  * character, it's reformatted into a two-char sequence like ^C.
 565  * This is fatal.
 566  */
 567
 568 STATIC void
 569 S_missingterm(pTHX_ char *s)
 570 {
 571     dVAR;
 572     char tmpbuf[3];
 573     char q;
 574     if (s) {
 575         char * const nl = strrchr(s,'\n');
 576         if (nl)
 577             *nl = '\0';
 578     }
 579     else if (isCNTRL(PL_multi_close)) {
 580         *tmpbuf = '^';
 581         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 582         tmpbuf[2] = '\0';
 583         s = tmpbuf;
 584     }
 585     else {
 586         *tmpbuf = (char)PL_multi_close;
 587         tmpbuf[1] = '\0';
 588         s = tmpbuf;
 589     }
 590     q = strchr(s,'"') ? '\'' : '"';
 591     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 592 }
 593
 594 /*
 595  * Check whether the named feature is enabled.
 596  */
 597 bool
 598 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 599 {
 600     dVAR;
 601     HV * const hinthv = GvHV(PL_hintgv);
 602     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 603
 604     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 605
 606     if (namelen > MAX_FEATURE_LEN)
 607         return FALSE;
 608     memcpy(&he_name[8], name, namelen);
 609
 610     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 611 }
 612
 613 /*
 614  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 615  * utf16-to-utf8-reversed.
 616  */
 617
 618 #ifdef PERL_CR_FILTER
 619 static void
 620 strip_return(SV *sv)
 621 {
 622     register const char *s = SvPVX_const(sv);
 623     register const char * const e = s + SvCUR(sv);
 624
 625     PERL_ARGS_ASSERT_STRIP_RETURN;
 626
 627     /* outer loop optimized to do nothing if there are no CR-LFs */
 628     while (s < e) {
 629         if (*s++ == '\r' && *s == '\n') {
 630             /* hit a CR-LF, need to copy the rest */
 631             register char *d = s - 1;
 632             *d++ = *s++;
 633             while (s < e) {
 634                 if (*s == '\r' && s[1] == '\n')
 635                     s++;
 636                 *d++ = *s++;
 637             }
 638             SvCUR(sv) -= s - d;
 639             return;
 640         }
 641     }
 642 }
 643
 644 STATIC I32
 645 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 646 {
 647     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 648     if (count > 0 && !maxlen)
 649         strip_return(sv);
 650     return count;
 651 }
 652 #endif
 653
 654 /*
 655 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 656
 657 Creates and initialises a new lexer/parser state object, supplying
 658 a context in which to lex and parse from a new source of Perl code.
 659 A pointer to the new state object is placed in L</PL_parser>.  An entry
 660 is made on the save stack so that upon unwinding the new state object
 661 will be destroyed and the former value of L</PL_parser> will be restored.
 662 Nothing else need be done to clean up the parsing context.
 663
 664 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 665 non-null, provides a string (in SV form) containing code to be parsed.
 666 A copy of the string is made, so subsequent modification of I<line>
 667 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 668 from which code will be read to be parsed.  If both are non-null, the
 669 code in I<line> comes first and must consist of complete lines of input,
 670 and I<rsfp> supplies the remainder of the source.
 671
 672 The I<flags> parameter is reserved for future use, and must always
 673 be zero, except for one flag that is currently reserved for perl's internal
 674 use.
 675
 676 =cut
 677 */
 678
 679 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 680    can share filters with the current parser. */
 681
 682 void
 683 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 684 {
 685     dVAR;
 686     const char *s = NULL;
 687     STRLEN len;
 688     yy_parser *parser, *oparser;
 689     if (flags && flags & ~LEX_START_FLAGS)
 690         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 691
 692     /* create and initialise a parser */
 693
 694     Newxz(parser, 1, yy_parser);
 695     parser->old_parser = oparser = PL_parser;
 696     PL_parser = parser;
 697
 698     parser->stack = NULL;
 699     parser->ps = NULL;
 700     parser->stack_size = 0;
 701
 702     /* on scope exit, free this parser and restore any outer one */
 703     SAVEPARSER(parser);
 704     parser->saved_curcop = PL_curcop;
 705
 706     /* initialise lexer state */
 707
 708 #ifdef PERL_MAD
 709     parser->curforce = -1;
 710 #else
 711     parser->nexttoke = 0;
 712 #endif
 713     parser->error_count = oparser ? oparser->error_count : 0;
 714     parser->copline = NOLINE;
 715     parser->lex_state = LEX_NORMAL;
 716     parser->expect = XSTATE;
 717     parser->rsfp = rsfp;
 718     parser->rsfp_filters =
 719       !(flags & LEX_START_SAME_FILTER) || !oparser
 720         ? newAV()
 721         : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 722
 723     Newx(parser->lex_brackstack, 120, char);
 724     Newx(parser->lex_casestack, 12, char);
 725     *parser->lex_casestack = '\0';
 726
 727     if (line) {
 728         s = SvPV_const(line, len);
 729         parser->linestr = flags & LEX_START_COPIED
 730                             ? SvREFCNT_inc_simple_NN(line)
 731                             : newSVpvn_flags(s, len, SvUTF8(line));
 732         if (s[len-1] != ';')
 733             sv_catpvs(parser->linestr, "\n;");
 734     } else {
 735         parser->linestr = newSVpvs("\n;");
 736     }
 737     parser->oldoldbufptr =
 738         parser->oldbufptr =
 739         parser->bufptr =
 740         parser->linestart = SvPVX(parser->linestr);
 741     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 742     parser->last_lop = parser->last_uni = NULL;
 743     parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES);
 744
 745     parser->in_pod = parser->filtered = 0;
 746 }
 747
 748
 749 /* delete a parser object */
 750
 751 void
 752 Perl_parser_free(pTHX_  const yy_parser *parser)
 753 {
 754     PERL_ARGS_ASSERT_PARSER_FREE;
 755
 756     PL_curcop = parser->saved_curcop;
 757     SvREFCNT_dec(parser->linestr);
 758
 759     if (parser->rsfp == PerlIO_stdin())
 760         PerlIO_clearerr(parser->rsfp);
 761     else if (parser->rsfp && (!parser->old_parser ||
 762                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 763         PerlIO_close(parser->rsfp);
 764     SvREFCNT_dec(parser->rsfp_filters);
 765
 766     Safefree(parser->lex_brackstack);
 767     Safefree(parser->lex_casestack);
 768     PL_parser = parser->old_parser;
 769     Safefree(parser);
 770 }
 771
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 940 {
 941     dVAR;
 942     char *bufptr;
 943     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 944     if (flags & ~(LEX_STUFF_UTF8))
 945         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 946     if (UTF) {
 947         if (flags & LEX_STUFF_UTF8) {
 948             goto plain_copy;
 949         } else {
 950             STRLEN highhalf = 0;
 951             const char *p, *e = pv+len;
 952             for (p = pv; p != e; p++)
 953                 highhalf += !!(((U8)*p) & 0x80);
 954             if (!highhalf)
 955                 goto plain_copy;
 956             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 957             bufptr = PL_parser->bufptr;
 958             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 959             SvCUR_set(PL_parser->linestr,
 960                 SvCUR(PL_parser->linestr) + len+highhalf);
 961             PL_parser->bufend += len+highhalf;
 962             for (p = pv; p != e; p++) {
 963                 U8 c = (U8)*p;
 964                 if (c & 0x80) {
 965                     *bufptr++ = (char)(0xc0 | (c >> 6));
 966                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 967                 } else {
 968                     *bufptr++ = (char)c;
 969                 }
 970             }
 971         }
 972     } else {
 973         if (flags & LEX_STUFF_UTF8) {
 974             STRLEN highhalf = 0;
 975             const char *p, *e = pv+len;
 976             for (p = pv; p != e; p++) {
 977                 U8 c = (U8)*p;
 978                 if (c >= 0xc4) {
 979                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 980                                 "non-Latin-1 character into Latin-1 input");
 981                 } else if (c >= 0xc2 && p+1 != e &&
 982                             (((U8)p[1]) & 0xc0) == 0x80) {
 983                     p++;
 984                     highhalf++;
 985                 } else if (c >= 0x80) {
 986                     /* malformed UTF-8 */
 987                     ENTER;
 988                     SAVESPTR(PL_warnhook);
 989                     PL_warnhook = PERL_WARNHOOK_FATAL;
 990                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 991                     LEAVE;
 992                 }
 993             }
 994             if (!highhalf)
 995                 goto plain_copy;
 996             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 997             bufptr = PL_parser->bufptr;
 998             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 999             SvCUR_set(PL_parser->linestr,
1000                 SvCUR(PL_parser->linestr) + len-highhalf);
1001             PL_parser->bufend += len-highhalf;
1002             for (p = pv; p != e; p++) {
1003                 U8 c = (U8)*p;
1004                 if (c & 0x80) {
1005                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1006                     p++;
1007                 } else {
1008                     *bufptr++ = (char)c;
1009                 }
1010             }
1011         } else {
1012             plain_copy:
1013             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1014             bufptr = PL_parser->bufptr;
1015             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1016             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1017             PL_parser->bufend += len;
1018             Copy(pv, bufptr, len, char);
1019         }
1020     }
1021 }
1022
1023 /*
1024 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1025
1026 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1027 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1028 reallocating the buffer if necessary.  This means that lexing code that
1029 runs later will see the characters as if they had appeared in the input.
1030 It is not recommended to do this as part of normal parsing, and most
1031 uses of this facility run the risk of the inserted characters being
1032 interpreted in an unintended manner.
1033
1034 The string to be inserted is represented by octets starting at I<pv>
1035 and continuing to the first nul.  These octets are interpreted as either
1036 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1037 in I<flags>.  The characters are recoded for the lexer buffer, according
1038 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1039 If it is not convenient to nul-terminate a string to be inserted, the
1040 L</lex_stuff_pvn> function is more appropriate.
1041
1042 =cut
1043 */
1044
1045 void
1046 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1047 {
1048     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1049     lex_stuff_pvn(pv, strlen(pv), flags);
1050 }
1051
1052 /*
1053 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1054
1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057 reallocating the buffer if necessary.  This means that lexing code that
1058 runs later will see the characters as if they had appeared in the input.
1059 It is not recommended to do this as part of normal parsing, and most
1060 uses of this facility run the risk of the inserted characters being
1061 interpreted in an unintended manner.
1062
1063 The string to be inserted is the string value of I<sv>.  The characters
1064 are recoded for the lexer buffer, according to how the buffer is currently
1065 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1066 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1067 need to construct a scalar.
1068
1069 =cut
1070 */
1071
1072 void
1073 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1074 {
1075     char *pv;
1076     STRLEN len;
1077     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1078     if (flags)
1079         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1080     pv = SvPV(sv, len);
1081     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1082 }
1083
1084 /*
1085 =for apidoc Amx|void|lex_unstuff|char *ptr
1086
1087 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1088 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1089 This hides the discarded text from any lexing code that runs later,
1090 as if the text had never appeared.
1091
1092 This is not the normal way to consume lexed text.  For that, use
1093 L</lex_read_to>.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_unstuff(pTHX_ char *ptr)
1100 {
1101     char *buf, *bufend;
1102     STRLEN unstuff_len;
1103     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1104     buf = PL_parser->bufptr;
1105     if (ptr < buf)
1106         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1107     if (ptr == buf)
1108         return;
1109     bufend = PL_parser->bufend;
1110     if (ptr > bufend)
1111         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1112     unstuff_len = ptr - buf;
1113     Move(ptr, buf, bufend+1-ptr, char);
1114     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1115     PL_parser->bufend = bufend - unstuff_len;
1116 }
1117
1118 /*
1119 =for apidoc Amx|void|lex_read_to|char *ptr
1120
1121 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1122 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1123 performing the correct bookkeeping whenever a newline character is passed.
1124 This is the normal way to consume lexed text.
1125
1126 Interpretation of the buffer's octets can be abstracted out by
1127 using the slightly higher-level functions L</lex_peek_unichar> and
1128 L</lex_read_unichar>.
1129
1130 =cut
1131 */
1132
1133 void
1134 Perl_lex_read_to(pTHX_ char *ptr)
1135 {
1136     char *s;
1137     PERL_ARGS_ASSERT_LEX_READ_TO;
1138     s = PL_parser->bufptr;
1139     if (ptr < s || ptr > PL_parser->bufend)
1140         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1141     for (; s != ptr; s++)
1142         if (*s == '\n') {
1143             CopLINE_inc(PL_curcop);
1144             PL_parser->linestart = s+1;
1145         }
1146     PL_parser->bufptr = ptr;
1147 }
1148
1149 /*
1150 =for apidoc Amx|void|lex_discard_to|char *ptr
1151
1152 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1153 up to I<ptr>.  The remaining content of the buffer will be moved, and
1154 all pointers into the buffer updated appropriately.  I<ptr> must not
1155 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1156 it is not permitted to discard text that has yet to be lexed.
1157
1158 Normally it is not necessarily to do this directly, because it suffices to
1159 use the implicit discarding behaviour of L</lex_next_chunk> and things
1160 based on it.  However, if a token stretches across multiple lines,
1161 and the lexing code has kept multiple lines of text in the buffer for
1162 that purpose, then after completion of the token it would be wise to
1163 explicitly discard the now-unneeded earlier lines, to avoid future
1164 multi-line tokens growing the buffer without bound.
1165
1166 =cut
1167 */
1168
1169 void
1170 Perl_lex_discard_to(pTHX_ char *ptr)
1171 {
1172     char *buf;
1173     STRLEN discard_len;
1174     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1175     buf = SvPVX(PL_parser->linestr);
1176     if (ptr < buf)
1177         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1178     if (ptr == buf)
1179         return;
1180     if (ptr > PL_parser->bufptr)
1181         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1182     discard_len = ptr - buf;
1183     if (PL_parser->oldbufptr < ptr)
1184         PL_parser->oldbufptr = ptr;
1185     if (PL_parser->oldoldbufptr < ptr)
1186         PL_parser->oldoldbufptr = ptr;
1187     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1188         PL_parser->last_uni = NULL;
1189     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1190         PL_parser->last_lop = NULL;
1191     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1192     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1193     PL_parser->bufend -= discard_len;
1194     PL_parser->bufptr -= discard_len;
1195     PL_parser->oldbufptr -= discard_len;
1196     PL_parser->oldoldbufptr -= discard_len;
1197     if (PL_parser->last_uni)
1198         PL_parser->last_uni -= discard_len;
1199     if (PL_parser->last_lop)
1200         PL_parser->last_lop -= discard_len;
1201 }
1202
1203 /*
1204 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1205
1206 Reads in the next chunk of text to be lexed, appending it to
1207 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1208 looked to the end of the current chunk and wants to know more.  It is
1209 usual, but not necessary, for lexing to have consumed the entirety of
1210 the current chunk at this time.
1211
1212 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1213 chunk (i.e., the current chunk has been entirely consumed), normally the
1214 current chunk will be discarded at the same time that the new chunk is
1215 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1216 will not be discarded.  If the current chunk has not been entirely
1217 consumed, then it will not be discarded regardless of the flag.
1218
1219 Returns true if some new text was added to the buffer, or false if the
1220 buffer has reached the end of the input text.
1221
1222 =cut
1223 */
1224
1225 #define LEX_FAKE_EOF 0x80000000
1226
1227 bool
1228 Perl_lex_next_chunk(pTHX_ U32 flags)
1229 {
1230     SV *linestr;
1231     char *buf;
1232     STRLEN old_bufend_pos, new_bufend_pos;
1233     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1234     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1235     bool got_some_for_debugger = 0;
1236     bool got_some;
1237     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1238         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1239     linestr = PL_parser->linestr;
1240     buf = SvPVX(linestr);
1241     if (!(flags & LEX_KEEP_PREVIOUS) &&
1242             PL_parser->bufptr == PL_parser->bufend) {
1243         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1244         linestart_pos = 0;
1245         if (PL_parser->last_uni != PL_parser->bufend)
1246             PL_parser->last_uni = NULL;
1247         if (PL_parser->last_lop != PL_parser->bufend)
1248             PL_parser->last_lop = NULL;
1249         last_uni_pos = last_lop_pos = 0;
1250         *buf = 0;
1251         SvCUR(linestr) = 0;
1252     } else {
1253         old_bufend_pos = PL_parser->bufend - buf;
1254         bufptr_pos = PL_parser->bufptr - buf;
1255         oldbufptr_pos = PL_parser->oldbufptr - buf;
1256         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1257         linestart_pos = PL_parser->linestart - buf;
1258         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1259         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1260     }
1261     if (flags & LEX_FAKE_EOF) {
1262         goto eof;
1263     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1264         got_some = 0;
1265     } else if (filter_gets(linestr, old_bufend_pos)) {
1266         got_some = 1;
1267         got_some_for_debugger = 1;
1268     } else {
1269         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1270             sv_setpvs(linestr, "");
1271         eof:
1272         /* End of real input.  Close filehandle (unless it was STDIN),
1273          * then add implicit termination.
1274          */
1275         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1276             PerlIO_clearerr(PL_parser->rsfp);
1277         else if (PL_parser->rsfp)
1278             (void)PerlIO_close(PL_parser->rsfp);
1279         PL_parser->rsfp = NULL;
1280         PL_parser->in_pod = PL_parser->filtered = 0;
1281 #ifdef PERL_MAD
1282         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1283             PL_faketokens = 1;
1284 #endif
1285         if (!PL_in_eval && PL_minus_p) {
1286             sv_catpvs(linestr,
1287                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1288             PL_minus_n = PL_minus_p = 0;
1289         } else if (!PL_in_eval && PL_minus_n) {
1290             sv_catpvs(linestr, /*{*/";}");
1291             PL_minus_n = 0;
1292         } else
1293             sv_catpvs(linestr, ";");
1294         got_some = 1;
1295     }
1296     buf = SvPVX(linestr);
1297     new_bufend_pos = SvCUR(linestr);
1298     PL_parser->bufend = buf + new_bufend_pos;
1299     PL_parser->bufptr = buf + bufptr_pos;
1300     PL_parser->oldbufptr = buf + oldbufptr_pos;
1301     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1302     PL_parser->linestart = buf + linestart_pos;
1303     if (PL_parser->last_uni)
1304         PL_parser->last_uni = buf + last_uni_pos;
1305     if (PL_parser->last_lop)
1306         PL_parser->last_lop = buf + last_lop_pos;
1307     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1308             PL_curstash != PL_debstash) {
1309         /* debugger active and we're not compiling the debugger code,
1310          * so store the line into the debugger's array of lines
1311          */
1312         update_debugger_info(NULL, buf+old_bufend_pos,
1313             new_bufend_pos-old_bufend_pos);
1314     }
1315     return got_some;
1316 }
1317
1318 /*
1319 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1320
1321 Looks ahead one (Unicode) character in the text currently being lexed.
1322 Returns the codepoint (unsigned integer value) of the next character,
1323 or -1 if lexing has reached the end of the input text.  To consume the
1324 peeked character, use L</lex_read_unichar>.
1325
1326 If the next character is in (or extends into) the next chunk of input
1327 text, the next chunk will be read in.  Normally the current chunk will be
1328 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1329 then the current chunk will not be discarded.
1330
1331 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1332 is encountered, an exception is generated.
1333
1334 =cut
1335 */
1336
1337 I32
1338 Perl_lex_peek_unichar(pTHX_ U32 flags)
1339 {
1340     dVAR;
1341     char *s, *bufend;
1342     if (flags & ~(LEX_KEEP_PREVIOUS))
1343         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1344     s = PL_parser->bufptr;
1345     bufend = PL_parser->bufend;
1346     if (UTF) {
1347         U8 head;
1348         I32 unichar;
1349         STRLEN len, retlen;
1350         if (s == bufend) {
1351             if (!lex_next_chunk(flags))
1352                 return -1;
1353             s = PL_parser->bufptr;
1354             bufend = PL_parser->bufend;
1355         }
1356         head = (U8)*s;
1357         if (!(head & 0x80))
1358             return head;
1359         if (head & 0x40) {
1360             len = PL_utf8skip[head];
1361             while ((STRLEN)(bufend-s) < len) {
1362                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1363                     break;
1364                 s = PL_parser->bufptr;
1365                 bufend = PL_parser->bufend;
1366             }
1367         }
1368         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1369         if (retlen == (STRLEN)-1) {
1370             /* malformed UTF-8 */
1371             ENTER;
1372             SAVESPTR(PL_warnhook);
1373             PL_warnhook = PERL_WARNHOOK_FATAL;
1374             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1375             LEAVE;
1376         }
1377         return unichar;
1378     } else {
1379         if (s == bufend) {
1380             if (!lex_next_chunk(flags))
1381                 return -1;
1382             s = PL_parser->bufptr;
1383         }
1384         return (U8)*s;
1385     }
1386 }
1387
1388 /*
1389 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1390
1391 Reads the next (Unicode) character in the text currently being lexed.
1392 Returns the codepoint (unsigned integer value) of the character read,
1393 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1394 if lexing has reached the end of the input text.  To non-destructively
1395 examine the next character, use L</lex_peek_unichar> instead.
1396
1397 If the next character is in (or extends into) the next chunk of input
1398 text, the next chunk will be read in.  Normally the current chunk will be
1399 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1400 then the current chunk will not be discarded.
1401
1402 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1403 is encountered, an exception is generated.
1404
1405 =cut
1406 */
1407
1408 I32
1409 Perl_lex_read_unichar(pTHX_ U32 flags)
1410 {
1411     I32 c;
1412     if (flags & ~(LEX_KEEP_PREVIOUS))
1413         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1414     c = lex_peek_unichar(flags);
1415     if (c != -1) {
1416         if (c == '\n')
1417             CopLINE_inc(PL_curcop);
1418         if (UTF)
1419             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1420         else
1421             ++(PL_parser->bufptr);
1422     }
1423     return c;
1424 }
1425
1426 /*
1427 =for apidoc Amx|void|lex_read_space|U32 flags
1428
1429 Reads optional spaces, in Perl style, in the text currently being
1430 lexed.  The spaces may include ordinary whitespace characters and
1431 Perl-style comments.  C<#line> directives are processed if encountered.
1432 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1433 at a non-space character (or the end of the input text).
1434
1435 If spaces extend into the next chunk of input text, the next chunk will
1436 be read in.  Normally the current chunk will be discarded at the same
1437 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1438 chunk will not be discarded.
1439
1440 =cut
1441 */
1442
1443 #define LEX_NO_NEXT_CHUNK 0x80000000
1444
1445 void
1446 Perl_lex_read_space(pTHX_ U32 flags)
1447 {
1448     char *s, *bufend;
1449     bool need_incline = 0;
1450     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1451         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1452 #ifdef PERL_MAD
1453     if (PL_skipwhite) {
1454         sv_free(PL_skipwhite);
1455         PL_skipwhite = NULL;
1456     }
1457     if (PL_madskills)
1458         PL_skipwhite = newSVpvs("");
1459 #endif /* PERL_MAD */
1460     s = PL_parser->bufptr;
1461     bufend = PL_parser->bufend;
1462     while (1) {
1463         char c = *s;
1464         if (c == '#') {
1465             do {
1466                 c = *++s;
1467             } while (!(c == '\n' || (c == 0 && s == bufend)));
1468         } else if (c == '\n') {
1469             s++;
1470             PL_parser->linestart = s;
1471             if (s == bufend)
1472                 need_incline = 1;
1473             else
1474                 incline(s);
1475         } else if (isSPACE(c)) {
1476             s++;
1477         } else if (c == 0 && s == bufend) {
1478             bool got_more;
1479 #ifdef PERL_MAD
1480             if (PL_madskills)
1481                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1482 #endif /* PERL_MAD */
1483             if (flags & LEX_NO_NEXT_CHUNK)
1484                 break;
1485             PL_parser->bufptr = s;
1486             CopLINE_inc(PL_curcop);
1487             got_more = lex_next_chunk(flags);
1488             CopLINE_dec(PL_curcop);
1489             s = PL_parser->bufptr;
1490             bufend = PL_parser->bufend;
1491             if (!got_more)
1492                 break;
1493             if (need_incline && PL_parser->rsfp) {
1494                 incline(s);
1495                 need_incline = 0;
1496             }
1497         } else {
1498             break;
1499         }
1500     }
1501 #ifdef PERL_MAD
1502     if (PL_madskills)
1503         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1504 #endif /* PERL_MAD */
1505     PL_parser->bufptr = s;
1506 }
1507
1508 /*
1509  * S_incline
1510  * This subroutine has nothing to do with tilting, whether at windmills
1511  * or pinball tables.  Its name is short for "increment line".  It
1512  * increments the current line number in CopLINE(PL_curcop) and checks
1513  * to see whether the line starts with a comment of the form
1514  *    # line 500 "foo.pm"
1515  * If so, it sets the current line number and file to the values in the comment.
1516  */
1517
1518 STATIC void
1519 S_incline(pTHX_ const char *s)
1520 {
1521     dVAR;
1522     const char *t;
1523     const char *n;
1524     const char *e;
1525     line_t line_num;
1526
1527     PERL_ARGS_ASSERT_INCLINE;
1528
1529     CopLINE_inc(PL_curcop);
1530     if (*s++ != '#')
1531         return;
1532     while (SPACE_OR_TAB(*s))
1533         s++;
1534     if (strnEQ(s, "line", 4))
1535         s += 4;
1536     else
1537         return;
1538     if (SPACE_OR_TAB(*s))
1539         s++;
1540     else
1541         return;
1542     while (SPACE_OR_TAB(*s))
1543         s++;
1544     if (!isDIGIT(*s))
1545         return;
1546
1547     n = s;
1548     while (isDIGIT(*s))
1549         s++;
1550     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1551         return;
1552     while (SPACE_OR_TAB(*s))
1553         s++;
1554     if (*s == '"' && (t = strchr(s+1, '"'))) {
1555         s++;
1556         e = t + 1;
1557     }
1558     else {
1559         t = s;
1560         while (!isSPACE(*t))
1561             t++;
1562         e = t;
1563     }
1564     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1565         e++;
1566     if (*e != '\n' && *e != '\0')
1567         return;         /* false alarm */
1568
1569     line_num = atoi(n)-1;
1570
1571     if (t - s > 0) {
1572         const STRLEN len = t - s;
1573         SV *const temp_sv = CopFILESV(PL_curcop);
1574         const char *cf;
1575         STRLEN tmplen;
1576
1577         if (temp_sv) {
1578             cf = SvPVX(temp_sv);
1579             tmplen = SvCUR(temp_sv);
1580         } else {
1581             cf = NULL;
1582             tmplen = 0;
1583         }
1584
1585         if (!PL_rsfp && !PL_parser->filtered) {
1586             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1587              * to *{"::_<newfilename"} */
1588             /* However, the long form of evals is only turned on by the
1589                debugger - usually they're "(eval %lu)" */
1590             char smallbuf[128];
1591             char *tmpbuf;
1592             GV **gvp;
1593             STRLEN tmplen2 = len;
1594             if (tmplen + 2 <= sizeof smallbuf)
1595                 tmpbuf = smallbuf;
1596             else
1597                 Newx(tmpbuf, tmplen + 2, char);
1598             tmpbuf[0] = '_';
1599             tmpbuf[1] = '<';
1600             memcpy(tmpbuf + 2, cf, tmplen);
1601             tmplen += 2;
1602             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1603             if (gvp) {
1604                 char *tmpbuf2;
1605                 GV *gv2;
1606
1607                 if (tmplen2 + 2 <= sizeof smallbuf)
1608                     tmpbuf2 = smallbuf;
1609                 else
1610                     Newx(tmpbuf2, tmplen2 + 2, char);
1611
1612                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1613                     /* Either they malloc'd it, or we malloc'd it,
1614                        so no prefix is present in ours.  */
1615                     tmpbuf2[0] = '_';
1616                     tmpbuf2[1] = '<';
1617                 }
1618
1619                 memcpy(tmpbuf2 + 2, s, tmplen2);
1620                 tmplen2 += 2;
1621
1622                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1623                 if (!isGV(gv2)) {
1624                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1625                     /* adjust ${"::_<newfilename"} to store the new file name */
1626                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1627                     /* The line number may differ. If that is the case,
1628                        alias the saved lines that are in the array.
1629                        Otherwise alias the whole array. */
1630                     if (CopLINE(PL_curcop) == line_num) {
1631                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1632                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1633                     }
1634                     else if (GvAV(*gvp)) {
1635                         AV * const av = GvAV(*gvp);
1636                         const I32 start = CopLINE(PL_curcop)+1;
1637                         I32 items = AvFILLp(av) - start;
1638                         if (items > 0) {
1639                             AV * const av2 = GvAVn(gv2);
1640                             SV **svp = AvARRAY(av) + start;
1641                             I32 l = (I32)line_num+1;
1642                             while (items--)
1643                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1644                         }
1645                     }
1646                 }
1647
1648                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1649             }
1650             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1651         }
1652         CopFILE_free(PL_curcop);
1653         CopFILE_setn(PL_curcop, s, len);
1654     }
1655     CopLINE_set(PL_curcop, line_num);
1656 }
1657
1658 #ifdef PERL_MAD
1659 /* skip space before PL_thistoken */
1660
1661 STATIC char *
1662 S_skipspace0(pTHX_ register char *s)
1663 {
1664     PERL_ARGS_ASSERT_SKIPSPACE0;
1665
1666     s = skipspace(s);
1667     if (!PL_madskills)
1668         return s;
1669     if (PL_skipwhite) {
1670         if (!PL_thiswhite)
1671             PL_thiswhite = newSVpvs("");
1672         sv_catsv(PL_thiswhite, PL_skipwhite);
1673         sv_free(PL_skipwhite);
1674         PL_skipwhite = 0;
1675     }
1676     PL_realtokenstart = s - SvPVX(PL_linestr);
1677     return s;
1678 }
1679
1680 /* skip space after PL_thistoken */
1681
1682 STATIC char *
1683 S_skipspace1(pTHX_ register char *s)
1684 {
1685     const char *start = s;
1686     I32 startoff = start - SvPVX(PL_linestr);
1687
1688     PERL_ARGS_ASSERT_SKIPSPACE1;
1689
1690     s = skipspace(s);
1691     if (!PL_madskills)
1692         return s;
1693     start = SvPVX(PL_linestr) + startoff;
1694     if (!PL_thistoken && PL_realtokenstart >= 0) {
1695         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1696         PL_thistoken = newSVpvn(tstart, start - tstart);
1697     }
1698     PL_realtokenstart = -1;
1699     if (PL_skipwhite) {
1700         if (!PL_nextwhite)
1701             PL_nextwhite = newSVpvs("");
1702         sv_catsv(PL_nextwhite, PL_skipwhite);
1703         sv_free(PL_skipwhite);
1704         PL_skipwhite = 0;
1705     }
1706     return s;
1707 }
1708
1709 STATIC char *
1710 S_skipspace2(pTHX_ register char *s, SV **svp)
1711 {
1712     char *start;
1713     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1714     const I32 startoff = s - SvPVX(PL_linestr);
1715
1716     PERL_ARGS_ASSERT_SKIPSPACE2;
1717
1718     s = skipspace(s);
1719     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1720     if (!PL_madskills || !svp)
1721         return s;
1722     start = SvPVX(PL_linestr) + startoff;
1723     if (!PL_thistoken && PL_realtokenstart >= 0) {
1724         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1725         PL_thistoken = newSVpvn(tstart, start - tstart);
1726         PL_realtokenstart = -1;
1727     }
1728     if (PL_skipwhite) {
1729         if (!*svp)
1730             *svp = newSVpvs("");
1731         sv_setsv(*svp, PL_skipwhite);
1732         sv_free(PL_skipwhite);
1733         PL_skipwhite = 0;
1734     }
1735
1736     return s;
1737 }
1738 #endif
1739
1740 STATIC void
1741 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1742 {
1743     AV *av = CopFILEAVx(PL_curcop);
1744     if (av) {
1745         SV * const sv = newSV_type(SVt_PVMG);
1746         if (orig_sv)
1747             sv_setsv(sv, orig_sv);
1748         else
1749             sv_setpvn(sv, buf, len);
1750         (void)SvIOK_on(sv);
1751         SvIV_set(sv, 0);
1752         av_store(av, (I32)CopLINE(PL_curcop), sv);
1753     }
1754 }
1755
1756 /*
1757  * S_skipspace
1758  * Called to gobble the appropriate amount and type of whitespace.
1759  * Skips comments as well.
1760  */
1761
1762 STATIC char *
1763 S_skipspace(pTHX_ register char *s)
1764 {
1765 #ifdef PERL_MAD
1766     char *start = s;
1767 #endif /* PERL_MAD */
1768     PERL_ARGS_ASSERT_SKIPSPACE;
1769 #ifdef PERL_MAD
1770     if (PL_skipwhite) {
1771         sv_free(PL_skipwhite);
1772         PL_skipwhite = NULL;
1773     }
1774 #endif /* PERL_MAD */
1775     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1776         while (s < PL_bufend && SPACE_OR_TAB(*s))
1777             s++;
1778     } else {
1779         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1780         PL_bufptr = s;
1781         lex_read_space(LEX_KEEP_PREVIOUS |
1782                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1783                     LEX_NO_NEXT_CHUNK : 0));
1784         s = PL_bufptr;
1785         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1786         if (PL_linestart > PL_bufptr)
1787             PL_bufptr = PL_linestart;
1788         return s;
1789     }
1790 #ifdef PERL_MAD
1791     if (PL_madskills)
1792         PL_skipwhite = newSVpvn(start, s-start);
1793 #endif /* PERL_MAD */
1794     return s;
1795 }
1796
1797 /*
1798  * S_check_uni
1799  * Check the unary operators to ensure there's no ambiguity in how they're
1800  * used.  An ambiguous piece of code would be:
1801  *     rand + 5
1802  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1803  * the +5 is its argument.
1804  */
1805
1806 STATIC void
1807 S_check_uni(pTHX)
1808 {
1809     dVAR;
1810     const char *s;
1811     const char *t;
1812
1813     if (PL_oldoldbufptr != PL_last_uni)
1814         return;
1815     while (isSPACE(*PL_last_uni))
1816         PL_last_uni++;
1817     s = PL_last_uni;
1818     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1819         s++;
1820     if ((t = strchr(s, '(')) && t < PL_bufptr)
1821         return;
1822
1823     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1824                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1825                      (int)(s - PL_last_uni), PL_last_uni);
1826 }
1827
1828 /*
1829  * LOP : macro to build a list operator.  Its behaviour has been replaced
1830  * with a subroutine, S_lop() for which LOP is just another name.
1831  */
1832
1833 #define LOP(f,x) return lop(f,x,s)
1834
1835 /*
1836  * S_lop
1837  * Build a list operator (or something that might be one).  The rules:
1838  *  - if we have a next token, then it's a list operator [why?]
1839  *  - if the next thing is an opening paren, then it's a function
1840  *  - else it's a list operator
1841  */
1842
1843 STATIC I32
1844 S_lop(pTHX_ I32 f, int x, char *s)
1845 {
1846     dVAR;
1847
1848     PERL_ARGS_ASSERT_LOP;
1849
1850     pl_yylval.ival = f;
1851     CLINE;
1852     PL_expect = x;
1853     PL_bufptr = s;
1854     PL_last_lop = PL_oldbufptr;
1855     PL_last_lop_op = (OPCODE)f;
1856 #ifdef PERL_MAD
1857     if (PL_lasttoke)
1858         goto lstop;
1859 #else
1860     if (PL_nexttoke)
1861         goto lstop;
1862 #endif
1863     if (*s == '(')
1864         return REPORT(FUNC);
1865     s = PEEKSPACE(s);
1866     if (*s == '(')
1867         return REPORT(FUNC);
1868     else {
1869         lstop:
1870         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1871             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1872         return REPORT(LSTOP);
1873     }
1874 }
1875
1876 #ifdef PERL_MAD
1877  /*
1878  * S_start_force
1879  * Sets up for an eventual force_next().  start_force(0) basically does
1880  * an unshift, while start_force(-1) does a push.  yylex removes items
1881  * on the "pop" end.
1882  */
1883
1884 STATIC void
1885 S_start_force(pTHX_ int where)
1886 {
1887     int i;
1888
1889     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1890         where = PL_lasttoke;
1891     assert(PL_curforce < 0 || PL_curforce == where);
1892     if (PL_curforce != where) {
1893         for (i = PL_lasttoke; i > where; --i) {
1894             PL_nexttoke[i] = PL_nexttoke[i-1];
1895         }
1896         PL_lasttoke++;
1897     }
1898     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1899         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1900     PL_curforce = where;
1901     if (PL_nextwhite) {
1902         if (PL_madskills)
1903             curmad('^', newSVpvs(""));
1904         CURMAD('_', PL_nextwhite);
1905     }
1906 }
1907
1908 STATIC void
1909 S_curmad(pTHX_ char slot, SV *sv)
1910 {
1911     MADPROP **where;
1912
1913     if (!sv)
1914         return;
1915     if (PL_curforce < 0)
1916         where = &PL_thismad;
1917     else
1918         where = &PL_nexttoke[PL_curforce].next_mad;
1919
1920     if (PL_faketokens)
1921         sv_setpvs(sv, "");
1922     else {
1923         if (!IN_BYTES) {
1924             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1925                 SvUTF8_on(sv);
1926             else if (PL_encoding) {
1927                 sv_recode_to_utf8(sv, PL_encoding);
1928             }
1929         }
1930     }
1931
1932     /* keep a slot open for the head of the list? */
1933     if (slot != '_' && *where && (*where)->mad_key == '^') {
1934         (*where)->mad_key = slot;
1935         sv_free(MUTABLE_SV(((*where)->mad_val)));
1936         (*where)->mad_val = (void*)sv;
1937     }
1938     else
1939         addmad(newMADsv(slot, sv), where, 0);
1940 }
1941 #else
1942 #  define start_force(where)    NOOP
1943 #  define curmad(slot, sv)      NOOP
1944 #endif
1945
1946 /*
1947  * S_force_next
1948  * When the lexer realizes it knows the next token (for instance,
1949  * it is reordering tokens for the parser) then it can call S_force_next
1950  * to know what token to return the next time the lexer is called.  Caller
1951  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1952  * and possibly PL_expect to ensure the lexer handles the token correctly.
1953  */
1954
1955 STATIC void
1956 S_force_next(pTHX_ I32 type)
1957 {
1958     dVAR;
1959 #ifdef DEBUGGING
1960     if (DEBUG_T_TEST) {
1961         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1962         tokereport(type, &NEXTVAL_NEXTTOKE);
1963     }
1964 #endif
1965 #ifdef PERL_MAD
1966     if (PL_curforce < 0)
1967         start_force(PL_lasttoke);
1968     PL_nexttoke[PL_curforce].next_type = type;
1969     if (PL_lex_state != LEX_KNOWNEXT)
1970         PL_lex_defer = PL_lex_state;
1971     PL_lex_state = LEX_KNOWNEXT;
1972     PL_lex_expect = PL_expect;
1973     PL_curforce = -1;
1974 #else
1975     PL_nexttype[PL_nexttoke] = type;
1976     PL_nexttoke++;
1977     if (PL_lex_state != LEX_KNOWNEXT) {
1978         PL_lex_defer = PL_lex_state;
1979         PL_lex_expect = PL_expect;
1980         PL_lex_state = LEX_KNOWNEXT;
1981     }
1982 #endif
1983 }
1984
1985 void
1986 Perl_yyunlex(pTHX)
1987 {
1988     int yyc = PL_parser->yychar;
1989     if (yyc != YYEMPTY) {
1990         if (yyc) {
1991             start_force(-1);
1992             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1993             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1994                 PL_lex_allbrackets--;
1995                 PL_lex_brackets--;
1996                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1997             } else if (yyc == '('/*)*/) {
1998                 PL_lex_allbrackets--;
1999                 yyc |= (2<<24);
2000             }
2001             force_next(yyc);
2002         }
2003         PL_parser->yychar = YYEMPTY;
2004     }
2005 }
2006
2007 STATIC SV *
2008 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2009 {
2010     dVAR;
2011     SV * const sv = newSVpvn_utf8(start, len,
2012                                   !IN_BYTES
2013                                   && UTF
2014                                   && !is_ascii_string((const U8*)start, len)
2015                                   && is_utf8_string((const U8*)start, len));
2016     return sv;
2017 }
2018
2019 /*
2020  * S_force_word
2021  * When the lexer knows the next thing is a word (for instance, it has
2022  * just seen -> and it knows that the next char is a word char, then
2023  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2024  * lookahead.
2025  *
2026  * Arguments:
2027  *   char *start : buffer position (must be within PL_linestr)
2028  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2029  *   int check_keyword : if true, Perl checks to make sure the word isn't
2030  *       a keyword (do this if the word is a label, e.g. goto FOO)
2031  *   int allow_pack : if true, : characters will also be allowed (require,
2032  *       use, etc. do this)
2033  *   int allow_initial_tick : used by the "sub" lexer only.
2034  */
2035
2036 STATIC char *
2037 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2038 {
2039     dVAR;
2040     register char *s;
2041     STRLEN len;
2042
2043     PERL_ARGS_ASSERT_FORCE_WORD;
2044
2045     start = SKIPSPACE1(start);
2046     s = start;
2047     if (isIDFIRST_lazy_if(s,UTF) ||
2048         (allow_pack && *s == ':') ||
2049         (allow_initial_tick && *s == '\'') )
2050     {
2051         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2052         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2053             return start;
2054         start_force(PL_curforce);
2055         if (PL_madskills)
2056             curmad('X', newSVpvn(start,s-start));
2057         if (token == METHOD) {
2058             s = SKIPSPACE1(s);
2059             if (*s == '(')
2060                 PL_expect = XTERM;
2061             else {
2062                 PL_expect = XOPERATOR;
2063             }
2064         }
2065         if (PL_madskills)
2066             curmad('g', newSVpvs( "forced" ));
2067         NEXTVAL_NEXTTOKE.opval
2068             = (OP*)newSVOP(OP_CONST,0,
2069                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2070         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2071         force_next(token);
2072     }
2073     return s;
2074 }
2075
2076 /*
2077  * S_force_ident
2078  * Called when the lexer wants $foo *foo &foo etc, but the program
2079  * text only contains the "foo" portion.  The first argument is a pointer
2080  * to the "foo", and the second argument is the type symbol to prefix.
2081  * Forces the next token to be a "WORD".
2082  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2083  */
2084
2085 STATIC void
2086 S_force_ident(pTHX_ register const char *s, int kind)
2087 {
2088     dVAR;
2089
2090     PERL_ARGS_ASSERT_FORCE_IDENT;
2091
2092     if (*s) {
2093         const STRLEN len = strlen(s);
2094         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2095                                                                 UTF ? SVf_UTF8 : 0));
2096         start_force(PL_curforce);
2097         NEXTVAL_NEXTTOKE.opval = o;
2098         force_next(WORD);
2099         if (kind) {
2100             o->op_private = OPpCONST_ENTERED;
2101             /* XXX see note in pp_entereval() for why we forgo typo
2102                warnings if the symbol must be introduced in an eval.
2103                GSAR 96-10-12 */
2104             gv_fetchpvn_flags(s, len,
2105                               (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2106                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2107                               kind == '$' ? SVt_PV :
2108                               kind == '@' ? SVt_PVAV :
2109                               kind == '%' ? SVt_PVHV :
2110                               SVt_PVGV
2111                               );
2112         }
2113     }
2114 }
2115
2116 NV
2117 Perl_str_to_version(pTHX_ SV *sv)
2118 {
2119     NV retval = 0.0;
2120     NV nshift = 1.0;
2121     STRLEN len;
2122     const char *start = SvPV_const(sv,len);
2123     const char * const end = start + len;
2124     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2125
2126     PERL_ARGS_ASSERT_STR_TO_VERSION;
2127
2128     while (start < end) {
2129         STRLEN skip;
2130         UV n;
2131         if (utf)
2132             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2133         else {
2134             n = *(U8*)start;
2135             skip = 1;
2136         }
2137         retval += ((NV)n)/nshift;
2138         start += skip;
2139         nshift *= 1000;
2140     }
2141     return retval;
2142 }
2143
2144 /*
2145  * S_force_version
2146  * Forces the next token to be a version number.
2147  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2148  * and if "guessing" is TRUE, then no new token is created (and the caller
2149  * must use an alternative parsing method).
2150  */
2151
2152 STATIC char *
2153 S_force_version(pTHX_ char *s, int guessing)
2154 {
2155     dVAR;
2156     OP *version = NULL;
2157     char *d;
2158 #ifdef PERL_MAD
2159     I32 startoff = s - SvPVX(PL_linestr);
2160 #endif
2161
2162     PERL_ARGS_ASSERT_FORCE_VERSION;
2163
2164     s = SKIPSPACE1(s);
2165
2166     d = s;
2167     if (*d == 'v')
2168         d++;
2169     if (isDIGIT(*d)) {
2170         while (isDIGIT(*d) || *d == '_' || *d == '.')
2171             d++;
2172 #ifdef PERL_MAD
2173         if (PL_madskills) {
2174             start_force(PL_curforce);
2175             curmad('X', newSVpvn(s,d-s));
2176         }
2177 #endif
2178         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2179             SV *ver;
2180 #ifdef USE_LOCALE_NUMERIC
2181             char *loc = setlocale(LC_NUMERIC, "C");
2182 #endif
2183             s = scan_num(s, &pl_yylval);
2184 #ifdef USE_LOCALE_NUMERIC
2185             setlocale(LC_NUMERIC, loc);
2186 #endif
2187             version = pl_yylval.opval;
2188             ver = cSVOPx(version)->op_sv;
2189             if (SvPOK(ver) && !SvNIOK(ver)) {
2190                 SvUPGRADE(ver, SVt_PVNV);
2191                 SvNV_set(ver, str_to_version(ver));
2192                 SvNOK_on(ver);          /* hint that it is a version */
2193             }
2194         }
2195         else if (guessing) {
2196 #ifdef PERL_MAD
2197             if (PL_madskills) {
2198                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2199                 PL_nextwhite = 0;
2200                 s = SvPVX(PL_linestr) + startoff;
2201             }
2202 #endif
2203             return s;
2204         }
2205     }
2206
2207 #ifdef PERL_MAD
2208     if (PL_madskills && !version) {
2209         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2210         PL_nextwhite = 0;
2211         s = SvPVX(PL_linestr) + startoff;
2212     }
2213 #endif
2214     /* NOTE: The parser sees the package name and the VERSION swapped */
2215     start_force(PL_curforce);
2216     NEXTVAL_NEXTTOKE.opval = version;
2217     force_next(WORD);
2218
2219     return s;
2220 }
2221
2222 /*
2223  * S_force_strict_version
2224  * Forces the next token to be a version number using strict syntax rules.
2225  */
2226
2227 STATIC char *
2228 S_force_strict_version(pTHX_ char *s)
2229 {
2230     dVAR;
2231     OP *version = NULL;
2232 #ifdef PERL_MAD
2233     I32 startoff = s - SvPVX(PL_linestr);
2234 #endif
2235     const char *errstr = NULL;
2236
2237     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2238
2239     while (isSPACE(*s)) /* leading whitespace */
2240         s++;
2241
2242     if (is_STRICT_VERSION(s,&errstr)) {
2243         SV *ver = newSV(0);
2244         s = (char *)scan_version(s, ver, 0);
2245         version = newSVOP(OP_CONST, 0, ver);
2246     }
2247     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2248             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2249     {
2250         PL_bufptr = s;
2251         if (errstr)
2252             yyerror(errstr); /* version required */
2253         return s;
2254     }
2255
2256 #ifdef PERL_MAD
2257     if (PL_madskills && !version) {
2258         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2259         PL_nextwhite = 0;
2260         s = SvPVX(PL_linestr) + startoff;
2261     }
2262 #endif
2263     /* NOTE: The parser sees the package name and the VERSION swapped */
2264     start_force(PL_curforce);
2265     NEXTVAL_NEXTTOKE.opval = version;
2266     force_next(WORD);
2267
2268     return s;
2269 }
2270
2271 /*
2272  * S_tokeq
2273  * Tokenize a quoted string passed in as an SV.  It finds the next
2274  * chunk, up to end of string or a backslash.  It may make a new
2275  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2276  * turns \\ into \.
2277  */
2278
2279 STATIC SV *
2280 S_tokeq(pTHX_ SV *sv)
2281 {
2282     dVAR;
2283     register char *s;
2284     register char *send;
2285     register char *d;
2286     STRLEN len = 0;
2287     SV *pv = sv;
2288
2289     PERL_ARGS_ASSERT_TOKEQ;
2290
2291     if (!SvLEN(sv))
2292         goto finish;
2293
2294     s = SvPV_force(sv, len);
2295     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2296         goto finish;
2297     send = s + len;
2298     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2299     while (s < send && !(*s == '\\' && s[1] == '\\'))
2300         s++;
2301     if (s == send)
2302         goto finish;
2303     d = s;
2304     if ( PL_hints & HINT_NEW_STRING ) {
2305         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2306     }
2307     while (s < send) {
2308         if (*s == '\\') {
2309             if (s + 1 < send && (s[1] == '\\'))
2310                 s++;            /* all that, just for this */
2311         }
2312         *d++ = *s++;
2313     }
2314     *d = '\0';
2315     SvCUR_set(sv, d - SvPVX_const(sv));
2316   finish:
2317     if ( PL_hints & HINT_NEW_STRING )
2318        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2319     return sv;
2320 }
2321
2322 /*
2323  * Now come three functions related to double-quote context,
2324  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2325  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2326  * interact with PL_lex_state, and create fake ( ... ) argument lists
2327  * to handle functions and concatenation.
2328  * They assume that whoever calls them will be setting up a fake
2329  * join call, because each subthing puts a ',' after it.  This lets
2330  *   "lower \luPpEr"
2331  * become
2332  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2333  *
2334  * (I'm not sure whether the spurious commas at the end of lcfirst's
2335  * arguments and join's arguments are created or not).
2336  */
2337
2338 /*
2339  * S_sublex_start
2340  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2341  *
2342  * Pattern matching will set PL_lex_op to the pattern-matching op to
2343  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2344  *
2345  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2346  *
2347  * Everything else becomes a FUNC.
2348  *
2349  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2350  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2351  * call to S_sublex_push().
2352  */
2353
2354 STATIC I32
2355 S_sublex_start(pTHX)
2356 {
2357     dVAR;
2358     register const I32 op_type = pl_yylval.ival;
2359
2360     if (op_type == OP_NULL) {
2361         pl_yylval.opval = PL_lex_op;
2362         PL_lex_op = NULL;
2363         return THING;
2364     }
2365     if (op_type == OP_CONST || op_type == OP_READLINE) {
2366         SV *sv = tokeq(PL_lex_stuff);
2367
2368         if (SvTYPE(sv) == SVt_PVIV) {
2369             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2370             STRLEN len;
2371             const char * const p = SvPV_const(sv, len);
2372             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2373             SvREFCNT_dec(sv);
2374             sv = nsv;
2375         }
2376         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2377         PL_lex_stuff = NULL;
2378         /* Allow <FH> // "foo" */
2379         if (op_type == OP_READLINE)
2380             PL_expect = XTERMORDORDOR;
2381         return THING;
2382     }
2383     else if (op_type == OP_BACKTICK && PL_lex_op) {
2384         /* readpipe() vas overriden */
2385         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2386         pl_yylval.opval = PL_lex_op;
2387         PL_lex_op = NULL;
2388         PL_lex_stuff = NULL;
2389         return THING;
2390     }
2391
2392     PL_sublex_info.super_state = PL_lex_state;
2393     PL_sublex_info.sub_inwhat = (U16)op_type;
2394     PL_sublex_info.sub_op = PL_lex_op;
2395     PL_lex_state = LEX_INTERPPUSH;
2396
2397     PL_expect = XTERM;
2398     if (PL_lex_op) {
2399         pl_yylval.opval = PL_lex_op;
2400         PL_lex_op = NULL;
2401         return PMFUNC;
2402     }
2403     else
2404         return FUNC;
2405 }
2406
2407 /*
2408  * S_sublex_push
2409  * Create a new scope to save the lexing state.  The scope will be
2410  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2411  * to the uc, lc, etc. found before.
2412  * Sets PL_lex_state to LEX_INTERPCONCAT.
2413  */
2414
2415 STATIC I32
2416 S_sublex_push(pTHX)
2417 {
2418     dVAR;
2419     ENTER;
2420
2421     PL_lex_state = PL_sublex_info.super_state;
2422     SAVEBOOL(PL_lex_dojoin);
2423     SAVEI32(PL_lex_brackets);
2424     SAVEI32(PL_lex_allbrackets);
2425     SAVEI8(PL_lex_fakeeof);
2426     SAVEI32(PL_lex_casemods);
2427     SAVEI32(PL_lex_starts);
2428     SAVEI8(PL_lex_state);
2429     SAVEVPTR(PL_lex_inpat);
2430     SAVEI16(PL_lex_inwhat);
2431     SAVECOPLINE(PL_curcop);
2432     SAVEPPTR(PL_bufptr);
2433     SAVEPPTR(PL_bufend);
2434     SAVEPPTR(PL_oldbufptr);
2435     SAVEPPTR(PL_oldoldbufptr);
2436     SAVEPPTR(PL_last_lop);
2437     SAVEPPTR(PL_last_uni);
2438     SAVEPPTR(PL_linestart);
2439     SAVESPTR(PL_linestr);
2440     SAVEGENERICPV(PL_lex_brackstack);
2441     SAVEGENERICPV(PL_lex_casestack);
2442
2443     PL_linestr = PL_lex_stuff;
2444     PL_lex_stuff = NULL;
2445
2446     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2447         = SvPVX(PL_linestr);
2448     PL_bufend += SvCUR(PL_linestr);
2449     PL_last_lop = PL_last_uni = NULL;
2450     SAVEFREESV(PL_linestr);
2451
2452     PL_lex_dojoin = FALSE;
2453     PL_lex_brackets = 0;
2454     PL_lex_allbrackets = 0;
2455     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2456     Newx(PL_lex_brackstack, 120, char);
2457     Newx(PL_lex_casestack, 12, char);
2458     PL_lex_casemods = 0;
2459     *PL_lex_casestack = '\0';
2460     PL_lex_starts = 0;
2461     PL_lex_state = LEX_INTERPCONCAT;
2462     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2463
2464     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2465     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2466     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2467         PL_lex_inpat = PL_sublex_info.sub_op;
2468     else
2469         PL_lex_inpat = NULL;
2470
2471     return '(';
2472 }
2473
2474 /*
2475  * S_sublex_done
2476  * Restores lexer state after a S_sublex_push.
2477  */
2478
2479 STATIC I32
2480 S_sublex_done(pTHX)
2481 {
2482     dVAR;
2483     if (!PL_lex_starts++) {
2484         SV * const sv = newSVpvs("");
2485         if (SvUTF8(PL_linestr))
2486             SvUTF8_on(sv);
2487         PL_expect = XOPERATOR;
2488         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2489         return THING;
2490     }
2491
2492     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2493         PL_lex_state = LEX_INTERPCASEMOD;
2494         return yylex();
2495     }
2496
2497     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2498     assert(PL_lex_inwhat != OP_TRANSR);
2499     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2500         PL_linestr = PL_lex_repl;
2501         PL_lex_inpat = 0;
2502         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2503         PL_bufend += SvCUR(PL_linestr);
2504         PL_last_lop = PL_last_uni = NULL;
2505         SAVEFREESV(PL_linestr);
2506         PL_lex_dojoin = FALSE;
2507         PL_lex_brackets = 0;
2508         PL_lex_allbrackets = 0;
2509         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2510         PL_lex_casemods = 0;
2511         *PL_lex_casestack = '\0';
2512         PL_lex_starts = 0;
2513         if (SvEVALED(PL_lex_repl)) {
2514             PL_lex_state = LEX_INTERPNORMAL;
2515             PL_lex_starts++;
2516             /*  we don't clear PL_lex_repl here, so that we can check later
2517                 whether this is an evalled subst; that means we rely on the
2518                 logic to ensure sublex_done() is called again only via the
2519                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2520         }
2521         else {
2522             PL_lex_state = LEX_INTERPCONCAT;
2523             PL_lex_repl = NULL;
2524         }
2525         return ',';
2526     }
2527     else {
2528 #ifdef PERL_MAD
2529         if (PL_madskills) {
2530             if (PL_thiswhite) {
2531                 if (!PL_endwhite)
2532                     PL_endwhite = newSVpvs("");
2533                 sv_catsv(PL_endwhite, PL_thiswhite);
2534                 PL_thiswhite = 0;
2535             }
2536             if (PL_thistoken)
2537                 sv_setpvs(PL_thistoken,"");
2538             else
2539                 PL_realtokenstart = -1;
2540         }
2541 #endif
2542         LEAVE;
2543         PL_bufend = SvPVX(PL_linestr);
2544         PL_bufend += SvCUR(PL_linestr);
2545         PL_expect = XOPERATOR;
2546         PL_sublex_info.sub_inwhat = 0;
2547         return ')';
2548     }
2549 }
2550
2551 /*
2552   scan_const
2553
2554   Extracts a pattern, double-quoted string, or transliteration.  This
2555   is terrifying code.
2556
2557   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2558   processing a pattern (PL_lex_inpat is true), a transliteration
2559   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2560
2561   Returns a pointer to the character scanned up to. If this is
2562   advanced from the start pointer supplied (i.e. if anything was
2563   successfully parsed), will leave an OP for the substring scanned
2564   in pl_yylval. Caller must intuit reason for not parsing further
2565   by looking at the next characters herself.
2566
2567   In patterns:
2568     backslashes:
2569       constants: \N{NAME} only
2570       case and quoting: \U \Q \E
2571     stops on @ and $, but not for $ as tail anchor
2572
2573   In transliterations:
2574     characters are VERY literal, except for - not at the start or end
2575     of the string, which indicates a range. If the range is in bytes,
2576     scan_const expands the range to the full set of intermediate
2577     characters. If the range is in utf8, the hyphen is replaced with
2578     a certain range mark which will be handled by pmtrans() in op.c.
2579
2580   In double-quoted strings:
2581     backslashes:
2582       double-quoted style: \r and \n
2583       constants: \x31, etc.
2584       deprecated backrefs: \1 (in substitution replacements)
2585       case and quoting: \U \Q \E
2586     stops on @ and $
2587
2588   scan_const does *not* construct ops to handle interpolated strings.
2589   It stops processing as soon as it finds an embedded $ or @ variable
2590   and leaves it to the caller to work out what's going on.
2591
2592   embedded arrays (whether in pattern or not) could be:
2593       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2594
2595   $ in double-quoted strings must be the symbol of an embedded scalar.
2596
2597   $ in pattern could be $foo or could be tail anchor.  Assumption:
2598   it's a tail anchor if $ is the last thing in the string, or if it's
2599   followed by one of "()| \r\n\t"
2600
2601   \1 (backreferences) are turned into $1
2602
2603   The structure of the code is
2604       while (there's a character to process) {
2605           handle transliteration ranges
2606           skip regexp comments /(?#comment)/ and codes /(?{code})/
2607           skip #-initiated comments in //x patterns
2608           check for embedded arrays
2609           check for embedded scalars
2610           if (backslash) {
2611               deprecate \1 in substitution replacements
2612               handle string-changing backslashes \l \U \Q \E, etc.
2613               switch (what was escaped) {
2614                   handle \- in a transliteration (becomes a literal -)
2615                   if a pattern and not \N{, go treat as regular character
2616                   handle \132 (octal characters)
2617                   handle \x15 and \x{1234} (hex characters)
2618                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2619                   handle \cV (control characters)
2620                   handle printf-style backslashes (\f, \r, \n, etc)
2621               } (end switch)
2622               continue
2623           } (end if backslash)
2624           handle regular character
2625     } (end while character to read)
2626
2627 */
2628
2629 STATIC char *
2630 S_scan_const(pTHX_ char *start)
2631 {
2632     dVAR;
2633     register char *send = PL_bufend;            /* end of the constant */
2634     SV *sv = newSV(send - start);               /* sv for the constant.  See
2635                                                    note below on sizing. */
2636     register char *s = start;                   /* start of the constant */
2637     register char *d = SvPVX(sv);               /* destination for copies */
2638     bool dorange = FALSE;                       /* are we in a translit range? */
2639     bool didrange = FALSE;                      /* did we just finish a range? */
2640     bool has_utf8 = FALSE;                      /* Output constant is UTF8 */
2641     bool  this_utf8 = cBOOL(UTF);               /* Is the source string assumed
2642                                                    to be UTF8?  But, this can
2643                                                    show as true when the source
2644                                                    isn't utf8, as for example
2645                                                    when it is entirely composed
2646                                                    of hex constants */
2647
2648     /* Note on sizing:  The scanned constant is placed into sv, which is
2649      * initialized by newSV() assuming one byte of output for every byte of
2650      * input.  This routine expects newSV() to allocate an extra byte for a
2651      * trailing NUL, which this routine will append if it gets to the end of
2652      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2653      * CAPITAL LETTER A}), or more output than input if the constant ends up
2654      * recoded to utf8, but each time a construct is found that might increase
2655      * the needed size, SvGROW() is called.  Its size parameter each time is
2656      * based on the best guess estimate at the time, namely the length used so
2657      * far, plus the length the current construct will occupy, plus room for
2658      * the trailing NUL, plus one byte for every input byte still unscanned */
2659
2660     UV uv;
2661 #ifdef EBCDIC
2662     UV literal_endpoint = 0;
2663     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2664 #endif
2665
2666     PERL_ARGS_ASSERT_SCAN_CONST;
2667
2668     assert(PL_lex_inwhat != OP_TRANSR);
2669     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2670         /* If we are doing a trans and we know we want UTF8 set expectation */
2671         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2672         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2673     }
2674
2675
2676     while (s < send || dorange) {
2677
2678         /* get transliterations out of the way (they're most literal) */
2679         if (PL_lex_inwhat == OP_TRANS) {
2680             /* expand a range A-Z to the full set of characters.  AIE! */
2681             if (dorange) {
2682                 I32 i;                          /* current expanded character */
2683                 I32 min;                        /* first character in range */
2684                 I32 max;                        /* last character in range */
2685
2686 #ifdef EBCDIC
2687                 UV uvmax = 0;
2688 #endif
2689
2690                 if (has_utf8
2691 #ifdef EBCDIC
2692                     && !native_range
2693 #endif
2694                     ) {
2695                     char * const c = (char*)utf8_hop((U8*)d, -1);
2696                     char *e = d++;
2697                     while (e-- > c)
2698                         *(e + 1) = *e;
2699                     *c = (char)UTF_TO_NATIVE(0xff);
2700                     /* mark the range as done, and continue */
2701                     dorange = FALSE;
2702                     didrange = TRUE;
2703                     continue;
2704                 }
2705
2706                 i = d - SvPVX_const(sv);                /* remember current offset */
2707 #ifdef EBCDIC
2708                 SvGROW(sv,
2709                        SvLEN(sv) + (has_utf8 ?
2710                                     (512 - UTF_CONTINUATION_MARK +
2711                                      UNISKIP(0x100))
2712                                     : 256));
2713                 /* How many two-byte within 0..255: 128 in UTF-8,
2714                  * 96 in UTF-8-mod. */
2715 #else
2716                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2717 #endif
2718                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2719 #ifdef EBCDIC
2720                 if (has_utf8) {
2721                     int j;
2722                     for (j = 0; j <= 1; j++) {
2723                         char * const c = (char*)utf8_hop((U8*)d, -1);
2724                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2725                         if (j)
2726                             min = (U8)uv;
2727                         else if (uv < 256)
2728                             max = (U8)uv;
2729                         else {
2730                             max = (U8)0xff; /* only to \xff */
2731                             uvmax = uv; /* \x{100} to uvmax */
2732                         }
2733                         d = c; /* eat endpoint chars */
2734                      }
2735                 }
2736                else {
2737 #endif
2738                    d -= 2;              /* eat the first char and the - */
2739                    min = (U8)*d;        /* first char in range */
2740                    max = (U8)d[1];      /* last char in range  */
2741 #ifdef EBCDIC
2742                }
2743 #endif
2744
2745                 if (min > max) {
2746                     Perl_croak(aTHX_
2747                                "Invalid range \"%c-%c\" in transliteration operator",
2748                                (char)min, (char)max);
2749                 }
2750
2751 #ifdef EBCDIC
2752                 if (literal_endpoint == 2 &&
2753                     ((isLOWER(min) && isLOWER(max)) ||
2754                      (isUPPER(min) && isUPPER(max)))) {
2755                     if (isLOWER(min)) {
2756                         for (i = min; i <= max; i++)
2757                             if (isLOWER(i))
2758                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2759                     } else {
2760                         for (i = min; i <= max; i++)
2761                             if (isUPPER(i))
2762                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2763                     }
2764                 }
2765                 else
2766 #endif
2767                     for (i = min; i <= max; i++)
2768 #ifdef EBCDIC
2769                         if (has_utf8) {
2770                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2771                             if (UNI_IS_INVARIANT(ch))
2772                                 *d++ = (U8)i;
2773                             else {
2774                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2775                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2776                             }
2777                         }
2778                         else
2779 #endif
2780                             *d++ = (char)i;
2781
2782 #ifdef EBCDIC
2783                 if (uvmax) {
2784                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2785                     if (uvmax > 0x101)
2786                         *d++ = (char)UTF_TO_NATIVE(0xff);
2787                     if (uvmax > 0x100)
2788                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2789                 }
2790 #endif
2791
2792                 /* mark the range as done, and continue */
2793                 dorange = FALSE;
2794                 didrange = TRUE;
2795 #ifdef EBCDIC
2796                 literal_endpoint = 0;
2797 #endif
2798                 continue;
2799             }
2800
2801             /* range begins (ignore - as first or last char) */
2802             else if (*s == '-' && s+1 < send  && s != start) {
2803                 if (didrange) {
2804                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2805                 }
2806                 if (has_utf8
2807 #ifdef EBCDIC
2808                     && !native_range
2809 #endif
2810                     ) {
2811                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2812                     s++;
2813                     continue;
2814                 }
2815                 dorange = TRUE;
2816                 s++;
2817             }
2818             else {
2819                 didrange = FALSE;
2820 #ifdef EBCDIC
2821                 literal_endpoint = 0;
2822                 native_range = TRUE;
2823 #endif
2824             }
2825         }
2826
2827         /* if we get here, we're not doing a transliteration */
2828
2829         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2830            except for the last char, which will be done separately. */
2831         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2832             if (s[2] == '#') {
2833                 while (s+1 < send && *s != ')')
2834                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2835             }
2836             else if (s[2] == '{' /* This should match regcomp.c */
2837                     || (s[2] == '?' && s[3] == '{'))
2838             {
2839                 I32 count = 1;
2840                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2841                 char c;
2842
2843                 while (count && (c = *regparse)) {
2844                     if (c == '\\' && regparse[1])
2845                         regparse++;
2846                     else if (c == '{')
2847                         count++;
2848                     else if (c == '}')
2849                         count--;
2850                     regparse++;
2851                 }
2852                 if (*regparse != ')')
2853                     regparse--;         /* Leave one char for continuation. */
2854                 while (s < regparse)
2855                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2856             }
2857         }
2858
2859         /* likewise skip #-initiated comments in //x patterns */
2860         else if (*s == '#' && PL_lex_inpat &&
2861           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2862             while (s+1 < send && *s != '\n')
2863                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2864         }
2865
2866         /* check for embedded arrays
2867            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2868            */
2869         else if (*s == '@' && s[1]) {
2870             if (isALNUM_lazy_if(s+1,UTF))
2871                 break;
2872             if (strchr(":'{$", s[1]))
2873                 break;
2874             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2875                 break; /* in regexp, neither @+ nor @- are interpolated */
2876         }
2877
2878         /* check for embedded scalars.  only stop if we're sure it's a
2879            variable.
2880         */
2881         else if (*s == '$') {
2882             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2883                 break;
2884             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2885                 if (s[1] == '\\') {
2886                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2887                                    "Possible unintended interpolation of $\\ in regex");
2888                 }
2889                 break;          /* in regexp, $ might be tail anchor */
2890             }
2891         }
2892
2893         /* End of else if chain - OP_TRANS rejoin rest */
2894
2895         /* backslashes */
2896         if (*s == '\\' && s+1 < send) {
2897             char* e;    /* Can be used for ending '}', etc. */
2898
2899             s++;
2900
2901             /* warn on \1 - \9 in substitution replacements, but note that \11
2902              * is an octal; and \19 is \1 followed by '9' */
2903             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2904                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2905             {
2906                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2907                 *--s = '$';
2908                 break;
2909             }
2910
2911             /* string-change backslash escapes */
2912             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2913                 --s;
2914                 break;
2915             }
2916             /* In a pattern, process \N, but skip any other backslash escapes.
2917              * This is because we don't want to translate an escape sequence
2918              * into a meta symbol and have the regex compiler use the meta
2919              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2920              * in spite of this, we do have to process \N here while the proper
2921              * charnames handler is in scope.  See bugs #56444 and #62056.
2922              * There is a complication because \N in a pattern may also stand
2923              * for 'match a non-nl', and not mean a charname, in which case its
2924              * processing should be deferred to the regex compiler.  To be a
2925              * charname it must be followed immediately by a '{', and not look
2926              * like \N followed by a curly quantifier, i.e., not something like
2927              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2928              * quantifier */
2929             else if (PL_lex_inpat
2930                     && (*s != 'N'
2931                         || s[1] != '{'
2932                         || regcurly(s + 1)))
2933             {
2934                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2935                 goto default_action;
2936             }
2937
2938             switch (*s) {
2939
2940             /* quoted - in transliterations */
2941             case '-':
2942                 if (PL_lex_inwhat == OP_TRANS) {
2943                     *d++ = *s++;
2944                     continue;
2945                 }
2946                 /* FALL THROUGH */
2947             default:
2948                 {
2949                     if ((isALPHA(*s) || isDIGIT(*s)))
2950                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2951                                        "Unrecognized escape \\%c passed through",
2952                                        *s);
2953                     /* default action is to copy the quoted character */
2954                     goto default_action;
2955                 }
2956
2957             /* eg. \132 indicates the octal constant 0132 */
2958             case '0': case '1': case '2': case '3':
2959             case '4': case '5': case '6': case '7':
2960                 {
2961                     I32 flags = 0;
2962                     STRLEN len = 3;
2963                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2964                     s += len;
2965                 }
2966                 goto NUM_ESCAPE_INSERT;
2967
2968             /* eg. \o{24} indicates the octal constant \024 */
2969             case 'o':
2970                 {
2971                     STRLEN len;
2972                     const char* error;
2973
2974                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2975                     s += len;
2976                     if (! valid) {
2977                         yyerror(error);
2978                         continue;
2979                     }
2980                     goto NUM_ESCAPE_INSERT;
2981                 }
2982
2983             /* eg. \x24 indicates the hex constant 0x24 */
2984             case 'x':
2985                 ++s;
2986                 if (*s == '{') {
2987                     char* const e = strchr(s, '}');
2988                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2989                       PERL_SCAN_DISALLOW_PREFIX;
2990                     STRLEN len;
2991
2992                     ++s;
2993                     if (!e) {
2994                         yyerror("Missing right brace on \\x{}");
2995                         continue;
2996                     }
2997                     len = e - s;
2998                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2999                     s = e + 1;
3000                 }
3001                 else {
3002                     {
3003                         STRLEN len = 2;
3004                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3005                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3006                         s += len;
3007                     }
3008                 }
3009
3010               NUM_ESCAPE_INSERT:
3011                 /* Insert oct or hex escaped character.  There will always be
3012                  * enough room in sv since such escapes will be longer than any
3013                  * UTF-8 sequence they can end up as, except if they force us
3014                  * to recode the rest of the string into utf8 */
3015
3016                 /* Here uv is the ordinal of the next character being added in
3017                  * unicode (converted from native). */
3018                 if (!UNI_IS_INVARIANT(uv)) {
3019                     if (!has_utf8 && uv > 255) {
3020                         /* Might need to recode whatever we have accumulated so
3021                          * far if it contains any chars variant in utf8 or
3022                          * utf-ebcdic. */
3023
3024                         SvCUR_set(sv, d - SvPVX_const(sv));
3025                         SvPOK_on(sv);
3026                         *d = '\0';
3027                         /* See Note on sizing above.  */
3028                         sv_utf8_upgrade_flags_grow(sv,
3029                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3030                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3031                         d = SvPVX(sv) + SvCUR(sv);
3032                         has_utf8 = TRUE;
3033                     }
3034
3035                     if (has_utf8) {
3036                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3037                         if (PL_lex_inwhat == OP_TRANS &&
3038                             PL_sublex_info.sub_op) {
3039                             PL_sublex_info.sub_op->op_private |=
3040                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3041                                              : OPpTRANS_TO_UTF);
3042                         }
3043 #ifdef EBCDIC
3044                         if (uv > 255 && !dorange)
3045                             native_range = FALSE;
3046 #endif
3047                     }
3048                     else {
3049                         *d++ = (char)uv;
3050                     }
3051                 }
3052                 else {
3053                     *d++ = (char) uv;
3054                 }
3055                 continue;
3056
3057             case 'N':
3058                 /* In a non-pattern \N must be a named character, like \N{LATIN
3059                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3060                  * mean to match a non-newline.  For non-patterns, named
3061                  * characters are converted to their string equivalents. In
3062                  * patterns, named characters are not converted to their
3063                  * ultimate forms for the same reasons that other escapes
3064                  * aren't.  Instead, they are converted to the \N{U+...} form
3065                  * to get the value from the charnames that is in effect right
3066                  * now, while preserving the fact that it was a named character
3067                  * so that the regex compiler knows this */
3068
3069                 /* This section of code doesn't generally use the
3070                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3071                  * a close examination of this macro and determined it is a
3072                  * no-op except on utfebcdic variant characters.  Every
3073                  * character generated by this that would normally need to be
3074                  * enclosed by this macro is invariant, so the macro is not
3075                  * needed, and would complicate use of copy().  XXX There are
3076                  * other parts of this file where the macro is used
3077                  * inconsistently, but are saved by it being a no-op */
3078
3079                 /* The structure of this section of code (besides checking for
3080                  * errors and upgrading to utf8) is:
3081                  *  Further disambiguate between the two meanings of \N, and if
3082                  *      not a charname, go process it elsewhere
3083                  *  If of form \N{U+...}, pass it through if a pattern;
3084                  *      otherwise convert to utf8
3085                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3086                  *  pattern; otherwise convert to utf8 */
3087
3088                 /* Here, s points to the 'N'; the test below is guaranteed to
3089                  * succeed if we are being called on a pattern as we already
3090                  * know from a test above that the next character is a '{'.
3091                  * On a non-pattern \N must mean 'named sequence, which
3092                  * requires braces */
3093                 s++;
3094                 if (*s != '{') {
3095                     yyerror("Missing braces on \\N{}");
3096                     continue;
3097                 }
3098                 s++;
3099
3100                 /* If there is no matching '}', it is an error. */
3101                 if (! (e = strchr(s, '}'))) {
3102                     if (! PL_lex_inpat) {
3103                         yyerror("Missing right brace on \\N{}");
3104                     } else {
3105                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3106                     }
3107                     continue;
3108                 }
3109
3110                 /* Here it looks like a named character */
3111
3112                 if (PL_lex_inpat) {
3113
3114                     /* XXX This block is temporary code.  \N{} implies that the
3115                      * pattern is to have Unicode semantics, and therefore
3116                      * currently has to be encoded in utf8.  By putting it in
3117                      * utf8 now, we save a whole pass in the regular expression
3118                      * compiler.  Once that code is changed so Unicode
3119                      * semantics doesn't necessarily have to be in utf8, this
3120                      * block should be removed.  However, the code that parses
3121                      * the output of this would have to be changed to not
3122                      * necessarily expect utf8 */
3123                     if (!has_utf8) {
3124                         SvCUR_set(sv, d - SvPVX_const(sv));
3125                         SvPOK_on(sv);
3126                         *d = '\0';
3127                         /* See Note on sizing above.  */
3128                         sv_utf8_upgrade_flags_grow(sv,
3129                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3130                                         /* 5 = '\N{' + cur char + NUL */
3131                                         (STRLEN)(send - s) + 5);
3132                         d = SvPVX(sv) + SvCUR(sv);
3133                         has_utf8 = TRUE;
3134                     }
3135                 }
3136
3137                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3138                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3139                                 | PERL_SCAN_DISALLOW_PREFIX;
3140                     STRLEN len;
3141
3142                     /* For \N{U+...}, the '...' is a unicode value even on
3143                      * EBCDIC machines */
3144                     s += 2;         /* Skip to next char after the 'U+' */
3145                     len = e - s;
3146                     uv = grok_hex(s, &len, &flags, NULL);
3147                     if (len == 0 || len != (STRLEN)(e - s)) {
3148                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3149                         s = e + 1;
3150                         continue;
3151                     }
3152
3153                     if (PL_lex_inpat) {
3154
3155                         /* On non-EBCDIC platforms, pass through to the regex
3156                          * compiler unchanged.  The reason we evaluated the
3157                          * number above is to make sure there wasn't a syntax
3158                          * error.  But on EBCDIC we convert to native so
3159                          * downstream code can continue to assume it's native
3160                          */
3161                         s -= 5;     /* Include the '\N{U+' */
3162 #ifdef EBCDIC
3163                         d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
3164                                                                and the \0 */
3165                                     "\\N{U+%X}",
3166                                     (unsigned int) UNI_TO_NATIVE(uv));
3167 #else
3168                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3169                         d += e - s + 1;
3170 #endif
3171                     }
3172                     else {  /* Not a pattern: convert the hex to string */
3173
3174                          /* If destination is not in utf8, unconditionally
3175                           * recode it to be so.  This is because \N{} implies
3176                           * Unicode semantics, and scalars have to be in utf8
3177                           * to guarantee those semantics */
3178                         if (! has_utf8) {
3179                             SvCUR_set(sv, d - SvPVX_const(sv));
3180                             SvPOK_on(sv);
3181                             *d = '\0';
3182                             /* See Note on sizing above.  */
3183                             sv_utf8_upgrade_flags_grow(
3184                                         sv,
3185                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3186                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3187                             d = SvPVX(sv) + SvCUR(sv);
3188                             has_utf8 = TRUE;
3189                         }
3190
3191                         /* Add the string to the output */
3192                         if (UNI_IS_INVARIANT(uv)) {
3193                             *d++ = (char) uv;
3194                         }
3195                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3196                     }
3197                 }
3198                 else { /* Here is \N{NAME} but not \N{U+...}. */
3199
3200                     SV *res;            /* result from charnames */
3201                     const char *str;    /* the string in 'res' */
3202                     STRLEN len;         /* its length */
3203
3204                     /* Get the value for NAME */
3205                     res = newSVpvn(s, e - s);
3206                     res = new_constant( NULL, 0, "charnames",
3207                                         /* includes all of: \N{...} */
3208                                         res, NULL, s - 3, e - s + 4 );
3209
3210                     /* Most likely res will be in utf8 already since the
3211                      * standard charnames uses pack U, but a custom translator
3212                      * can leave it otherwise, so make sure.  XXX This can be
3213                      * revisited to not have charnames use utf8 for characters
3214                      * that don't need it when regexes don't have to be in utf8
3215                      * for Unicode semantics.  If doing so, remember EBCDIC */
3216                     sv_utf8_upgrade(res);
3217                     str = SvPV_const(res, len);
3218
3219                     /* Don't accept malformed input */
3220                     if (! is_utf8_string((U8 *) str, len)) {
3221                         yyerror("Malformed UTF-8 returned by \\N");
3222                     }
3223                     else if (PL_lex_inpat) {
3224
3225                         if (! len) { /* The name resolved to an empty string */
3226                             Copy("\\N{}", d, 4, char);
3227                             d += 4;
3228                         }
3229                         else {
3230                             /* In order to not lose information for the regex
3231                             * compiler, pass the result in the specially made
3232                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3233                             * the code points in hex of each character
3234                             * returned by charnames */
3235
3236                             const char *str_end = str + len;
3237                             STRLEN char_length;     /* cur char's byte length */
3238                             STRLEN output_length;   /* and the number of bytes
3239                                                        after this is translated
3240                                                        into hex digits */
3241                             const STRLEN off = d - SvPVX_const(sv);
3242
3243                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3244                              * max('U+', '.'); and 1 for NUL */
3245                             char hex_string[2 * UTF8_MAXBYTES + 5];
3246
3247                             /* Get the first character of the result. */
3248                             U32 uv = utf8n_to_uvuni((U8 *) str,
3249                                                     len,
3250                                                     &char_length,
3251                                                     UTF8_ALLOW_ANYUV);
3252
3253                             /* The call to is_utf8_string() above hopefully
3254                              * guarantees that there won't be an error.  But
3255                              * it's easy here to make sure.  The function just
3256                              * above warns and returns 0 if invalid utf8, but
3257                              * it can also return 0 if the input is validly a
3258                              * NUL. Disambiguate */
3259                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3260                                 uv = UNICODE_REPLACEMENT;
3261                             }
3262
3263                             /* Convert first code point to hex, including the
3264                              * boiler plate before it.  For all these, we
3265                              * convert to native format so that downstream code
3266                              * can continue to assume the input is native */
3267                             output_length =
3268                                 my_snprintf(hex_string, sizeof(hex_string),
3269                                             "\\N{U+%X",
3270                                             (unsigned int) UNI_TO_NATIVE(uv));
3271
3272                             /* Make sure there is enough space to hold it */
3273                             d = off + SvGROW(sv, off
3274                                                  + output_length
3275                                                  + (STRLEN)(send - e)
3276                                                  + 2);  /* '}' + NUL */
3277                             /* And output it */
3278                             Copy(hex_string, d, output_length, char);
3279                             d += output_length;
3280
3281                             /* For each subsequent character, append dot and
3282                              * its ordinal in hex */
3283                             while ((str += char_length) < str_end) {
3284                                 const STRLEN off = d - SvPVX_const(sv);
3285                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3286                                                         str_end - str,
3287                                                         &char_length,
3288                                                         UTF8_ALLOW_ANYUV);
3289                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3290                                     uv = UNICODE_REPLACEMENT;
3291                                 }
3292
3293                                 output_length =
3294                                     my_snprintf(hex_string, sizeof(hex_string),
3295                                             ".%X",
3296                                             (unsigned int) UNI_TO_NATIVE(uv));
3297
3298                                 d = off + SvGROW(sv, off
3299                                                      + output_length
3300                                                      + (STRLEN)(send - e)
3301                                                      + 2);      /* '}' +  NUL */
3302                                 Copy(hex_string, d, output_length, char);
3303                                 d += output_length;
3304                             }
3305
3306                             *d++ = '}'; /* Done.  Add the trailing brace */
3307                         }
3308                     }
3309                     else { /* Here, not in a pattern.  Convert the name to a
3310                             * string. */
3311
3312                          /* If destination is not in utf8, unconditionally
3313                           * recode it to be so.  This is because \N{} implies
3314                           * Unicode semantics, and scalars have to be in utf8
3315                           * to guarantee those semantics */
3316                         if (! has_utf8) {
3317                             SvCUR_set(sv, d - SvPVX_const(sv));
3318                             SvPOK_on(sv);
3319                             *d = '\0';
3320                             /* See Note on sizing above.  */
3321                             sv_utf8_upgrade_flags_grow(sv,
3322                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3323                                                 len + (STRLEN)(send - s) + 1);
3324                             d = SvPVX(sv) + SvCUR(sv);
3325                             has_utf8 = TRUE;
3326                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3327
3328                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3329                              * set correctly here). */
3330                             const STRLEN off = d - SvPVX_const(sv);
3331                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3332                         }
3333                         Copy(str, d, len, char);
3334                         d += len;
3335                     }
3336                     SvREFCNT_dec(res);
3337
3338                     /* Deprecate non-approved name syntax */
3339                     if (ckWARN_d(WARN_DEPRECATED)) {
3340                         bool problematic = FALSE;
3341                         char* i = s;
3342
3343                         /* For non-ut8 input, look to see that the first
3344                          * character is an alpha, then loop through the rest
3345                          * checking that each is a continuation */
3346                         if (! this_utf8) {
3347                             if (! isALPHAU(*i)) problematic = TRUE;
3348                             else for (i = s + 1; i < e; i++) {
3349                                 if (isCHARNAME_CONT(*i)) continue;
3350                                 problematic = TRUE;
3351                                 break;
3352                             }
3353                         }
3354                         else {
3355                             /* Similarly for utf8.  For invariants can check
3356                              * directly.  We accept anything above the latin1
3357                              * range because it is immaterial to Perl if it is
3358                              * correct or not, and is expensive to check.  But
3359                              * it is fairly easy in the latin1 range to convert
3360                              * the variants into a single character and check
3361                              * those */
3362                             if (UTF8_IS_INVARIANT(*i)) {
3363                                 if (! isALPHAU(*i)) problematic = TRUE;
3364                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3365                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3366                                                                             *(i+1)))))
3367                                 {
3368                                     problematic = TRUE;
3369                                 }
3370                             }
3371                             if (! problematic) for (i = s + UTF8SKIP(s);
3372                                                     i < e;
3373                                                     i+= UTF8SKIP(i))
3374                             {
3375                                 if (UTF8_IS_INVARIANT(*i)) {
3376                                     if (isCHARNAME_CONT(*i)) continue;
3377                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3378                                     continue;
3379                                 } else if (isCHARNAME_CONT(
3380                                             UNI_TO_NATIVE(
3381                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3382                                 {
3383                                     continue;
3384                                 }
3385                                 problematic = TRUE;
3386                                 break;
3387                             }
3388                         }
3389                         if (problematic) {
3390                             /* The e-i passed to the final %.*s makes sure that
3391                              * should the trailing NUL be missing that this
3392                              * print won't run off the end of the string */
3393                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3394                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3395                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3396                         }
3397                     }
3398                 } /* End \N{NAME} */
3399 #ifdef EBCDIC
3400                 if (!dorange)
3401                     native_range = FALSE; /* \N{} is defined to be Unicode */
3402 #endif
3403                 s = e + 1;  /* Point to just after the '}' */
3404                 continue;
3405
3406             /* \c is a control character */
3407             case 'c':
3408                 s++;
3409                 if (s < send) {
3410                     *d++ = grok_bslash_c(*s++, has_utf8, 1);
3411                 }
3412                 else {
3413                     yyerror("Missing control char name in \\c");
3414                 }
3415                 continue;
3416
3417             /* printf-style backslashes, formfeeds, newlines, etc */
3418             case 'b':
3419                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3420                 break;
3421             case 'n':
3422                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3423                 break;
3424             case 'r':
3425                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3426                 break;
3427             case 'f':
3428                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3429                 break;
3430             case 't':
3431                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3432                 break;
3433             case 'e':
3434                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3435                 break;
3436             case 'a':
3437                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3438                 break;
3439             } /* end switch */
3440
3441             s++;
3442             continue;
3443         } /* end if (backslash) */
3444 #ifdef EBCDIC
3445         else
3446             literal_endpoint++;
3447 #endif
3448
3449     default_action:
3450         /* If we started with encoded form, or already know we want it,
3451            then encode the next character */
3452         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3453             STRLEN len  = 1;
3454
3455
3456             /* One might think that it is wasted effort in the case of the
3457              * source being utf8 (this_utf8 == TRUE) to take the next character
3458              * in the source, convert it to an unsigned value, and then convert
3459              * it back again.  But the source has not been validated here.  The
3460              * routine that does the conversion checks for errors like
3461              * malformed utf8 */
3462
3463             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3464             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3465             if (!has_utf8) {
3466                 SvCUR_set(sv, d - SvPVX_const(sv));
3467                 SvPOK_on(sv);
3468                 *d = '\0';
3469                 /* See Note on sizing above.  */
3470                 sv_utf8_upgrade_flags_grow(sv,
3471                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3472                                         need + (STRLEN)(send - s) + 1);
3473                 d = SvPVX(sv) + SvCUR(sv);
3474                 has_utf8 = TRUE;
3475             } else if (need > len) {
3476                 /* encoded value larger than old, may need extra space (NOTE:
3477                  * SvCUR() is not set correctly here).   See Note on sizing
3478                  * above.  */
3479                 const STRLEN off = d - SvPVX_const(sv);
3480                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3481             }
3482             s += len;
3483
3484             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3485 #ifdef EBCDIC
3486             if (uv > 255 && !dorange)
3487                 native_range = FALSE;
3488 #endif
3489         }
3490         else {
3491             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3492         }
3493     } /* while loop to process each character */
3494
3495     /* terminate the string and set up the sv */
3496     *d = '\0';
3497     SvCUR_set(sv, d - SvPVX_const(sv));
3498     if (SvCUR(sv) >= SvLEN(sv))
3499         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3500
3501     SvPOK_on(sv);
3502     if (PL_encoding && !has_utf8) {
3503         sv_recode_to_utf8(sv, PL_encoding);
3504         if (SvUTF8(sv))
3505             has_utf8 = TRUE;
3506     }
3507     if (has_utf8) {
3508         SvUTF8_on(sv);
3509         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3510             PL_sublex_info.sub_op->op_private |=
3511                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3512         }
3513     }
3514
3515     /* shrink the sv if we allocated more than we used */
3516     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3517         SvPV_shrink_to_cur(sv);
3518     }
3519
3520     /* return the substring (via pl_yylval) only if we parsed anything */
3521     if (s > PL_bufptr) {
3522         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3523             const char *const key = PL_lex_inpat ? "qr" : "q";
3524             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3525             const char *type;
3526             STRLEN typelen;
3527
3528             if (PL_lex_inwhat == OP_TRANS) {
3529                 type = "tr";
3530                 typelen = 2;
3531             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3532                 type = "s";
3533                 typelen = 1;
3534             } else  {
3535                 type = "qq";
3536                 typelen = 2;
3537             }
3538
3539             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3540                                 type, typelen);
3541         }
3542         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3543     } else
3544         SvREFCNT_dec(sv);
3545     return s;
3546 }
3547
3548 /* S_intuit_more
3549  * Returns TRUE if there's more to the expression (e.g., a subscript),
3550  * FALSE otherwise.
3551  *
3552  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3553  *
3554  * ->[ and ->{ return TRUE
3555  * { and [ outside a pattern are always subscripts, so return TRUE
3556  * if we're outside a pattern and it's not { or [, then return FALSE
3557  * if we're in a pattern and the first char is a {
3558  *   {4,5} (any digits around the comma) returns FALSE
3559  * if we're in a pattern and the first char is a [
3560  *   [] returns FALSE
3561  *   [SOMETHING] has a funky algorithm to decide whether it's a
3562  *      character class or not.  It has to deal with things like
3563  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3564  * anything else returns TRUE
3565  */
3566
3567 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3568
3569 STATIC int
3570 S_intuit_more(pTHX_ register char *s)
3571 {
3572     dVAR;
3573
3574     PERL_ARGS_ASSERT_INTUIT_MORE;
3575
3576     if (PL_lex_brackets)
3577         return TRUE;
3578     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3579         return TRUE;
3580     if (*s != '{' && *s != '[')
3581         return FALSE;
3582     if (!PL_lex_inpat)
3583         return TRUE;
3584
3585     /* In a pattern, so maybe we have {n,m}. */
3586     if (*s == '{') {
3587         if (regcurly(s)) {
3588             return FALSE;
3589         }
3590         return TRUE;
3591     }
3592
3593     /* On the other hand, maybe we have a character class */
3594
3595     s++;
3596     if (*s == ']' || *s == '^')
3597         return FALSE;
3598     else {
3599         /* this is terrifying, and it works */
3600         int weight = 2;         /* let's weigh the evidence */
3601         char seen[256];
3602         unsigned char un_char = 255, last_un_char;
3603         const char * const send = strchr(s,']');
3604         char tmpbuf[sizeof PL_tokenbuf * 4];
3605
3606         if (!send)              /* has to be an expression */
3607             return TRUE;
3608
3609         Zero(seen,256,char);
3610         if (*s == '$')
3611             weight -= 3;
3612         else if (isDIGIT(*s)) {
3613             if (s[1] != ']') {
3614                 if (isDIGIT(s[1]) && s[2] == ']')
3615                     weight -= 10;
3616             }
3617             else
3618                 weight -= 100;
3619         }
3620         for (; s < send; s++) {
3621             last_un_char = un_char;
3622             un_char = (unsigned char)*s;
3623             switch (*s) {
3624             case '@':
3625             case '&':
3626             case '$':
3627                 weight -= seen[un_char] * 10;
3628                 if (isALNUM_lazy_if(s+1,UTF)) {
3629                     int len;
3630                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3631                     len = (int)strlen(tmpbuf);
3632                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3633                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
3634                         weight -= 100;
3635                     else
3636                         weight -= 10;
3637                 }
3638                 else if (*s == '$' && s[1] &&
3639                   strchr("[#!%*<>()-=",s[1])) {
3640                     if (/*{*/ strchr("])} =",s[2]))
3641                         weight -= 10;
3642                     else
3643                         weight -= 1;
3644                 }
3645                 break;
3646             case '\\':
3647                 un_char = 254;
3648                 if (s[1]) {
3649                     if (strchr("wds]",s[1]))
3650                         weight += 100;
3651                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3652                         weight += 1;
3653                     else if (strchr("rnftbxcav",s[1]))
3654                         weight += 40;
3655                     else if (isDIGIT(s[1])) {
3656                         weight += 40;
3657                         while (s[1] && isDIGIT(s[1]))
3658                             s++;
3659                     }
3660                 }
3661                 else
3662                     weight += 100;
3663                 break;
3664             case '-':
3665                 if (s[1] == '\\')
3666                     weight += 50;
3667                 if (strchr("aA01! ",last_un_char))
3668                     weight += 30;
3669                 if (strchr("zZ79~",s[1]))
3670                     weight += 30;
3671                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3672                     weight -= 5;        /* cope with negative subscript */
3673                 break;
3674             default:
3675                 if (!isALNUM(last_un_char)
3676                     && !(last_un_char == '$' || last_un_char == '@'
3677                          || last_un_char == '&')
3678                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3679                     char *d = tmpbuf;
3680                     while (isALPHA(*s))
3681                         *d++ = *s++;
3682                     *d = '\0';
3683                     if (keyword(tmpbuf, d - tmpbuf, 0))
3684                         weight -= 150;
3685                 }
3686                 if (un_char == last_un_char + 1)
3687                     weight += 5;
3688                 weight -= seen[un_char];
3689                 break;
3690             }
3691             seen[un_char]++;
3692         }
3693         if (weight >= 0)        /* probably a character class */
3694             return FALSE;
3695     }
3696
3697     return TRUE;
3698 }
3699
3700 /*
3701  * S_intuit_method
3702  *
3703  * Does all the checking to disambiguate
3704  *   foo bar
3705  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3706  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3707  *
3708  * First argument is the stuff after the first token, e.g. "bar".
3709  *
3710  * Not a method if bar is a filehandle.
3711  * Not a method if foo is a subroutine prototyped to take a filehandle.
3712  * Not a method if it's really "Foo $bar"
3713  * Method if it's "foo $bar"
3714  * Not a method if it's really "print foo $bar"
3715  * Method if it's really "foo package::" (interpreted as package->foo)
3716  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3717  * Not a method if bar is a filehandle or package, but is quoted with
3718  *   =>
3719  */
3720
3721 STATIC int
3722 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3723 {
3724     dVAR;
3725     char *s = start + (*start == '$');
3726     char tmpbuf[sizeof PL_tokenbuf];
3727     STRLEN len;
3728     GV* indirgv;
3729 #ifdef PERL_MAD
3730     int soff;
3731 #endif
3732
3733     PERL_ARGS_ASSERT_INTUIT_METHOD;
3734
3735     if (gv) {
3736         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3737             return 0;
3738         if (cv) {
3739             if (SvPOK(cv)) {
3740                 const char *proto = CvPROTO(cv);
3741                 if (proto) {
3742                     if (*proto == ';')
3743                         proto++;
3744                     if (*proto == '*')
3745                         return 0;
3746                 }
3747             }
3748         } else
3749             gv = NULL;
3750     }
3751     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3752     /* start is the beginning of the possible filehandle/object,
3753      * and s is the end of it
3754      * tmpbuf is a copy of it
3755      */
3756
3757     if (*start == '$') {
3758         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3759                 isUPPER(*PL_tokenbuf))
3760             return 0;
3761 #ifdef PERL_MAD
3762         len = start - SvPVX(PL_linestr);
3763 #endif
3764         s = PEEKSPACE(s);
3765 #ifdef PERL_MAD
3766         start = SvPVX(PL_linestr) + len;
3767 #endif
3768         PL_bufptr = start;
3769         PL_expect = XREF;
3770         return *s == '(' ? FUNCMETH : METHOD;
3771     }
3772     if (!keyword(tmpbuf, len, 0)) {
3773         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3774             len -= 2;
3775             tmpbuf[len] = '\0';
3776 #ifdef PERL_MAD
3777             soff = s - SvPVX(PL_linestr);
3778 #endif
3779             goto bare_package;
3780         }
3781         indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3782         if (indirgv && GvCVu(indirgv))
3783             return 0;
3784         /* filehandle or package name makes it a method */
3785         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3786 #ifdef PERL_MAD
3787             soff = s - SvPVX(PL_linestr);
3788 #endif
3789             s = PEEKSPACE(s);
3790             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3791                 return 0;       /* no assumptions -- "=>" quotes bareword */
3792       bare_package:
3793             start_force(PL_curforce);
3794             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3795                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3796             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3797             if (PL_madskills)
3798                 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3799                                                             ( UTF ? SVf_UTF8 : 0 )));
3800             PL_expect = XTERM;
3801             force_next(WORD);
3802             PL_bufptr = s;
3803 #ifdef PERL_MAD
3804             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3805 #endif
3806             return *s == '(' ? FUNCMETH : METHOD;
3807         }
3808     }
3809     return 0;
3810 }
3811
3812 /* Encoded script support. filter_add() effectively inserts a
3813  * 'pre-processing' function into the current source input stream.
3814  * Note that the filter function only applies to the current source file
3815  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3816  *
3817  * The datasv parameter (which may be NULL) can be used to pass
3818  * private data to this instance of the filter. The filter function
3819  * can recover the SV using the FILTER_DATA macro and use it to
3820  * store private buffers and state information.
3821  *
3822  * The supplied datasv parameter is upgraded to a PVIO type
3823  * and the IoDIRP/IoANY field is used to store the function pointer,
3824  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3825  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3826  * private use must be set using malloc'd pointers.
3827  */
3828
3829 SV *
3830 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3831 {
3832     dVAR;
3833     if (!funcp)
3834         return NULL;
3835
3836     if (!PL_parser)
3837         return NULL;
3838
3839     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3840         Perl_croak(aTHX_ "Source filters apply only to byte streams");
3841
3842     if (!PL_rsfp_filters)
3843         PL_rsfp_filters = newAV();
3844     if (!datasv)
3845         datasv = newSV(0);
3846     SvUPGRADE(datasv, SVt_PVIO);
3847     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3848     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3849     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3850                           FPTR2DPTR(void *, IoANY(datasv)),
3851                           SvPV_nolen(datasv)));
3852     av_unshift(PL_rsfp_filters, 1);
3853     av_store(PL_rsfp_filters, 0, datasv) ;
3854     if (
3855         !PL_parser->filtered
3856      && PL_parser->lex_flags & LEX_EVALBYTES
3857      && PL_bufptr < PL_bufend
3858     ) {
3859         const char *s = PL_bufptr;
3860         while (s < PL_bufend) {
3861             if (*s == '\n') {
3862                 SV *linestr = PL_parser->linestr;
3863                 char *buf = SvPVX(linestr);
3864                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3865                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3866                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3867                 STRLEN const linestart_pos = PL_parser->linestart - buf;
3868                 STRLEN const last_uni_pos =
3869                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3870                 STRLEN const last_lop_pos =
3871                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3872                 av_push(PL_rsfp_filters, linestr);
3873                 PL_parser->linestr =
3874                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3875                 buf = SvPVX(PL_parser->linestr);
3876                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3877                 PL_parser->bufptr = buf + bufptr_pos;
3878                 PL_parser->oldbufptr = buf + oldbufptr_pos;
3879                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3880                 PL_parser->linestart = buf + linestart_pos;
3881                 if (PL_parser->last_uni)
3882                     PL_parser->last_uni = buf + last_uni_pos;
3883                 if (PL_parser->last_lop)
3884                     PL_parser->last_lop = buf + last_lop_pos;
3885                 SvLEN(linestr) = SvCUR(linestr);
3886                 SvCUR(linestr) = s-SvPVX(linestr);
3887                 PL_parser->filtered = 1;
3888                 break;
3889             }
3890             s++;
3891         }
3892     }
3893     return(datasv);
3894 }
3895
3896
3897 /* Delete most recently added instance of this filter function. */
3898 void
3899 Perl_filter_del(pTHX_ filter_t funcp)
3900 {
3901     dVAR;
3902     SV *datasv;
3903
3904     PERL_ARGS_ASSERT_FILTER_DEL;
3905
3906 #ifdef DEBUGGING
3907     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3908                           FPTR2DPTR(void*, funcp)));
3909 #endif
3910     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3911         return;
3912     /* if filter is on top of stack (usual case) just pop it off */
3913     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3914     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3915         sv_free(av_pop(PL_rsfp_filters));
3916
3917         return;
3918     }
3919     /* we need to search for the correct entry and clear it     */
3920     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3921 }
3922
3923
3924 /* Invoke the idxth filter function for the current rsfp.        */
3925 /* maxlen 0 = read one text line */
3926 I32
3927 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3928 {
3929     dVAR;
3930     filter_t funcp;
3931     SV *datasv = NULL;
3932     /* This API is bad. It should have been using unsigned int for maxlen.
3933        Not sure if we want to change the API, but if not we should sanity
3934        check the value here.  */
3935     unsigned int correct_length
3936         = maxlen < 0 ?
3937 #ifdef PERL_MICRO
3938         0x7FFFFFFF
3939 #else
3940         INT_MAX
3941 #endif
3942         : maxlen;
3943
3944     PERL_ARGS_ASSERT_FILTER_READ;
3945
3946     if (!PL_parser || !PL_rsfp_filters)
3947         return -1;
3948     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3949         /* Provide a default input filter to make life easy.    */
3950         /* Note that we append to the line. This is handy.      */
3951         DEBUG_P(PerlIO_printf(Perl_debug_log,
3952                               "filter_read %d: from rsfp\n", idx));
3953         if (correct_length) {
3954             /* Want a block */
3955             int len ;
3956             const int old_len = SvCUR(buf_sv);
3957
3958             /* ensure buf_sv is large enough */
3959             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3960             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3961                                    correct_length)) <= 0) {
3962                 if (PerlIO_error(PL_rsfp))
3963                     return -1;          /* error */
3964                 else
3965                     return 0 ;          /* end of file */
3966             }
3967             SvCUR_set(buf_sv, old_len + len) ;
3968             SvPVX(buf_sv)[old_len + len] = '\0';
3969         } else {
3970             /* Want a line */
3971             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3972                 if (PerlIO_error(PL_rsfp))
3973                     return -1;          /* error */
3974                 else
3975                     return 0 ;          /* end of file */
3976             }
3977         }
3978         return SvCUR(buf_sv);
3979     }
3980     /* Skip this filter slot if filter has been deleted */
3981     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3982         DEBUG_P(PerlIO_printf(Perl_debug_log,
3983                               "filter_read %d: skipped (filter deleted)\n",
3984                               idx));
3985         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3986     }
3987     if (SvTYPE(datasv) != SVt_PVIO) {
3988         if (correct_length) {
3989             /* Want a block */
3990             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
3991             if (!remainder) return 0; /* eof */
3992             if (correct_length > remainder) correct_length = remainder;
3993             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
3994             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
3995         } else {
3996             /* Want a line */
3997             const char *s = SvEND(datasv);
3998             const char *send = SvPVX(datasv) + SvLEN(datasv);
3999             while (s < send) {
4000                 if (*s == '\n') {
4001                     s++;
4002                     break;
4003                 }
4004                 s++;
4005             }
4006             if (s == send) return 0; /* eof */
4007             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4008             SvCUR_set(datasv, s-SvPVX(datasv));
4009         }
4010         return SvCUR(buf_sv);
4011     }
4012     /* Get function pointer hidden within datasv        */
4013     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4014     DEBUG_P(PerlIO_printf(Perl_debug_log,
4015                           "filter_read %d: via function %p (%s)\n",
4016                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4017     /* Call function. The function is expected to       */
4018     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4019     /* Return: <0:error, =0:eof, >0:not eof             */
4020     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4021 }
4022
4023 STATIC char *
4024 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4025 {
4026     dVAR;
4027
4028     PERL_ARGS_ASSERT_FILTER_GETS;
4029
4030 #ifdef PERL_CR_FILTER
4031     if (!PL_rsfp_filters) {
4032         filter_add(S_cr_textfilter,NULL);
4033     }
4034 #endif
4035     if (PL_rsfp_filters) {
4036         if (!append)
4037             SvCUR_set(sv, 0);   /* start with empty line        */
4038         if (FILTER_READ(0, sv, 0) > 0)
4039             return ( SvPVX(sv) ) ;
4040         else
4041             return NULL ;
4042     }
4043     else
4044         return (sv_gets(sv, PL_rsfp, append));
4045 }
4046
4047 STATIC HV *
4048 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4049 {
4050     dVAR;
4051     GV *gv;
4052
4053     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4054
4055     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4056         return PL_curstash;
4057
4058     if (len > 2 &&
4059         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4060         (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4061     {
4062         return GvHV(gv);                        /* Foo:: */
4063     }
4064
4065     /* use constant CLASS => 'MyClass' */
4066     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4067     if (gv && GvCV(gv)) {
4068         SV * const sv = cv_const_sv(GvCV(gv));
4069         if (sv)
4070             pkgname = SvPV_const(sv, len);
4071     }
4072
4073     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4074 }
4075
4076 /*
4077  * S_readpipe_override
4078  * Check whether readpipe() is overridden, and generates the appropriate
4079  * optree, provided sublex_start() is called afterwards.
4080  */
4081 STATIC void
4082 S_readpipe_override(pTHX)
4083 {
4084     GV **gvp;
4085     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4086     pl_yylval.ival = OP_BACKTICK;
4087     if ((gv_readpipe
4088                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4089             ||
4090             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4091              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4092              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4093     {
4094         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4095             op_append_elem(OP_LIST,
4096                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4097                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4098     }
4099 }
4100
4101 #ifdef PERL_MAD
4102  /*
4103  * Perl_madlex
4104  * The intent of this yylex wrapper is to minimize the changes to the
4105  * tokener when we aren't interested in collecting madprops.  It remains
4106  * to be seen how successful this strategy will be...
4107  */
4108
4109 int
4110 Perl_madlex(pTHX)
4111 {
4112     int optype;
4113     char *s = PL_bufptr;
4114
4115     /* make sure PL_thiswhite is initialized */
4116     PL_thiswhite = 0;
4117     PL_thismad = 0;
4118
4119     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4120     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4121         return S_pending_ident(aTHX);
4122
4123     /* previous token ate up our whitespace? */
4124     if (!PL_lasttoke && PL_nextwhite) {
4125         PL_thiswhite = PL_nextwhite;
4126         PL_nextwhite = 0;
4127     }
4128
4129     /* isolate the token, and figure out where it is without whitespace */
4130     PL_realtokenstart = -1;
4131     PL_thistoken = 0;
4132     optype = yylex();
4133     s = PL_bufptr;
4134     assert(PL_curforce < 0);
4135
4136     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4137         if (!PL_thistoken) {
4138             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4139                 PL_thistoken = newSVpvs("");
4140             else {
4141                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4142                 PL_thistoken = newSVpvn(tstart, s - tstart);
4143             }
4144         }
4145         if (PL_thismad) /* install head */
4146             CURMAD('X', PL_thistoken);
4147     }
4148
4149     /* last whitespace of a sublex? */
4150     if (optype == ')' && PL_endwhite) {
4151         CURMAD('X', PL_endwhite);
4152     }
4153
4154     if (!PL_thismad) {
4155
4156         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4157         if (!PL_thiswhite && !PL_endwhite && !optype) {
4158             sv_free(PL_thistoken);
4159             PL_thistoken = 0;
4160             return 0;
4161         }
4162
4163         /* put off final whitespace till peg */
4164         if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4165             PL_nextwhite = PL_thiswhite;
4166             PL_thiswhite = 0;
4167         }
4168         else if (PL_thisopen) {
4169             CURMAD('q', PL_thisopen);
4170             if (PL_thistoken)
4171                 sv_free(PL_thistoken);
4172             PL_thistoken = 0;
4173         }
4174         else {
4175             /* Store actual token text as madprop X */
4176             CURMAD('X', PL_thistoken);
4177         }
4178
4179         if (PL_thiswhite) {
4180             /* add preceding whitespace as madprop _ */
4181             CURMAD('_', PL_thiswhite);
4182         }
4183
4184         if (PL_thisstuff) {
4185             /* add quoted material as madprop = */
4186             CURMAD('=', PL_thisstuff);
4187         }
4188
4189         if (PL_thisclose) {
4190             /* add terminating quote as madprop Q */
4191             CURMAD('Q', PL_thisclose);
4192         }
4193     }
4194
4195     /* special processing based on optype */
4196
4197     switch (optype) {
4198
4199     /* opval doesn't need a TOKEN since it can already store mp */
4200     case WORD:
4201     case METHOD:
4202     case FUNCMETH:
4203     case THING:
4204     case PMFUNC:
4205     case PRIVATEREF:
4206     case FUNC0SUB:
4207     case UNIOPSUB:
4208     case LSTOPSUB:
4209         if (pl_yylval.opval)
4210             append_madprops(PL_thismad, pl_yylval.opval, 0);
4211         PL_thismad = 0;
4212         return optype;
4213
4214     /* fake EOF */
4215     case 0:
4216         optype = PEG;
4217         if (PL_endwhite) {
4218             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4219             PL_endwhite = 0;
4220         }
4221         break;
4222
4223     case ']':
4224     case '}':
4225         if (PL_faketokens)
4226             break;
4227         /* remember any fake bracket that lexer is about to discard */
4228         if (PL_lex_brackets == 1 &&
4229             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4230         {
4231             s = PL_bufptr;
4232             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4233                 s++;
4234             if (*s == '}') {
4235                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4236                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4237                 PL_thiswhite = 0;
4238                 PL_bufptr = s - 1;
4239                 break;  /* don't bother looking for trailing comment */
4240             }
4241             else
4242                 s = PL_bufptr;
4243         }
4244         if (optype == ']')
4245             break;
4246         /* FALLTHROUGH */
4247
4248     /* attach a trailing comment to its statement instead of next token */
4249     case ';':
4250         if (PL_faketokens)
4251             break;
4252         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4253             s = PL_bufptr;
4254             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4255                 s++;
4256             if (*s == '\n' || *s == '#') {
4257                 while (s < PL_bufend && *s != '\n')
4258                     s++;
4259                 if (s < PL_bufend)
4260                     s++;
4261                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4262                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4263                 PL_thiswhite = 0;
4264                 PL_bufptr = s;
4265             }
4266         }
4267         break;
4268
4269     /* pval */
4270     case LABEL:
4271         break;
4272
4273     /* ival */
4274     default:
4275         break;
4276
4277     }
4278
4279     /* Create new token struct.  Note: opvals return early above. */
4280     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4281     PL_thismad = 0;
4282     return optype;
4283 }
4284 #endif
4285
4286 STATIC char *
4287 S_tokenize_use(pTHX_ int is_use, char *s) {
4288     dVAR;
4289
4290     PERL_ARGS_ASSERT_TOKENIZE_USE;
4291
4292     if (PL_expect != XSTATE)
4293         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4294                     is_use ? "use" : "no"));
4295     s = SKIPSPACE1(s);
4296     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4297         s = force_version(s, TRUE);
4298         if (*s == ';' || *s == '}'
4299                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4300             start_force(PL_curforce);
4301             NEXTVAL_NEXTTOKE.opval = NULL;
4302             force_next(WORD);
4303         }
4304         else if (*s == 'v') {
4305             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4306             s = force_version(s, FALSE);
4307         }
4308     }
4309     else {
4310         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4311         s = force_version(s, FALSE);
4312     }
4313     pl_yylval.ival = is_use;
4314     return s;
4315 }
4316 #ifdef DEBUGGING
4317     static const char* const exp_name[] =
4318         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4319           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4320         };
4321 #endif
4322
4323 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4324 STATIC bool
4325 S_word_takes_any_delimeter(char *p, STRLEN len)
4326 {
4327     return (len == 1 && strchr("msyq", p[0])) ||
4328            (len == 2 && (
4329             (p[0] == 't' && p[1] == 'r') ||
4330             (p[0] == 'q' && strchr("qwxr", p[1]))));
4331 }
4332
4333 /*
4334   yylex
4335
4336   Works out what to call the token just pulled out of the input
4337   stream.  The yacc parser takes care of taking the ops we return and
4338   stitching them into a tree.
4339
4340   Returns:
4341     PRIVATEREF
4342
4343   Structure:
4344       if read an identifier
4345           if we're in a my declaration
4346               croak if they tried to say my($foo::bar)
4347               build the ops for a my() declaration
4348           if it's an access to a my() variable
4349               are we in a sort block?
4350                   croak if my($a); $a <=> $b
4351               build ops for access to a my() variable
4352           if in a dq string, and they've said @foo and we can't find @foo
4353               croak
4354           build ops for a bareword
4355       if we already built the token before, use it.
4356 */
4357
4358
4359 #ifdef __SC__
4360 #pragma segment Perl_yylex
4361 #endif
4362 int
4363 Perl_yylex(pTHX)
4364 {
4365     dVAR;
4366     register char *s = PL_bufptr;
4367     register char *d;
4368     STRLEN len;
4369     bool bof = FALSE;
4370     U32 fake_eof = 0;
4371
4372     /* orig_keyword, gvp, and gv are initialized here because
4373      * jump to the label just_a_word_zero can bypass their
4374      * initialization later. */
4375     I32 orig_keyword = 0;
4376     GV *gv = NULL;
4377     GV **gvp = NULL;
4378
4379     DEBUG_T( {
4380         SV* tmp = newSVpvs("");
4381         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4382             (IV)CopLINE(PL_curcop),
4383             lex_state_names[PL_lex_state],
4384             exp_name[PL_expect],
4385             pv_display(tmp, s, strlen(s), 0, 60));
4386         SvREFCNT_dec(tmp);
4387     } );
4388     /* check if there's an identifier for us to look at */
4389     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4390         return REPORT(S_pending_ident(aTHX));
4391
4392     /* no identifier pending identification */
4393
4394     switch (PL_lex_state) {
4395 #ifdef COMMENTARY
4396     case LEX_NORMAL:            /* Some compilers will produce faster */
4397     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4398         break;
4399 #endif
4400
4401     /* when we've already built the next token, just pull it out of the queue */
4402     case LEX_KNOWNEXT:
4403 #ifdef PERL_MAD
4404         PL_lasttoke--;
4405         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4406         if (PL_madskills) {
4407             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4408             PL_nexttoke[PL_lasttoke].next_mad = 0;
4409             if (PL_thismad && PL_thismad->mad_key == '_') {
4410                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4411                 PL_thismad->mad_val = 0;
4412                 mad_free(PL_thismad);
4413                 PL_thismad = 0;
4414             }
4415         }
4416         if (!PL_lasttoke) {
4417             PL_lex_state = PL_lex_defer;
4418             PL_expect = PL_lex_expect;
4419             PL_lex_defer = LEX_NORMAL;
4420             if (!PL_nexttoke[PL_lasttoke].next_type)
4421                 return yylex();
4422         }
4423 #else
4424         PL_nexttoke--;
4425         pl_yylval = PL_nextval[PL_nexttoke];
4426         if (!PL_nexttoke) {
4427             PL_lex_state = PL_lex_defer;
4428             PL_expect = PL_lex_expect;
4429             PL_lex_defer = LEX_NORMAL;
4430         }
4431 #endif
4432         {
4433             I32 next_type;
4434 #ifdef PERL_MAD
4435             next_type = PL_nexttoke[PL_lasttoke].next_type;
4436 #else
4437             next_type = PL_nexttype[PL_nexttoke];
4438 #endif
4439             if (next_type & (7<<24)) {
4440                 if (next_type & (1<<24)) {
4441                     if (PL_lex_brackets > 100)
4442                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4443                     PL_lex_brackstack[PL_lex_brackets++] =
4444                         (char) ((next_type >> 16) & 0xff);
4445                 }
4446                 if (next_type & (2<<24))
4447                     PL_lex_allbrackets++;
4448                 if (next_type & (4<<24))
4449                     PL_lex_allbrackets--;
4450                 next_type &= 0xffff;
4451             }
4452 #ifdef PERL_MAD
4453             /* FIXME - can these be merged?  */
4454             return next_type;
4455 #else
4456             return REPORT(next_type);
4457 #endif
4458         }
4459
4460     /* interpolated case modifiers like \L \U, including \Q and \E.
4461        when we get here, PL_bufptr is at the \
4462     */
4463     case LEX_INTERPCASEMOD:
4464 #ifdef DEBUGGING
4465         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4466             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4467 #endif
4468         /* handle \E or end of string */
4469         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4470             /* if at a \E */
4471             if (PL_lex_casemods) {
4472                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4473                 PL_lex_casestack[PL_lex_casemods] = '\0';
4474
4475                 if (PL_bufptr != PL_bufend
4476                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4477                     PL_bufptr += 2;
4478                     PL_lex_state = LEX_INTERPCONCAT;
4479 #ifdef PERL_MAD
4480                     if (PL_madskills)
4481                         PL_thistoken = newSVpvs("\\E");
4482 #endif
4483                 }
4484                 PL_lex_allbrackets--;
4485                 return REPORT(')');
4486             }
4487 #ifdef PERL_MAD
4488             while (PL_bufptr != PL_bufend &&
4489               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4490                 if (!PL_thiswhite)
4491                     PL_thiswhite = newSVpvs("");
4492                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4493                 PL_bufptr += 2;
4494             }
4495 #else
4496             if (PL_bufptr != PL_bufend)
4497                 PL_bufptr += 2;
4498 #endif
4499             PL_lex_state = LEX_INTERPCONCAT;
4500             return yylex();
4501         }
4502         else {
4503             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4504               "### Saw case modifier\n"); });
4505             s = PL_bufptr + 1;
4506             if (s[1] == '\\' && s[2] == 'E') {
4507 #ifdef PERL_MAD
4508                 if (!PL_thiswhite)
4509                     PL_thiswhite = newSVpvs("");
4510                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4511 #endif
4512                 PL_bufptr = s + 3;
4513                 PL_lex_state = LEX_INTERPCONCAT;
4514                 return yylex();
4515             }
4516             else {
4517                 I32 tmp;
4518                 if (!PL_madskills) /* when just compiling don't need correct */
4519                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4520                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4521                 if ((*s == 'L' || *s == 'U') &&
4522                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4523                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4524                     PL_lex_allbrackets--;
4525                     return REPORT(')');
4526                 }
4527                 if (PL_lex_casemods > 10)
4528                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4529                 PL_lex_casestack[PL_lex_casemods++] = *s;
4530                 PL_lex_casestack[PL_lex_casemods] = '\0';
4531                 PL_lex_state = LEX_INTERPCONCAT;
4532                 start_force(PL_curforce);
4533                 NEXTVAL_NEXTTOKE.ival = 0;
4534                 force_next((2<<24)|'(');
4535                 start_force(PL_curforce);
4536                 if (*s == 'l')
4537                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4538                 else if (*s == 'u')
4539                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4540                 else if (*s == 'L')
4541                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4542                 else if (*s == 'U')
4543                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4544                 else if (*s == 'Q')
4545                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4546                 else
4547                     Perl_croak(aTHX_ "panic: yylex");
4548                 if (PL_madskills) {
4549                     SV* const tmpsv = newSVpvs("\\ ");
4550                     /* replace the space with the character we want to escape
4551                      */
4552                     SvPVX(tmpsv)[1] = *s;
4553                     curmad('_', tmpsv);
4554                 }
4555                 PL_bufptr = s + 1;
4556             }
4557             force_next(FUNC);
4558             if (PL_lex_starts) {
4559                 s = PL_bufptr;
4560                 PL_lex_starts = 0;
4561 #ifdef PERL_MAD
4562                 if (PL_madskills) {
4563                     if (PL_thistoken)
4564                         sv_free(PL_thistoken);
4565                     PL_thistoken = newSVpvs("");
4566                 }
4567 #endif
4568                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4569                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4570                     OPERATOR(',');
4571                 else
4572                     Aop(OP_CONCAT);
4573             }
4574             else
4575                 return yylex();
4576         }
4577
4578     case LEX_INTERPPUSH:
4579         return REPORT(sublex_push());
4580
4581     case LEX_INTERPSTART:
4582         if (PL_bufptr == PL_bufend)
4583             return REPORT(sublex_done());
4584         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4585               "### Interpolated variable\n"); });
4586         PL_expect = XTERM;
4587         PL_lex_dojoin = (*PL_bufptr == '@');
4588         PL_lex_state = LEX_INTERPNORMAL;
4589         if (PL_lex_dojoin) {
4590             start_force(PL_curforce);
4591             NEXTVAL_NEXTTOKE.ival = 0;
4592             force_next(',');
4593             start_force(PL_curforce);
4594             force_ident("\"", '$');
4595             start_force(PL_curforce);
4596             NEXTVAL_NEXTTOKE.ival = 0;
4597             force_next('$');
4598             start_force(PL_curforce);
4599             NEXTVAL_NEXTTOKE.ival = 0;
4600             force_next((2<<24)|'(');
4601             start_force(PL_curforce);
4602             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4603             force_next(FUNC);
4604         }
4605         if (PL_lex_starts++) {
4606             s = PL_bufptr;
4607 #ifdef PERL_MAD
4608             if (PL_madskills) {
4609                 if (PL_thistoken)
4610                     sv_free(PL_thistoken);
4611                 PL_thistoken = newSVpvs("");
4612             }
4613 #endif
4614             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4615             if (!PL_lex_casemods && PL_lex_inpat)
4616                 OPERATOR(',');
4617             else
4618                 Aop(OP_CONCAT);
4619         }
4620         return yylex();
4621
4622     case LEX_INTERPENDMAYBE:
4623         if (intuit_more(PL_bufptr)) {
4624             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4625             break;
4626         }
4627         /* FALL THROUGH */
4628
4629     case LEX_INTERPEND:
4630         if (PL_lex_dojoin) {
4631             PL_lex_dojoin = FALSE;
4632             PL_lex_state = LEX_INTERPCONCAT;
4633 #ifdef PERL_MAD
4634             if (PL_madskills) {
4635                 if (PL_thistoken)
4636                     sv_free(PL_thistoken);
4637                 PL_thistoken = newSVpvs("");
4638             }
4639 #endif
4640             PL_lex_allbrackets--;
4641             return REPORT(')');
4642         }
4643         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4644             && SvEVALED(PL_lex_repl))
4645         {
4646             if (PL_bufptr != PL_bufend)
4647                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4648             PL_lex_repl = NULL;
4649         }
4650         /* FALLTHROUGH */
4651     case LEX_INTERPCONCAT:
4652 #ifdef DEBUGGING
4653         if (PL_lex_brackets)
4654             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4655 #endif
4656         if (PL_bufptr == PL_bufend)
4657             return REPORT(sublex_done());
4658
4659         if (SvIVX(PL_linestr) == '\'') {
4660             SV *sv = newSVsv(PL_linestr);
4661             if (!PL_lex_inpat)
4662                 sv = tokeq(sv);
4663             else if ( PL_hints & HINT_NEW_RE )
4664                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4665             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4666             s = PL_bufend;
4667         }
4668         else {
4669             s = scan_const(PL_bufptr);
4670             if (*s == '\\')
4671                 PL_lex_state = LEX_INTERPCASEMOD;
4672             else
4673                 PL_lex_state = LEX_INTERPSTART;
4674         }
4675
4676         if (s != PL_bufptr) {
4677             start_force(PL_curforce);
4678             if (PL_madskills) {
4679                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4680             }
4681             NEXTVAL_NEXTTOKE = pl_yylval;
4682             PL_expect = XTERM;
4683             force_next(THING);
4684             if (PL_lex_starts++) {
4685 #ifdef PERL_MAD
4686                 if (PL_madskills) {
4687                     if (PL_thistoken)
4688                         sv_free(PL_thistoken);
4689                     PL_thistoken = newSVpvs("");
4690                 }
4691 #endif
4692                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4693                 if (!PL_lex_casemods && PL_lex_inpat)
4694                     OPERATOR(',');
4695                 else
4696                     Aop(OP_CONCAT);
4697             }
4698             else {
4699                 PL_bufptr = s;
4700                 return yylex();
4701             }
4702         }
4703
4704         return yylex();
4705     case LEX_FORMLINE:
4706         PL_lex_state = LEX_NORMAL;
4707         s = scan_formline(PL_bufptr);
4708         if (!PL_lex_formbrack)
4709             goto rightbracket;
4710         OPERATOR(';');
4711     }
4712
4713     s = PL_bufptr;
4714     PL_oldoldbufptr = PL_oldbufptr;
4715     PL_oldbufptr = s;
4716
4717   retry:
4718 #ifdef PERL_MAD
4719     if (PL_thistoken) {
4720         sv_free(PL_thistoken);
4721         PL_thistoken = 0;
4722     }
4723     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4724 #endif
4725     switch (*s) {
4726     default:
4727         if (isIDFIRST_lazy_if(s,UTF))
4728             goto keylookup;
4729         {
4730         unsigned char c = *s;
4731         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4732         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4733             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4734         } else {
4735             d = PL_linestart;
4736         }
4737         *s = '\0';
4738         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4739     }
4740     case 4:
4741     case 26:
4742         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4743     case 0:
4744 #ifdef PERL_MAD
4745         if (PL_madskills)
4746             PL_faketokens = 0;
4747 #endif
4748         if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4749             PL_last_uni = 0;
4750             PL_last_lop = 0;
4751             if (PL_lex_brackets &&
4752                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4753                 yyerror((const char *)
4754                         (PL_lex_formbrack
4755                          ? "Format not terminated"
4756                          : "Missing right curly or square bracket"));
4757             }
4758             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4759                         "### Tokener got EOF\n");
4760             } );
4761             TOKEN(0);
4762         }
4763         if (s++ < PL_bufend)
4764             goto retry;                 /* ignore stray nulls */
4765         PL_last_uni = 0;
4766         PL_last_lop = 0;
4767         if (!PL_in_eval && !PL_preambled) {
4768             PL_preambled = TRUE;
4769 #ifdef PERL_MAD
4770             if (PL_madskills)
4771                 PL_faketokens = 1;
4772 #endif
4773             if (PL_perldb) {
4774                 /* Generate a string of Perl code to load the debugger.
4775                  * If PERL5DB is set, it will return the contents of that,
4776                  * otherwise a compile-time require of perl5db.pl.  */
4777
4778                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4779
4780                 if (pdb) {
4781                     sv_setpv(PL_linestr, pdb);
4782                     sv_catpvs(PL_linestr,";");
4783                 } else {
4784                     SETERRNO(0,SS_NORMAL);
4785                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4786                 }
4787             } else
4788                 sv_setpvs(PL_linestr,"");
4789             if (PL_preambleav) {
4790                 SV **svp = AvARRAY(PL_preambleav);
4791                 SV **const end = svp + AvFILLp(PL_preambleav);
4792                 while(svp <= end) {
4793                     sv_catsv(PL_linestr, *svp);
4794                     ++svp;
4795                     sv_catpvs(PL_linestr, ";");
4796                 }
4797                 sv_free(MUTABLE_SV(PL_preambleav));
4798                 PL_preambleav = NULL;
4799             }
4800             if (PL_minus_E)
4801                 sv_catpvs(PL_linestr,
4802                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4803             if (PL_minus_n || PL_minus_p) {
4804                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4805                 if (PL_minus_l)
4806                     sv_catpvs(PL_linestr,"chomp;");
4807                 if (PL_minus_a) {
4808                     if (PL_minus_F) {
4809                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4810                              || *PL_splitstr == '"')
4811                               && strchr(PL_splitstr + 1, *PL_splitstr))
4812                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4813                         else {
4814                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4815                                bytes can be used as quoting characters.  :-) */
4816                             const char *splits = PL_splitstr;
4817                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4818                             do {
4819                                 /* Need to \ \s  */
4820                                 if (*splits == '\\')
4821                                     sv_catpvn(PL_linestr, splits, 1);
4822                                 sv_catpvn(PL_linestr, splits, 1);
4823                             } while (*splits++);
4824                             /* This loop will embed the trailing NUL of
4825                                PL_linestr as the last thing it does before
4826                                terminating.  */
4827                             sv_catpvs(PL_linestr, ");");
4828                         }
4829                     }
4830                     else
4831                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4832                 }
4833             }
4834             sv_catpvs(PL_linestr, "\n");
4835             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4836             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4837             PL_last_lop = PL_last_uni = NULL;
4838             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4839                 update_debugger_info(PL_linestr, NULL, 0);
4840             goto retry;
4841         }
4842         do {
4843             fake_eof = 0;
4844             bof = PL_rsfp ? TRUE : FALSE;
4845             if (0) {
4846               fake_eof:
4847                 fake_eof = LEX_FAKE_EOF;
4848             }
4849             PL_bufptr = PL_bufend;
4850             CopLINE_inc(PL_curcop);
4851             if (!lex_next_chunk(fake_eof)) {
4852                 CopLINE_dec(PL_curcop);
4853                 s = PL_bufptr;
4854                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4855             }
4856             CopLINE_dec(PL_curcop);
4857 #ifdef PERL_MAD
4858             if (!PL_rsfp)
4859                 PL_realtokenstart = -1;
4860 #endif
4861             s = PL_bufptr;
4862             /* If it looks like the start of a BOM or raw UTF-16,
4863              * check if it in fact is. */
4864             if (bof && PL_rsfp &&
4865                      (*s == 0 ||
4866                       *(U8*)s == 0xEF ||
4867                       *(U8*)s >= 0xFE ||
4868                       s[1] == 0)) {
4869                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4870                 bof = (offset == (Off_t)SvCUR(PL_linestr));
4871 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4872                 /* offset may include swallowed CR */
4873                 if (!bof)
4874                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4875 #endif
4876                 if (bof) {
4877                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4878                     s = swallow_bom((U8*)s);
4879                 }
4880             }
4881             if (PL_parser->in_pod) {
4882                 /* Incest with pod. */
4883 #ifdef PERL_MAD
4884                 if (PL_madskills)
4885                     sv_catsv(PL_thiswhite, PL_linestr);
4886 #endif
4887                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4888                     sv_setpvs(PL_linestr, "");
4889                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4890                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4891                     PL_last_lop = PL_last_uni = NULL;
4892                     PL_parser->in_pod = 0;
4893                 }
4894             }
4895             if (PL_rsfp || PL_parser->filtered)
4896                 incline(s);
4897         } while (PL_parser->in_pod);
4898         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4899         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4900         PL_last_lop = PL_last_uni = NULL;
4901         if (CopLINE(PL_curcop) == 1) {
4902             while (s < PL_bufend && isSPACE(*s))
4903                 s++;
4904             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4905                 s++;
4906 #ifdef PERL_MAD
4907             if (PL_madskills)
4908                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4909 #endif
4910             d = NULL;
4911             if (!PL_in_eval) {
4912                 if (*s == '#' && *(s+1) == '!')
4913                     d = s + 2;
4914 #ifdef ALTERNATE_SHEBANG
4915                 else {
4916                     static char const as[] = ALTERNATE_SHEBANG;
4917                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4918                         d = s + (sizeof(as) - 1);
4919                 }
4920 #endif /* ALTERNATE_SHEBANG */
4921             }
4922             if (d) {
4923                 char *ipath;
4924                 char *ipathend;
4925
4926                 while (isSPACE(*d))
4927                     d++;
4928                 ipath = d;
4929                 while (*d && !isSPACE(*d))
4930                     d++;
4931                 ipathend = d;
4932
4933 #ifdef ARG_ZERO_IS_SCRIPT
4934                 if (ipathend > ipath) {
4935                     /*
4936                      * HP-UX (at least) sets argv[0] to the script name,
4937                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4938                      * at least, set argv[0] to the basename of the Perl
4939                      * interpreter. So, having found "#!", we'll set it right.
4940                      */
4941                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4942                                                     SVt_PV)); /* $^X */
4943                     assert(SvPOK(x) || SvGMAGICAL(x));
4944                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4945                         sv_setpvn(x, ipath, ipathend - ipath);
4946                         SvSETMAGIC(x);
4947                     }
4948                     else {
4949                         STRLEN blen;
4950                         STRLEN llen;
4951                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4952                         const char * const lstart = SvPV_const(x,llen);
4953                         if (llen < blen) {
4954                             bstart += blen - llen;
4955                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4956                                 sv_setpvn(x, ipath, ipathend - ipath);
4957                                 SvSETMAGIC(x);
4958                             }
4959                         }
4960                     }
4961                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4962                 }
4963 #endif /* ARG_ZERO_IS_SCRIPT */
4964
4965                 /*
4966                  * Look for options.
4967                  */
4968                 d = instr(s,"perl -");
4969                 if (!d) {
4970                     d = instr(s,"perl");
4971 #if defined(DOSISH)
4972                     /* avoid getting into infinite loops when shebang
4973                      * line contains "Perl" rather than "perl" */
4974                     if (!d) {
4975                         for (d = ipathend-4; d >= ipath; --d) {
4976                             if ((*d == 'p' || *d == 'P')
4977                                 && !ibcmp(d, "perl", 4))
4978                             {
4979                                 break;
4980                             }
4981                         }
4982                         if (d < ipath)
4983                             d = NULL;
4984                     }
4985 #endif
4986                 }
4987 #ifdef ALTERNATE_SHEBANG
4988                 /*
4989                  * If the ALTERNATE_SHEBANG on this system starts with a
4990                  * character that can be part of a Perl expression, then if
4991                  * we see it but not "perl", we're probably looking at the
4992                  * start of Perl code, not a request to hand off to some
4993                  * other interpreter.  Similarly, if "perl" is there, but
4994                  * not in the first 'word' of the line, we assume the line
4995                  * contains the start of the Perl program.
4996                  */
4997                 if (d && *s != '#') {
4998                     const char *c = ipath;
4999                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5000                         c++;
5001                     if (c < d)
5002                         d = NULL;       /* "perl" not in first word; ignore */
5003                     else
5004                         *s = '#';       /* Don't try to parse shebang line */
5005                 }
5006 #endif /* ALTERNATE_SHEBANG */
5007                 if (!d &&
5008                     *s == '#' &&
5009                     ipathend > ipath &&
5010                     !PL_minus_c &&
5011                     !instr(s,"indir") &&
5012                     instr(PL_origargv[0],"perl"))
5013                 {
5014                     dVAR;
5015                     char **newargv;
5016
5017                     *ipathend = '\0';
5018                     s = ipathend + 1;
5019                     while (s < PL_bufend && isSPACE(*s))
5020                         s++;
5021                     if (s < PL_bufend) {
5022                         Newx(newargv,PL_origargc+3,char*);
5023                         newargv[1] = s;
5024                         while (s < PL_bufend && !isSPACE(*s))
5025                             s++;
5026                         *s = '\0';
5027                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5028                     }
5029                     else
5030                         newargv = PL_origargv;
5031                     newargv[0] = ipath;
5032                     PERL_FPU_PRE_EXEC
5033                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5034                     PERL_FPU_POST_EXEC
5035                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5036                 }
5037                 if (d) {
5038                     while (*d && !isSPACE(*d))
5039                         d++;
5040                     while (SPACE_OR_TAB(*d))
5041                         d++;
5042
5043                     if (*d++ == '-') {
5044                         const bool switches_done = PL_doswitches;
5045                         const U32 oldpdb = PL_perldb;
5046                         const bool oldn = PL_minus_n;
5047                         const bool oldp = PL_minus_p;
5048                         const char *d1 = d;
5049
5050                         do {
5051                             bool baduni = FALSE;
5052                             if (*d1 == 'C') {
5053                                 const char *d2 = d1 + 1;
5054                                 if (parse_unicode_opts((const char **)&d2)
5055                                     != PL_unicode)
5056                                     baduni = TRUE;
5057                             }
5058                             if (baduni || *d1 == 'M' || *d1 == 'm') {
5059                                 const char * const m = d1;
5060                                 while (*d1 && !isSPACE(*d1))
5061                                     d1++;
5062                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5063                                       (int)(d1 - m), m);
5064                             }
5065                             d1 = moreswitches(d1);
5066                         } while (d1);
5067                         if (PL_doswitches && !switches_done) {
5068                             int argc = PL_origargc;
5069                             char **argv = PL_origargv;
5070                             do {
5071                                 argc--,argv++;
5072                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5073                             init_argv_symbols(argc,argv);
5074                         }
5075                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5076                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5077                               /* if we have already added "LINE: while (<>) {",
5078                                  we must not do it again */
5079                         {
5080                             sv_setpvs(PL_linestr, "");
5081                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5082                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5083                             PL_last_lop = PL_last_uni = NULL;
5084                             PL_preambled = FALSE;
5085                             if (PERLDB_LINE || PERLDB_SAVESRC)
5086                                 (void)gv_fetchfile(PL_origfilename);
5087                             goto retry;
5088                         }
5089                     }
5090                 }
5091             }
5092         }
5093         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5094             PL_bufptr = s;
5095             PL_lex_state = LEX_FORMLINE;
5096             return yylex();
5097         }
5098         goto retry;
5099     case '\r':
5100 #ifdef PERL_STRICT_CR
5101         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5102         Perl_croak(aTHX_
5103       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5104 #endif
5105     case ' ': case '\t': case '\f': case 013:
5106 #ifdef PERL_MAD
5107         PL_realtokenstart = -1;
5108         if (!PL_thiswhite)
5109             PL_thiswhite = newSVpvs("");
5110         sv_catpvn(PL_thiswhite, s, 1);
5111 #endif
5112         s++;
5113         goto retry;
5114     case '#':
5115     case '\n':
5116 #ifdef PERL_MAD
5117         PL_realtokenstart = -1;
5118         if (PL_madskills)
5119             PL_faketokens = 0;
5120 #endif
5121         if (PL_lex_state != LEX_NORMAL ||
5122              (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5123             if (*s == '#' && s == PL_linestart && PL_in_eval
5124              && !PL_rsfp && !PL_parser->filtered) {
5125                 /* handle eval qq[#line 1 "foo"\n ...] */
5126                 CopLINE_dec(PL_curcop);
5127                 incline(s);
5128             }
5129             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5130                 s = SKIPSPACE0(s);
5131                 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5132                     incline(s);
5133             }
5134             else {
5135                 d = s;
5136                 while (d < PL_bufend && *d != '\n')
5137                     d++;
5138                 if (d < PL_bufend)
5139                     d++;
5140                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5141                   Perl_croak(aTHX_ "panic: input overflow");
5142 #ifdef PERL_MAD
5143                 if (PL_madskills)
5144                     PL_thiswhite = newSVpvn(s, d - s);
5145 #endif
5146                 s = d;
5147                 incline(s);
5148             }
5149             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5150                 PL_bufptr = s;
5151                 PL_lex_state = LEX_FORMLINE;
5152                 return yylex();
5153             }
5154         }
5155         else {
5156 #ifdef PERL_MAD
5157             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5158                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5159                     PL_faketokens = 0;
5160                     s = SKIPSPACE0(s);
5161                     TOKEN(PEG); /* make sure any #! line is accessible */
5162                 }
5163                 s = SKIPSPACE0(s);
5164             }
5165             else {
5166 /*              if (PL_madskills && PL_lex_formbrack) { */
5167                     d = s;
5168                     while (d < PL_bufend && *d != '\n')
5169                         d++;
5170                     if (d < PL_bufend)
5171                         d++;
5172                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5173                       Perl_croak(aTHX_ "panic: input overflow");
5174                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5175                         if (!PL_thiswhite)
5176                             PL_thiswhite = newSVpvs("");
5177                         if (CopLINE(PL_curcop) == 1) {
5178                             sv_setpvs(PL_thiswhite, "");
5179                             PL_faketokens = 0;
5180                         }
5181                         sv_catpvn(PL_thiswhite, s, d - s);
5182                     }
5183                     s = d;
5184 /*              }
5185                 *s = '\0';
5186                 PL_bufend = s; */
5187             }
5188 #else
5189             *s = '\0';
5190             PL_bufend = s;
5191 #endif
5192         }
5193         goto retry;
5194     case '-':
5195         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5196             I32 ftst = 0;
5197             char tmp;
5198
5199             s++;
5200             PL_bufptr = s;
5201             tmp = *s++;
5202
5203             while (s < PL_bufend && SPACE_OR_TAB(*s))
5204                 s++;
5205
5206             if (strnEQ(s,"=>",2)) {
5207                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5208                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5209                 OPERATOR('-');          /* unary minus */
5210             }
5211             PL_last_uni = PL_oldbufptr;
5212             switch (tmp) {
5213             case 'r': ftst = OP_FTEREAD;        break;
5214             case 'w': ftst = OP_FTEWRITE;       break;
5215             case 'x': ftst = OP_FTEEXEC;        break;
5216             case 'o': ftst = OP_FTEOWNED;       break;
5217             case 'R': ftst = OP_FTRREAD;        break;
5218             case 'W': ftst = OP_FTRWRITE;       break;
5219             case 'X': ftst = OP_FTREXEC;        break;
5220             case 'O': ftst = OP_FTROWNED;       break;
5221             case 'e': ftst = OP_FTIS;           break;
5222             case 'z': ftst = OP_FTZERO;         break;
5223             case 's': ftst = OP_FTSIZE;         break;
5224             case 'f': ftst = OP_FTFILE;         break;
5225             case 'd': ftst = OP_FTDIR;          break;
5226             case 'l': ftst = OP_FTLINK;         break;
5227             case 'p': ftst = OP_FTPIPE;         break;
5228             case 'S': ftst = OP_FTSOCK;         break;
5229             case 'u': ftst = OP_FTSUID;         break;
5230             case 'g': ftst = OP_FTSGID;         break;
5231             case 'k': ftst = OP_FTSVTX;         break;
5232             case 'b': ftst = OP_FTBLK;          break;
5233             case 'c': ftst = OP_FTCHR;          break;
5234             case 't': ftst = OP_FTTTY;          break;
5235             case 'T': ftst = OP_FTTEXT;         break;
5236             case 'B': ftst = OP_FTBINARY;       break;
5237             case 'M': case 'A': case 'C':
5238                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5239                 switch (tmp) {
5240                 case 'M': ftst = OP_FTMTIME;    break;
5241                 case 'A': ftst = OP_FTATIME;    break;
5242                 case 'C': ftst = OP_FTCTIME;    break;
5243                 default:                        break;
5244                 }
5245                 break;
5246             default:
5247                 break;
5248             }
5249             if (ftst) {
5250                 PL_last_lop_op = (OPCODE)ftst;
5251                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5252                         "### Saw file test %c\n", (int)tmp);
5253                 } );
5254                 FTST(ftst);
5255             }
5256             else {
5257                 /* Assume it was a minus followed by a one-letter named
5258                  * subroutine call (or a -bareword), then. */
5259                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5260                         "### '-%c' looked like a file test but was not\n",
5261                         (int) tmp);
5262                 } );
5263                 s = --PL_bufptr;
5264             }
5265         }
5266         {
5267             const char tmp = *s++;
5268             if (*s == tmp) {
5269                 s++;
5270                 if (PL_expect == XOPERATOR)
5271                     TERM(POSTDEC);
5272                 else
5273                     OPERATOR(PREDEC);
5274             }
5275             else if (*s == '>') {
5276                 s++;
5277                 s = SKIPSPACE1(s);
5278                 if (isIDFIRST_lazy_if(s,UTF)) {
5279                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5280                     TOKEN(ARROW);
5281                 }
5282                 else if (*s == '$')
5283                     OPERATOR(ARROW);
5284                 else
5285                     TERM(ARROW);
5286             }
5287             if (PL_expect == XOPERATOR) {
5288                 if (*s == '=' && !PL_lex_allbrackets &&
5289                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5290                     s--;
5291                     TOKEN(0);
5292                 }
5293                 Aop(OP_SUBTRACT);
5294             }
5295             else {
5296                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5297                     check_uni();
5298                 OPERATOR('-');          /* unary minus */
5299             }
5300         }
5301
5302     case '+':
5303         {
5304             const char tmp = *s++;
5305             if (*s == tmp) {
5306                 s++;
5307                 if (PL_expect == XOPERATOR)
5308                     TERM(POSTINC);
5309                 else
5310                     OPERATOR(PREINC);
5311             }
5312             if (PL_expect == XOPERATOR) {
5313                 if (*s == '=' && !PL_lex_allbrackets &&
5314                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5315                     s--;
5316                     TOKEN(0);
5317                 }
5318                 Aop(OP_ADD);
5319             }
5320             else {
5321                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5322                     check_uni();
5323                 OPERATOR('+');
5324             }
5325         }
5326
5327     case '*':
5328         if (PL_expect != XOPERATOR) {
5329             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5330             PL_expect = XOPERATOR;
5331             force_ident(PL_tokenbuf, '*');
5332             if (!*PL_tokenbuf)
5333                 PREREF('*');
5334             TERM('*');
5335         }
5336         s++;
5337         if (*s == '*') {
5338             s++;
5339             if (*s == '=' && !PL_lex_allbrackets &&
5340                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5341                 s -= 2;
5342                 TOKEN(0);
5343             }
5344             PWop(OP_POW);
5345         }
5346         if (*s == '=' && !PL_lex_allbrackets &&
5347                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5348             s--;
5349             TOKEN(0);
5350         }
5351         Mop(OP_MULTIPLY);
5352
5353     case '%':
5354         if (PL_expect == XOPERATOR) {
5355             if (s[1] == '=' && !PL_lex_allbrackets &&
5356                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5357                 TOKEN(0);
5358             ++s;
5359             Mop(OP_MODULO);
5360         }
5361         PL_tokenbuf[0] = '%';
5362         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5363                 sizeof PL_tokenbuf - 1, FALSE);
5364         if (!PL_tokenbuf[1]) {
5365             PREREF('%');
5366         }
5367         PL_pending_ident = '%';
5368         TERM('%');
5369
5370     case '^':
5371         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5372                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5373             TOKEN(0);
5374         s++;
5375         BOop(OP_BIT_XOR);
5376     case '[':
5377         if (PL_lex_brackets > 100)
5378             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5379         PL_lex_brackstack[PL_lex_brackets++] = 0;
5380         PL_lex_allbrackets++;
5381         {
5382             const char tmp = *s++;
5383             OPERATOR(tmp);
5384         }
5385     case '~':
5386         if (s[1] == '~'
5387             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5388         {
5389             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5390                 TOKEN(0);
5391             s += 2;
5392             Eop(OP_SMARTMATCH);
5393         }
5394         s++;
5395         OPERATOR('~');
5396     case ',':
5397         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5398             TOKEN(0);
5399         s++;
5400         OPERATOR(',');
5401     case ':':
5402         if (s[1] == ':') {
5403             len = 0;
5404             goto just_a_word_zero_gv;
5405         }
5406         s++;
5407         switch (PL_expect) {
5408             OP *attrs;
5409 #ifdef PERL_MAD
5410             I32 stuffstart;
5411 #endif
5412         case XOPERATOR:
5413             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5414                 break;
5415             PL_bufptr = s;      /* update in case we back off */
5416             if (*s == '=') {
5417                 Perl_croak(aTHX_
5418                            "Use of := for an empty attribute list is not allowed");
5419             }
5420             goto grabattrs;
5421         case XATTRBLOCK:
5422             PL_expect = XBLOCK;
5423             goto grabattrs;
5424         case XATTRTERM:
5425             PL_expect = XTERMBLOCK;
5426          grabattrs:
5427 #ifdef PERL_MAD
5428             stuffstart = s - SvPVX(PL_linestr) - 1;
5429 #endif
5430             s = PEEKSPACE(s);
5431             attrs = NULL;
5432             while (isIDFIRST_lazy_if(s,UTF)) {
5433                 I32 tmp;
5434                 SV *sv;
5435                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5436                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5437                     if (tmp < 0) tmp = -tmp;
5438                     switch (tmp) {
5439                     case KEY_or:
5440                     case KEY_and:
5441                     case KEY_for:
5442                     case KEY_foreach:
5443                     case KEY_unless:
5444                     case KEY_if:
5445                     case KEY_while:
5446                     case KEY_until:
5447                         goto got_attrs;
5448                     default:
5449                         break;
5450                     }
5451                 }
5452                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5453                 if (*d == '(') {
5454                     d = scan_str(d,TRUE,TRUE);
5455                     if (!d) {
5456                         /* MUST advance bufptr here to avoid bogus
5457                            "at end of line" context messages from yyerror().
5458                          */
5459                         PL_bufptr = s + len;
5460                         yyerror("Unterminated attribute parameter in attribute list");
5461                         if (attrs)
5462                             op_free(attrs);
5463                         sv_free(sv);
5464                         return REPORT(0);       /* EOF indicator */
5465                     }
5466                 }
5467                 if (PL_lex_stuff) {
5468                     sv_catsv(sv, PL_lex_stuff);
5469                     attrs = op_append_elem(OP_LIST, attrs,
5470                                         newSVOP(OP_CONST, 0, sv));
5471                     SvREFCNT_dec(PL_lex_stuff);
5472                     PL_lex_stuff = NULL;
5473                 }
5474                 else {
5475                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5476                         sv_free(sv);
5477                         if (PL_in_my == KEY_our) {
5478                             deprecate(":unique");
5479                         }
5480                         else
5481                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5482                     }
5483
5484                     /* NOTE: any CV attrs applied here need to be part of
5485                        the CVf_BUILTIN_ATTRS define in cv.h! */
5486                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5487                         sv_free(sv);
5488                         CvLVALUE_on(PL_compcv);
5489                     }
5490                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5491                         sv_free(sv);
5492                         deprecate(":locked");
5493                     }
5494                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5495                         sv_free(sv);
5496                         CvMETHOD_on(PL_compcv);
5497                     }
5498                     /* After we've set the flags, it could be argued that
5499                        we don't need to do the attributes.pm-based setting
5500                        process, and shouldn't bother appending recognized
5501                        flags.  To experiment with that, uncomment the
5502                        following "else".  (Note that's already been
5503                        uncommented.  That keeps the above-applied built-in
5504                        attributes from being intercepted (and possibly
5505                        rejected) by a package's attribute routines, but is
5506                        justified by the performance win for the common case
5507                        of applying only built-in attributes.) */
5508                     else
5509                         attrs = op_append_elem(OP_LIST, attrs,
5510                                             newSVOP(OP_CONST, 0,
5511                                                     sv));
5512                 }
5513                 s = PEEKSPACE(d);
5514                 if (*s == ':' && s[1] != ':')
5515                     s = PEEKSPACE(s+1);
5516                 else if (s == d)
5517                     break;      /* require real whitespace or :'s */
5518                 /* XXX losing whitespace on sequential attributes here */
5519             }
5520             {
5521                 const char tmp
5522                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5523                 if (*s != ';' && *s != '}' && *s != tmp
5524                     && (tmp != '=' || *s != ')')) {
5525                     const char q = ((*s == '\'') ? '"' : '\'');
5526                     /* If here for an expression, and parsed no attrs, back
5527                        off. */
5528                     if (tmp == '=' && !attrs) {
5529                         s = PL_bufptr;
5530                         break;
5531                     }
5532                     /* MUST advance bufptr here to avoid bogus "at end of line"
5533                        context messages from yyerror().
5534                     */
5535                     PL_bufptr = s;
5536                     yyerror( (const char *)
5537                              (*s
5538                               ? Perl_form(aTHX_ "Invalid separator character "
5539                                           "%c%c%c in attribute list", q, *s, q)
5540                               : "Unterminated attribute list" ) );
5541                     if (attrs)
5542                         op_free(attrs);
5543                     OPERATOR(':');
5544                 }
5545             }
5546         got_attrs:
5547             if (attrs) {
5548                 start_force(PL_curforce);
5549                 NEXTVAL_NEXTTOKE.opval = attrs;
5550                 CURMAD('_', PL_nextwhite);
5551                 force_next(THING);
5552             }
5553 #ifdef PERL_MAD
5554             if (PL_madskills) {
5555                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5556                                      (s - SvPVX(PL_linestr)) - stuffstart);
5557             }
5558 #endif
5559             TOKEN(COLONATTR);
5560         }
5561         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5562             s--;
5563             TOKEN(0);
5564         }
5565         PL_lex_allbrackets--;
5566         OPERATOR(':');
5567     case '(':
5568         s++;
5569         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5570             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5571         else
5572             PL_expect = XTERM;
5573         s = SKIPSPACE1(s);
5574         PL_lex_allbrackets++;
5575         TOKEN('(');
5576     case ';':
5577         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5578             TOKEN(0);
5579         CLINE;
5580         s++;
5581         OPERATOR(';');
5582     case ')':
5583         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5584             TOKEN(0);
5585         s++;
5586         PL_lex_allbrackets--;
5587         s = SKIPSPACE1(s);
5588         if (*s == '{')
5589             PREBLOCK(')');
5590         TERM(')');
5591     case ']':
5592         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5593             TOKEN(0);
5594         s++;
5595         if (PL_lex_brackets <= 0)
5596             yyerror("Unmatched right square bracket");
5597         else
5598             --PL_lex_brackets;
5599         PL_lex_allbrackets--;
5600         if (PL_lex_state == LEX_INTERPNORMAL) {
5601             if (PL_lex_brackets == 0) {
5602                 if (*s == '-' && s[1] == '>')
5603                     PL_lex_state = LEX_INTERPENDMAYBE;
5604                 else if (*s != '[' && *s != '{')
5605                     PL_lex_state = LEX_INTERPEND;
5606             }
5607         }
5608         TERM(']');
5609     case '{':
5610       leftbracket:
5611         s++;
5612         if (PL_lex_brackets > 100) {
5613             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5614         }
5615         switch (PL_expect) {
5616         case XTERM:
5617             if (PL_lex_formbrack) {
5618                 s--;
5619                 PRETERMBLOCK(DO);
5620             }
5621             if (PL_oldoldbufptr == PL_last_lop)
5622                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5623             else
5624                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5625             PL_lex_allbrackets++;
5626             OPERATOR(HASHBRACK);
5627         case XOPERATOR:
5628             while (s < PL_bufend && SPACE_OR_TAB(*s))
5629                 s++;
5630             d = s;
5631             PL_tokenbuf[0] = '\0';
5632             if (d < PL_bufend && *d == '-') {
5633                 PL_tokenbuf[0] = '-';
5634                 d++;
5635                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5636                     d++;
5637             }
5638             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5639                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5640                               FALSE, &len);
5641                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5642                     d++;
5643                 if (*d == '}') {
5644                     const char minus = (PL_tokenbuf[0] == '-');
5645                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5646                     if (minus)
5647                         force_next('-');
5648                 }
5649             }
5650             /* FALL THROUGH */
5651         case XATTRBLOCK:
5652         case XBLOCK:
5653             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5654             PL_lex_allbrackets++;
5655             PL_expect = XSTATE;
5656             break;
5657         case XATTRTERM:
5658         case XTERMBLOCK:
5659             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5660             PL_lex_allbrackets++;
5661             PL_expect = XSTATE;
5662             break;
5663         default: {
5664                 const char *t;
5665                 if (PL_oldoldbufptr == PL_last_lop)
5666                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5667                 else
5668                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5669                 PL_lex_allbrackets++;
5670                 s = SKIPSPACE1(s);
5671                 if (*s == '}') {
5672                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5673                         PL_expect = XTERM;
5674                         /* This hack is to get the ${} in the message. */
5675                         PL_bufptr = s+1;
5676                         yyerror("syntax error");
5677                         break;
5678                     }
5679                     OPERATOR(HASHBRACK);
5680                 }
5681                 /* This hack serves to disambiguate a pair of curlies
5682                  * as being a block or an anon hash.  Normally, expectation
5683                  * determines that, but in cases where we're not in a
5684                  * position to expect anything in particular (like inside
5685                  * eval"") we have to resolve the ambiguity.  This code
5686                  * covers the case where the first term in the curlies is a
5687                  * quoted string.  Most other cases need to be explicitly
5688                  * disambiguated by prepending a "+" before the opening
5689                  * curly in order to force resolution as an anon hash.
5690                  *
5691                  * XXX should probably propagate the outer expectation
5692                  * into eval"" to rely less on this hack, but that could
5693                  * potentially break current behavior of eval"".
5694                  * GSAR 97-07-21
5695                  */
5696                 t = s;
5697                 if (*s == '\'' || *s == '"' || *s == '`') {
5698                     /* common case: get past first string, handling escapes */
5699                     for (t++; t < PL_bufend && *t != *s;)
5700                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5701                             t++;
5702                     t++;
5703                 }
5704                 else if (*s == 'q') {
5705                     if (++t < PL_bufend
5706                         && (!isALNUM(*t)
5707                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5708                                 && !isALNUM(*t))))
5709                     {
5710                         /* skip q//-like construct */
5711                         const char *tmps;
5712                         char open, close, term;
5713                         I32 brackets = 1;
5714
5715                         while (t < PL_bufend && isSPACE(*t))
5716                             t++;
5717                         /* check for q => */
5718                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5719                             OPERATOR(HASHBRACK);
5720                         }
5721                         term = *t;
5722                         open = term;
5723                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5724                             term = tmps[5];
5725                         close = term;
5726                         if (open == close)
5727                             for (t++; t < PL_bufend; t++) {
5728                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5729                                     t++;
5730                                 else if (*t == open)
5731                                     break;
5732                             }
5733                         else {
5734                             for (t++; t < PL_bufend; t++) {
5735                                 if (*t == '\\' && t+1 < PL_bufend)
5736                                     t++;
5737                                 else if (*t == close && --brackets <= 0)
5738                                     break;
5739                                 else if (*t == open)
5740                                     brackets++;
5741                             }
5742                         }
5743                         t++;
5744                     }
5745                     else
5746                         /* skip plain q word */
5747                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5748                              t += UTF8SKIP(t);
5749                 }
5750                 else if (isALNUM_lazy_if(t,UTF)) {
5751                     t += UTF8SKIP(t);
5752                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5753                          t += UTF8SKIP(t);
5754                 }
5755                 while (t < PL_bufend && isSPACE(*t))
5756                     t++;
5757                 /* if comma follows first term, call it an anon hash */
5758                 /* XXX it could be a comma expression with loop modifiers */
5759                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5760                                    || (*t == '=' && t[1] == '>')))
5761                     OPERATOR(HASHBRACK);
5762                 if (PL_expect == XREF)
5763                     PL_expect = XTERM;
5764                 else {
5765                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5766                     PL_expect = XSTATE;
5767                 }
5768             }
5769             break;
5770         }
5771         pl_yylval.ival = CopLINE(PL_curcop);
5772         if (isSPACE(*s) || *s == '#')
5773             PL_copline = NOLINE;   /* invalidate current command line number */
5774         TOKEN('{');
5775     case '}':
5776         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5777             TOKEN(0);
5778       rightbracket:
5779         s++;
5780         if (PL_lex_brackets <= 0)
5781             yyerror("Unmatched right curly bracket");
5782         else
5783             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5784         PL_lex_allbrackets--;
5785         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5786             PL_lex_formbrack = 0;
5787         if (PL_lex_state == LEX_INTERPNORMAL) {
5788             if (PL_lex_brackets == 0) {
5789                 if (PL_expect & XFAKEBRACK) {
5790                     PL_expect &= XENUMMASK;
5791                     PL_lex_state = LEX_INTERPEND;
5792                     PL_bufptr = s;
5793 #if 0
5794                     if (PL_madskills) {
5795                         if (!PL_thiswhite)
5796                             PL_thiswhite = newSVpvs("");
5797                         sv_catpvs(PL_thiswhite,"}");
5798                     }
5799 #endif
5800                     return yylex();     /* ignore fake brackets */
5801                 }
5802                 if (*s == '-' && s[1] == '>')
5803                     PL_lex_state = LEX_INTERPENDMAYBE;
5804                 else if (*s != '[' && *s != '{')
5805                     PL_lex_state = LEX_INTERPEND;
5806             }
5807         }
5808         if (PL_expect & XFAKEBRACK) {
5809             PL_expect &= XENUMMASK;
5810             PL_bufptr = s;
5811             return yylex();             /* ignore fake brackets */
5812         }
5813         start_force(PL_curforce);
5814         if (PL_madskills) {
5815             curmad('X', newSVpvn(s-1,1));
5816             CURMAD('_', PL_thiswhite);
5817         }
5818         force_next('}');
5819 #ifdef PERL_MAD
5820         if (!PL_thistoken)
5821             PL_thistoken = newSVpvs("");
5822 #endif
5823         TOKEN(';');
5824     case '&':
5825         s++;
5826         if (*s++ == '&') {
5827             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5828                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5829                 s -= 2;
5830                 TOKEN(0);
5831             }
5832             AOPERATOR(ANDAND);
5833         }
5834         s--;
5835         if (PL_expect == XOPERATOR) {
5836             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5837                 && isIDFIRST_lazy_if(s,UTF))
5838             {
5839                 CopLINE_dec(PL_curcop);
5840                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5841                 CopLINE_inc(PL_curcop);
5842             }
5843             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5844                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5845                 s--;
5846                 TOKEN(0);
5847             }
5848             BAop(OP_BIT_AND);
5849         }
5850
5851         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5852         if (*PL_tokenbuf) {
5853             PL_expect = XOPERATOR;
5854             force_ident(PL_tokenbuf, '&');
5855         }
5856         else
5857             PREREF('&');
5858         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5859         TERM('&');
5860
5861     case '|':
5862         s++;
5863         if (*s++ == '|') {
5864             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5865                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5866                 s -= 2;
5867                 TOKEN(0);
5868             }
5869             AOPERATOR(OROR);
5870         }
5871         s--;
5872         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5873                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5874             s--;
5875             TOKEN(0);
5876         }
5877         BOop(OP_BIT_OR);
5878     case '=':
5879         s++;
5880         {
5881             const char tmp = *s++;
5882             if (tmp == '=') {
5883                 if (!PL_lex_allbrackets &&
5884                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5885                     s -= 2;
5886                     TOKEN(0);
5887                 }
5888                 Eop(OP_EQ);
5889             }
5890             if (tmp == '>') {
5891                 if (!PL_lex_allbrackets &&
5892                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5893                     s -= 2;
5894                     TOKEN(0);
5895                 }
5896                 OPERATOR(',');
5897             }
5898             if (tmp == '~')
5899                 PMop(OP_MATCH);
5900             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5901                 && strchr("+-*/%.^&|<",tmp))
5902                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5903                             "Reversed %c= operator",(int)tmp);
5904             s--;
5905             if (PL_expect == XSTATE && isALPHA(tmp) &&
5906                 (s == PL_linestart+1 || s[-2] == '\n') )
5907                 {
5908                     if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
5909                         d = PL_bufend;
5910                         while (s < d) {
5911                             if (*s++ == '\n') {
5912                                 incline(s);
5913                                 if (strnEQ(s,"=cut",4)) {
5914                                     s = strchr(s,'\n');
5915                                     if (s)
5916                                         s++;
5917                                     else
5918                                         s = d;
5919                                     incline(s);
5920                                     goto retry;
5921                                 }
5922                             }
5923                         }
5924                         goto retry;
5925                     }
5926 #ifdef PERL_MAD
5927                     if (PL_madskills) {
5928                         if (!PL_thiswhite)
5929                             PL_thiswhite = newSVpvs("");
5930                         sv_catpvn(PL_thiswhite, PL_linestart,
5931                                   PL_bufend - PL_linestart);
5932                     }
5933 #endif
5934                     s = PL_bufend;
5935                     PL_parser->in_pod = 1;
5936                     goto retry;
5937                 }
5938         }
5939         if (PL_lex_brackets < PL_lex_formbrack) {
5940             const char *t = s;
5941 #ifdef PERL_STRICT_CR
5942             while (SPACE_OR_TAB(*t))
5943 #else
5944             while (SPACE_OR_TAB(*t) || *t == '\r')
5945 #endif
5946                 t++;
5947             if (*t == '\n' || *t == '#') {
5948                 s--;
5949                 PL_expect = XBLOCK;
5950                 goto leftbracket;
5951             }
5952         }
5953         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5954             s--;
5955             TOKEN(0);
5956         }
5957         pl_yylval.ival = 0;
5958         OPERATOR(ASSIGNOP);
5959     case '!':
5960         s++;
5961         {
5962             const char tmp = *s++;
5963             if (tmp == '=') {
5964                 /* was this !=~ where !~ was meant?
5965                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5966
5967                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5968                     const char *t = s+1;
5969
5970                     while (t < PL_bufend && isSPACE(*t))
5971                         ++t;
5972
5973                     if (*t == '/' || *t == '?' ||
5974                         ((*t == 'm' || *t == 's' || *t == 'y')
5975                          && !isALNUM(t[1])) ||
5976                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5977                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5978                                     "!=~ should be !~");
5979                 }
5980                 if (!PL_lex_allbrackets &&
5981                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5982                     s -= 2;
5983                     TOKEN(0);
5984                 }
5985                 Eop(OP_NE);
5986             }
5987             if (tmp == '~')
5988                 PMop(OP_NOT);
5989         }
5990         s--;
5991         OPERATOR('!');
5992     case '<':
5993         if (PL_expect != XOPERATOR) {
5994             if (s[1] != '<' && !strchr(s,'>'))
5995                 check_uni();
5996             if (s[1] == '<')
5997                 s = scan_heredoc(s);
5998             else
5999                 s = scan_inputsymbol(s);
6000             TERM(sublex_start());
6001         }
6002         s++;
6003         {
6004             char tmp = *s++;
6005             if (tmp == '<') {
6006                 if (*s == '=' && !PL_lex_allbrackets &&
6007                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6008                     s -= 2;
6009                     TOKEN(0);
6010                 }
6011                 SHop(OP_LEFT_SHIFT);
6012             }
6013             if (tmp == '=') {
6014                 tmp = *s++;
6015                 if (tmp == '>') {
6016                     if (!PL_lex_allbrackets &&
6017                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6018                         s -= 3;
6019                         TOKEN(0);
6020                     }
6021                     Eop(OP_NCMP);
6022                 }
6023                 s--;
6024                 if (!PL_lex_allbrackets &&
6025                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6026                     s -= 2;
6027                     TOKEN(0);
6028                 }
6029                 Rop(OP_LE);
6030             }
6031         }
6032         s--;
6033         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6034             s--;
6035             TOKEN(0);
6036         }
6037         Rop(OP_LT);
6038     case '>':
6039         s++;
6040         {
6041             const char tmp = *s++;
6042             if (tmp == '>') {
6043                 if (*s == '=' && !PL_lex_allbrackets &&
6044                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6045                     s -= 2;
6046                     TOKEN(0);
6047                 }
6048                 SHop(OP_RIGHT_SHIFT);
6049             }
6050             else if (tmp == '=') {
6051                 if (!PL_lex_allbrackets &&
6052                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6053                     s -= 2;
6054                     TOKEN(0);
6055                 }
6056                 Rop(OP_GE);
6057             }
6058         }
6059         s--;
6060         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6061             s--;
6062             TOKEN(0);
6063         }
6064         Rop(OP_GT);
6065
6066     case '$':
6067         CLINE;
6068
6069         if (PL_expect == XOPERATOR) {
6070             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6071                 return deprecate_commaless_var_list();
6072             }
6073         }
6074
6075         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6076             PL_tokenbuf[0] = '@';
6077             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6078                            sizeof PL_tokenbuf - 1, FALSE);
6079             if (PL_expect == XOPERATOR)
6080                 no_op("Array length", s);
6081             if (!PL_tokenbuf[1])
6082                 PREREF(DOLSHARP);
6083             PL_expect = XOPERATOR;
6084             PL_pending_ident = '#';
6085             TOKEN(DOLSHARP);
6086         }
6087
6088         PL_tokenbuf[0] = '$';
6089         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6090                        sizeof PL_tokenbuf - 1, FALSE);
6091         if (PL_expect == XOPERATOR)
6092             no_op("Scalar", s);
6093         if (!PL_tokenbuf[1]) {
6094             if (s == PL_bufend)
6095                 yyerror("Final $ should be \\$ or $name");
6096             PREREF('$');
6097         }
6098
6099         d = s;
6100         {
6101             const char tmp = *s;
6102             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6103                 s = SKIPSPACE1(s);
6104
6105             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6106                 && intuit_more(s)) {
6107                 if (*s == '[') {
6108                     PL_tokenbuf[0] = '@';
6109                     if (ckWARN(WARN_SYNTAX)) {
6110                         char *t = s+1;
6111
6112                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6113                             t++;
6114                         if (*t++ == ',') {
6115                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6116                             while (t < PL_bufend && *t != ']')
6117                                 t++;
6118                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6119                                         "Multidimensional syntax %.*s not supported",
6120                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6121                         }
6122                     }
6123                 }
6124                 else if (*s == '{') {
6125                     char *t;
6126                     PL_tokenbuf[0] = '%';
6127                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6128                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6129                         {
6130                             char tmpbuf[sizeof PL_tokenbuf];
6131                             do {
6132                                 t++;
6133                             } while (isSPACE(*t));
6134                             if (isIDFIRST_lazy_if(t,UTF)) {
6135                                 STRLEN len;
6136                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6137                                               &len);
6138                                 while (isSPACE(*t))
6139                                     t++;
6140                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6141                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6142                                                 "You need to quote \"%s\"",
6143                                                 tmpbuf);
6144                             }
6145                         }
6146                 }
6147             }
6148
6149             PL_expect = XOPERATOR;
6150             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6151                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6152                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6153                     PL_expect = XOPERATOR;
6154                 else if (strchr("$@\"'`q", *s))
6155                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6156                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6157                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6158                 else if (isIDFIRST_lazy_if(s,UTF)) {
6159                     char tmpbuf[sizeof PL_tokenbuf];
6160                     int t2;
6161                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6162                     if ((t2 = keyword(tmpbuf, len, 0))) {
6163                         /* binary operators exclude handle interpretations */
6164                         switch (t2) {
6165                         case -KEY_x:
6166                         case -KEY_eq:
6167                         case -KEY_ne:
6168                         case -KEY_gt:
6169                         case -KEY_lt:
6170                         case -KEY_ge:
6171                         case -KEY_le:
6172                         case -KEY_cmp:
6173                             break;
6174                         default:
6175                             PL_expect = XTERM;  /* e.g. print $fh length() */
6176                             break;
6177                         }
6178                     }
6179                     else {
6180                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6181                     }
6182                 }
6183                 else if (isDIGIT(*s))
6184                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6185                 else if (*s == '.' && isDIGIT(s[1]))
6186                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6187                 else if ((*s == '?' || *s == '-' || *s == '+')
6188                          && !isSPACE(s[1]) && s[1] != '=')
6189                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6190                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6191                          && s[1] != '/')
6192                     PL_expect = XTERM;          /* e.g. print $fh /.../
6193                                                    XXX except DORDOR operator
6194                                                 */
6195                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6196                          && s[2] != '=')
6197                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6198             }
6199         }
6200         PL_pending_ident = '$';
6201         TOKEN('$');
6202
6203     case '@':
6204         if (PL_expect == XOPERATOR)
6205             no_op("Array", s);
6206         PL_tokenbuf[0] = '@';
6207         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6208         if (!PL_tokenbuf[1]) {
6209             PREREF('@');
6210         }
6211         if (PL_lex_state == LEX_NORMAL)
6212             s = SKIPSPACE1(s);
6213         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6214             if (*s == '{')
6215                 PL_tokenbuf[0] = '%';
6216
6217             /* Warn about @ where they meant $. */
6218             if (*s == '[' || *s == '{') {
6219                 if (ckWARN(WARN_SYNTAX)) {
6220                     const char *t = s + 1;
6221                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6222                         t++;
6223                     if (*t == '}' || *t == ']') {
6224                         t++;
6225                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6226                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6227                             "Scalar value %.*s better written as $%.*s",
6228                             (int)(t-PL_bufptr), PL_bufptr,
6229                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6230                     }
6231                 }
6232             }
6233         }
6234         PL_pending_ident = '@';
6235         TERM('@');
6236
6237      case '/':                  /* may be division, defined-or, or pattern */
6238         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6239             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6240                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6241                 TOKEN(0);
6242             s += 2;
6243             AOPERATOR(DORDOR);
6244         }
6245      case '?':                  /* may either be conditional or pattern */
6246         if (PL_expect == XOPERATOR) {
6247              char tmp = *s++;
6248              if(tmp == '?') {
6249                 if (!PL_lex_allbrackets &&
6250                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6251                     s--;
6252                     TOKEN(0);
6253                 }
6254                 PL_lex_allbrackets++;
6255                 OPERATOR('?');
6256              }
6257              else {
6258                  tmp = *s++;
6259                  if(tmp == '/') {
6260                      /* A // operator. */
6261                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6262                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6263                                             LEX_FAKEEOF_LOGIC)) {
6264                         s -= 2;
6265                         TOKEN(0);
6266                     }
6267                     AOPERATOR(DORDOR);
6268                  }
6269                  else {
6270                      s--;
6271                      if (*s == '=' && !PL_lex_allbrackets &&
6272                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6273                          s--;
6274                          TOKEN(0);
6275                      }
6276                      Mop(OP_DIVIDE);
6277                  }
6278              }
6279          }
6280          else {
6281              /* Disable warning on "study /blah/" */
6282              if (PL_oldoldbufptr == PL_last_uni
6283               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6284                   || memNE(PL_last_uni, "study", 5)
6285                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6286               ))
6287                  check_uni();
6288              if (*s == '?')
6289                  deprecate("?PATTERN? without explicit operator");
6290              s = scan_pat(s,OP_MATCH);
6291              TERM(sublex_start());
6292          }
6293
6294     case '.':
6295         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6296 #ifdef PERL_STRICT_CR
6297             && s[1] == '\n'
6298 #else
6299             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6300 #endif
6301             && (s == PL_linestart || s[-1] == '\n') )
6302         {
6303             PL_lex_formbrack = 0;
6304             PL_expect = XSTATE;
6305             goto rightbracket;
6306         }
6307         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6308             s += 3;
6309             OPERATOR(YADAYADA);
6310         }
6311         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6312             char tmp = *s++;
6313             if (*s == tmp) {
6314                 if (!PL_lex_allbrackets &&
6315                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6316                     s--;
6317                     TOKEN(0);
6318                 }
6319                 s++;
6320                 if (*s == tmp) {
6321                     s++;
6322                     pl_yylval.ival = OPf_SPECIAL;
6323                 }
6324                 else
6325                     pl_yylval.ival = 0;
6326                 OPERATOR(DOTDOT);
6327             }
6328             if (*s == '=' && !PL_lex_allbrackets &&
6329                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6330                 s--;
6331                 TOKEN(0);
6332             }
6333             Aop(OP_CONCAT);
6334         }
6335         /* FALL THROUGH */
6336     case '0': case '1': case '2': case '3': case '4':
6337     case '5': case '6': case '7': case '8': case '9':
6338         s = scan_num(s, &pl_yylval);
6339         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6340         if (PL_expect == XOPERATOR)
6341             no_op("Number",s);
6342         TERM(THING);
6343
6344     case '\'':
6345         s = scan_str(s,!!PL_madskills,FALSE);
6346         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6347         if (PL_expect == XOPERATOR) {
6348             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6349                 return deprecate_commaless_var_list();
6350             }
6351             else
6352                 no_op("String",s);
6353         }
6354         if (!s)
6355             missingterm(NULL);
6356         pl_yylval.ival = OP_CONST;
6357         TERM(sublex_start());
6358
6359     case '"':
6360         s = scan_str(s,!!PL_madskills,FALSE);
6361         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6362         if (PL_expect == XOPERATOR) {
6363             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6364                 return deprecate_commaless_var_list();
6365             }
6366             else
6367                 no_op("String",s);
6368         }
6369         if (!s)
6370             missingterm(NULL);
6371         pl_yylval.ival = OP_CONST;
6372         /* FIXME. I think that this can be const if char *d is replaced by
6373            more localised variables.  */
6374         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6375             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6376                 pl_yylval.ival = OP_STRINGIFY;
6377                 break;
6378             }
6379         }
6380         TERM(sublex_start());
6381
6382     case '`':
6383         s = scan_str(s,!!PL_madskills,FALSE);
6384         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6385         if (PL_expect == XOPERATOR)
6386             no_op("Backticks",s);
6387         if (!s)
6388             missingterm(NULL);
6389         readpipe_override();
6390         TERM(sublex_start());
6391
6392     case '\\':
6393         s++;
6394         if (PL_lex_inwhat && isDIGIT(*s))
6395             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6396                            *s, *s);
6397         if (PL_expect == XOPERATOR)
6398             no_op("Backslash",s);
6399         OPERATOR(REFGEN);
6400
6401     case 'v':
6402         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6403             char *start = s + 2;
6404             while (isDIGIT(*start) || *start == '_')
6405                 start++;
6406             if (*start == '.' && isDIGIT(start[1])) {
6407                 s = scan_num(s, &pl_yylval);
6408                 TERM(THING);
6409             }
6410             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6411             else if (!isALPHA(*start) && (PL_expect == XTERM
6412                         || PL_expect == XREF || PL_expect == XSTATE
6413                         || PL_expect == XTERMORDORDOR)) {
6414                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6415                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6416                 if (!gv) {
6417                     s = scan_num(s, &pl_yylval);
6418                     TERM(THING);
6419                 }
6420             }
6421         }
6422         goto keylookup;
6423     case 'x':
6424         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6425             s++;
6426             Mop(OP_REPEAT);
6427         }
6428         goto keylookup;
6429
6430     case '_':
6431     case 'a': case 'A':
6432     case 'b': case 'B':
6433     case 'c': case 'C':
6434     case 'd': case 'D':
6435     case 'e': case 'E':
6436     case 'f': case 'F':
6437     case 'g': case 'G':
6438     case 'h': case 'H':
6439     case 'i': case 'I':
6440     case 'j': case 'J':
6441     case 'k': case 'K':
6442     case 'l': case 'L':
6443     case 'm': case 'M':
6444     case 'n': case 'N':
6445     case 'o': case 'O':
6446     case 'p': case 'P':
6447     case 'q': case 'Q':
6448     case 'r': case 'R':
6449     case 's': case 'S':
6450     case 't': case 'T':
6451     case 'u': case 'U':
6452               case 'V':
6453     case 'w': case 'W':
6454               case 'X':
6455     case 'y': case 'Y':
6456     case 'z': case 'Z':
6457
6458       keylookup: {
6459         bool anydelim;
6460         I32 tmp;
6461
6462         orig_keyword = 0;
6463         gv = NULL;
6464         gvp = NULL;
6465
6466         PL_bufptr = s;
6467         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6468
6469         /* Some keywords can be followed by any delimiter, including ':' */
6470         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6471
6472         /* x::* is just a word, unless x is "CORE" */
6473         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6474             goto just_a_word;
6475
6476         d = s;
6477         while (d < PL_bufend && isSPACE(*d))
6478                 d++;    /* no comments skipped here, or s### is misparsed */
6479
6480         /* Is this a word before a => operator? */
6481         if (*d == '=' && d[1] == '>') {
6482             CLINE;
6483             pl_yylval.opval
6484                 = (OP*)newSVOP(OP_CONST, 0,
6485                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6486             pl_yylval.opval->op_private = OPpCONST_BARE;
6487             TERM(WORD);
6488         }
6489
6490         /* Check for plugged-in keyword */
6491         {
6492             OP *o;
6493             int result;
6494             char *saved_bufptr = PL_bufptr;
6495             PL_bufptr = s;
6496             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6497             s = PL_bufptr;
6498             if (result == KEYWORD_PLUGIN_DECLINE) {
6499                 /* not a plugged-in keyword */
6500                 PL_bufptr = saved_bufptr;
6501             } else if (result == KEYWORD_PLUGIN_STMT) {
6502                 pl_yylval.opval = o;
6503                 CLINE;
6504                 PL_expect = XSTATE;
6505                 return REPORT(PLUGSTMT);
6506             } else if (result == KEYWORD_PLUGIN_EXPR) {
6507                 pl_yylval.opval = o;
6508                 CLINE;
6509                 PL_expect = XOPERATOR;
6510                 return REPORT(PLUGEXPR);
6511             } else {
6512                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6513                                         PL_tokenbuf);
6514             }
6515         }
6516
6517         /* Check for built-in keyword */
6518         tmp = keyword(PL_tokenbuf, len, 0);
6519
6520         /* Is this a label? */
6521         if (!anydelim && PL_expect == XSTATE
6522               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6523             s = d + 1;
6524             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6525             CLINE;
6526             TOKEN(LABEL);
6527         }
6528
6529         if (tmp < 0) {                  /* second-class keyword? */
6530             GV *ogv = NULL;     /* override (winner) */
6531             GV *hgv = NULL;     /* hidden (loser) */
6532             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6533                 CV *cv;
6534                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6535                                             UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6536                     (cv = GvCVu(gv)))
6537                 {
6538                     if (GvIMPORTED_CV(gv))
6539                         ogv = gv;
6540                     else if (! CvMETHOD(cv))
6541                         hgv = gv;
6542                 }
6543                 if (!ogv &&
6544                     (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6545                                             UTF ? -(I32)len : (I32)len, FALSE)) &&
6546                     (gv = *gvp) && isGV_with_GP(gv) &&
6547                     GvCVu(gv) && GvIMPORTED_CV(gv))
6548                 {
6549                     ogv = gv;
6550                 }
6551             }
6552             if (ogv) {
6553                 orig_keyword = tmp;
6554                 tmp = 0;                /* overridden by import or by GLOBAL */
6555             }
6556             else if (gv && !gvp
6557                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6558                      && GvCVu(gv))
6559             {
6560                 tmp = 0;                /* any sub overrides "weak" keyword */
6561             }
6562             else {                      /* no override */
6563                 tmp = -tmp;
6564                 if (tmp == KEY_dump) {
6565                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6566                                    "dump() better written as CORE::dump()");
6567                 }
6568                 gv = NULL;
6569                 gvp = 0;
6570                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6571                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6572                                    "Ambiguous call resolved as CORE::%s(), "
6573                                    "qualify as such or use &",
6574                                    GvENAME(hgv));
6575             }
6576         }
6577
6578       reserved_word:
6579         switch (tmp) {
6580
6581         default:                        /* not a keyword */
6582             /* Trade off - by using this evil construction we can pull the
6583                variable gv into the block labelled keylookup. If not, then
6584                we have to give it function scope so that the goto from the
6585                earlier ':' case doesn't bypass the initialisation.  */
6586             if (0) {
6587             just_a_word_zero_gv:
6588                 gv = NULL;
6589                 gvp = NULL;
6590                 orig_keyword = 0;
6591             }
6592           just_a_word: {
6593                 SV *sv;
6594                 int pkgname = 0;
6595                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6596                 OP *rv2cv_op;
6597                 CV *cv;
6598 #ifdef PERL_MAD
6599                 SV *nextPL_nextwhite = 0;
6600 #endif
6601
6602
6603                 /* Get the rest if it looks like a package qualifier */
6604
6605                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6606                     STRLEN morelen;
6607                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6608                                   TRUE, &morelen);
6609                     if (!morelen)
6610                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6611                                 *s == '\'' ? "'" : "::");
6612                     len += morelen;
6613                     pkgname = 1;
6614                 }
6615
6616                 if (PL_expect == XOPERATOR) {
6617                     if (PL_bufptr == PL_linestart) {
6618                         CopLINE_dec(PL_curcop);
6619                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6620                         CopLINE_inc(PL_curcop);
6621                     }
6622                     else
6623                         no_op("Bareword",s);
6624                 }
6625
6626                 /* Look for a subroutine with this name in current package,
6627                    unless name is "Foo::", in which case Foo is a bareword
6628                    (and a package name). */
6629
6630                 if (len > 2 && !PL_madskills &&
6631                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6632                 {
6633                     if (ckWARN(WARN_BAREWORD)
6634                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6635                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6636                             "Bareword \"%s\" refers to nonexistent package",
6637                              PL_tokenbuf);
6638                     len -= 2;
6639                     PL_tokenbuf[len] = '\0';
6640                     gv = NULL;
6641                     gvp = 0;
6642                 }
6643                 else {
6644                     if (!gv) {
6645                         /* Mustn't actually add anything to a symbol table.
6646                            But also don't want to "initialise" any placeholder
6647                            constants that might already be there into full
6648                            blown PVGVs with attached PVCV.  */
6649                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6650                                                GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6651                                                SVt_PVCV);
6652                     }
6653                     len = 0;
6654                 }
6655
6656                 /* if we saw a global override before, get the right name */
6657
6658                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6659                     len ? len : strlen(PL_tokenbuf));
6660                 if (gvp) {
6661                     SV * const tmp_sv = sv;
6662                     sv = newSVpvs("CORE::GLOBAL::");
6663                     sv_catsv(sv, tmp_sv);
6664                     SvREFCNT_dec(tmp_sv);
6665                 }
6666
6667 #ifdef PERL_MAD
6668                 if (PL_madskills && !PL_thistoken) {
6669                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6670                     PL_thistoken = newSVpvn(start,s - start);
6671                     PL_realtokenstart = s - SvPVX(PL_linestr);
6672                 }
6673 #endif
6674
6675                 /* Presume this is going to be a bareword of some sort. */
6676                 CLINE;
6677                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6678                 pl_yylval.opval->op_private = OPpCONST_BARE;
6679
6680                 /* And if "Foo::", then that's what it certainly is. */
6681                 if (len)
6682                     goto safe_bareword;
6683
6684                 {
6685                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6686                     const_op->op_private = OPpCONST_BARE;
6687                     rv2cv_op = newCVREF(0, const_op);
6688                 }
6689                 cv = rv2cv_op_cv(rv2cv_op, 0);
6690
6691                 /* See if it's the indirect object for a list operator. */
6692
6693                 if (PL_oldoldbufptr &&
6694                     PL_oldoldbufptr < PL_bufptr &&
6695                     (PL_oldoldbufptr == PL_last_lop
6696                      || PL_oldoldbufptr == PL_last_uni) &&
6697                     /* NO SKIPSPACE BEFORE HERE! */
6698                     (PL_expect == XREF ||
6699                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6700                 {
6701                     bool immediate_paren = *s == '(';
6702
6703                     /* (Now we can afford to cross potential line boundary.) */
6704                     s = SKIPSPACE2(s,nextPL_nextwhite);
6705 #ifdef PERL_MAD
6706                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6707 #endif
6708
6709                     /* Two barewords in a row may indicate method call. */
6710
6711                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6712                         (tmp = intuit_method(s, gv, cv))) {
6713                         op_free(rv2cv_op);
6714                         if (tmp == METHOD && !PL_lex_allbrackets &&
6715                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6716                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6717                         return REPORT(tmp);
6718                     }
6719
6720                     /* If not a declared subroutine, it's an indirect object. */
6721                     /* (But it's an indir obj regardless for sort.) */
6722                     /* Also, if "_" follows a filetest operator, it's a bareword */
6723
6724                     if (
6725                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6726                          (!cv &&
6727                         (PL_last_lop_op != OP_MAPSTART &&
6728                          PL_last_lop_op != OP_GREPSTART))))
6729                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6730                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6731                        )
6732                     {
6733                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6734                         goto bareword;
6735                     }
6736                 }
6737
6738                 PL_expect = XOPERATOR;
6739 #ifdef PERL_MAD
6740                 if (isSPACE(*s))
6741                     s = SKIPSPACE2(s,nextPL_nextwhite);
6742                 PL_nextwhite = nextPL_nextwhite;
6743 #else
6744                 s = skipspace(s);
6745 #endif
6746
6747                 /* Is this a word before a => operator? */
6748                 if (*s == '=' && s[1] == '>' && !pkgname) {
6749                     op_free(rv2cv_op);
6750                     CLINE;
6751                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6752                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6753                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6754                     TERM(WORD);
6755                 }
6756
6757                 /* If followed by a paren, it's certainly a subroutine. */
6758                 if (*s == '(') {
6759                     CLINE;
6760                     if (cv) {
6761                         d = s + 1;
6762                         while (SPACE_OR_TAB(*d))
6763                             d++;
6764                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6765                             s = d + 1;
6766                             goto its_constant;
6767                         }
6768                     }
6769 #ifdef PERL_MAD
6770                     if (PL_madskills) {
6771                         PL_nextwhite = PL_thiswhite;
6772                         PL_thiswhite = 0;
6773                     }
6774                     start_force(PL_curforce);
6775 #endif
6776                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6777                     PL_expect = XOPERATOR;
6778 #ifdef PERL_MAD
6779                     if (PL_madskills) {
6780                         PL_nextwhite = nextPL_nextwhite;
6781                         curmad('X', PL_thistoken);
6782                         PL_thistoken = newSVpvs("");
6783                     }
6784 #endif
6785                     op_free(rv2cv_op);
6786                     force_next(WORD);
6787                     pl_yylval.ival = 0;
6788                     TOKEN('&');
6789                 }
6790
6791                 /* If followed by var or block, call it a method (unless sub) */
6792
6793                 if ((*s == '$' || *s == '{') && !cv) {
6794                     op_free(rv2cv_op);
6795                     PL_last_lop = PL_oldbufptr;
6796                     PL_last_lop_op = OP_METHOD;
6797                     if (!PL_lex_allbrackets &&
6798                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6799                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6800                     PREBLOCK(METHOD);
6801                 }
6802
6803                 /* If followed by a bareword, see if it looks like indir obj. */
6804
6805                 if (!orig_keyword
6806                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6807                         && (tmp = intuit_method(s, gv, cv))) {
6808                     op_free(rv2cv_op);
6809                     if (tmp == METHOD && !PL_lex_allbrackets &&
6810                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6811                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6812                     return REPORT(tmp);
6813                 }
6814
6815                 /* Not a method, so call it a subroutine (if defined) */
6816
6817                 if (cv) {
6818                     if (lastchar == '-')
6819                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6820                                          "Ambiguous use of -%s resolved as -&%s()",
6821                                          PL_tokenbuf, PL_tokenbuf);
6822                     /* Check for a constant sub */
6823                     if ((sv = cv_const_sv(cv))) {
6824                   its_constant:
6825                         op_free(rv2cv_op);
6826                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6827                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6828                         pl_yylval.opval->op_private = 0;
6829                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6830                         TOKEN(WORD);
6831                     }
6832
6833                     op_free(pl_yylval.opval);
6834                     pl_yylval.opval = rv2cv_op;
6835                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6836                     PL_last_lop = PL_oldbufptr;
6837                     PL_last_lop_op = OP_ENTERSUB;
6838                     /* Is there a prototype? */
6839                     if (
6840 #ifdef PERL_MAD
6841                         cv &&
6842 #endif
6843                         SvPOK(cv))
6844                     {
6845                         STRLEN protolen = CvPROTOLEN(cv);
6846                         const char *proto = CvPROTO(cv);
6847                         if (!protolen)
6848                             TERM(FUNC0SUB);
6849                         while (*proto == ';')
6850                             proto++;
6851                         if (
6852                             (
6853                                 (
6854                                     *proto == '$' || *proto == '_'
6855                                  || *proto == '*' || *proto == '+'
6856                                 )
6857                              && proto[1] == '\0'
6858                             )
6859                          || (
6860                              *proto == '\\' && proto[1] && proto[2] == '\0'
6861                             )
6862                         )
6863                             OPERATOR(UNIOPSUB);
6864                         if (*proto == '\\' && proto[1] == '[') {
6865                             const char *p = proto + 2;
6866                             while(*p && *p != ']')
6867                                 ++p;
6868                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6869                         }
6870                         if (*proto == '&' && *s == '{') {
6871                             if (PL_curstash)
6872                                 sv_setpvs(PL_subname, "__ANON__");
6873                             else
6874                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6875                             if (!PL_lex_allbrackets &&
6876                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6877                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6878                             PREBLOCK(LSTOPSUB);
6879                         }
6880                     }
6881 #ifdef PERL_MAD
6882                     {
6883                         if (PL_madskills) {
6884                             PL_nextwhite = PL_thiswhite;
6885                             PL_thiswhite = 0;
6886                         }
6887                         start_force(PL_curforce);
6888                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6889                         PL_expect = XTERM;
6890                         if (PL_madskills) {
6891                             PL_nextwhite = nextPL_nextwhite;
6892                             curmad('X', PL_thistoken);
6893                             PL_thistoken = newSVpvs("");
6894                         }
6895                         force_next(WORD);
6896                         if (!PL_lex_allbrackets &&
6897                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6898                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6899                         TOKEN(NOAMP);
6900                     }
6901                 }
6902
6903                 /* Guess harder when madskills require "best effort". */
6904                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6905                     int probable_sub = 0;
6906                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6907                         probable_sub = 1;
6908                     else if (isALPHA(*s)) {
6909                         char tmpbuf[1024];
6910                         STRLEN tmplen;
6911                         d = s;
6912                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6913                         if (!keyword(tmpbuf, tmplen, 0))
6914                             probable_sub = 1;
6915                         else {
6916                             while (d < PL_bufend && isSPACE(*d))
6917                                 d++;
6918                             if (*d == '=' && d[1] == '>')
6919                                 probable_sub = 1;
6920                         }
6921                     }
6922                     if (probable_sub) {
6923                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
6924                                         SVt_PVCV);
6925                         op_free(pl_yylval.opval);
6926                         pl_yylval.opval = rv2cv_op;
6927                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6928                         PL_last_lop = PL_oldbufptr;
6929                         PL_last_lop_op = OP_ENTERSUB;
6930                         PL_nextwhite = PL_thiswhite;
6931                         PL_thiswhite = 0;
6932                         start_force(PL_curforce);
6933                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6934                         PL_expect = XTERM;
6935                         PL_nextwhite = nextPL_nextwhite;
6936                         curmad('X', PL_thistoken);
6937                         PL_thistoken = newSVpvs("");
6938                         force_next(WORD);
6939                         if (!PL_lex_allbrackets &&
6940                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6941                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6942                         TOKEN(NOAMP);
6943                     }
6944 #else
6945                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6946                     PL_expect = XTERM;
6947                     force_next(WORD);
6948                     if (!PL_lex_allbrackets &&
6949                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6950                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6951                     TOKEN(NOAMP);
6952 #endif
6953                 }
6954
6955                 /* Call it a bare word */
6956
6957                 if (PL_hints & HINT_STRICT_SUBS)
6958                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6959                 else {
6960                 bareword:
6961                     /* after "print" and similar functions (corresponding to
6962                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6963                      * a filehandle should be subject to "strict subs".
6964                      * Likewise for the optional indirect-object argument to system
6965                      * or exec, which can't be a bareword */
6966                     if ((PL_last_lop_op == OP_PRINT
6967                             || PL_last_lop_op == OP_PRTF
6968                             || PL_last_lop_op == OP_SAY
6969                             || PL_last_lop_op == OP_SYSTEM
6970                             || PL_last_lop_op == OP_EXEC)
6971                             && (PL_hints & HINT_STRICT_SUBS))
6972                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6973                     if (lastchar != '-') {
6974                         if (ckWARN(WARN_RESERVED)) {
6975                             d = PL_tokenbuf;
6976                             while (isLOWER(*d))
6977                                 d++;
6978                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6979                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6980                                        PL_tokenbuf);
6981                         }
6982                     }
6983                 }
6984                 op_free(rv2cv_op);
6985
6986             safe_bareword:
6987                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6988                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6989                                      "Operator or semicolon missing before %c%s",
6990                                      lastchar, PL_tokenbuf);
6991                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6992                                      "Ambiguous use of %c resolved as operator %c",
6993                                      lastchar, lastchar);
6994                 }
6995                 TOKEN(WORD);
6996             }
6997
6998         case KEY___FILE__:
6999             FUN0OP(
7000                 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7001             );
7002
7003         case KEY___LINE__:
7004             FUN0OP(
7005                 (OP*)newSVOP(OP_CONST, 0,
7006                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7007             );
7008
7009         case KEY___PACKAGE__:
7010             FUN0OP(
7011                 (OP*)newSVOP(OP_CONST, 0,
7012                                         (PL_curstash
7013                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7014                                          : &PL_sv_undef))
7015             );
7016
7017         case KEY___DATA__:
7018         case KEY___END__: {
7019             GV *gv;
7020             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7021                 const char *pname = "main";
7022                 STRLEN plen = 4;
7023                 U32 putf8 = 0;
7024                 if (PL_tokenbuf[2] == 'D')
7025                 {
7026                     HV * const stash =
7027                         PL_curstash ? PL_curstash : PL_defstash;
7028                     pname = HvNAME_get(stash);
7029                     plen  = HvNAMELEN (stash);
7030                     if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7031                 }
7032                 gv = gv_fetchpvn_flags(
7033                         Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7034                         plen+6, GV_ADD|putf8, SVt_PVIO
7035                 );
7036                 GvMULTI_on(gv);
7037                 if (!GvIO(gv))
7038                     GvIOp(gv) = newIO();
7039                 IoIFP(GvIOp(gv)) = PL_rsfp;
7040 #if defined(HAS_FCNTL) && defined(F_SETFD)
7041                 {
7042                     const int fd = PerlIO_fileno(PL_rsfp);
7043                     fcntl(fd,F_SETFD,fd >= 3);
7044                 }
7045 #endif
7046                 /* Mark this internal pseudo-handle as clean */
7047                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7048                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7049                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7050                 else
7051                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7052 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7053                 /* if the script was opened in binmode, we need to revert
7054                  * it to text mode for compatibility; but only iff it has CRs
7055                  * XXX this is a questionable hack at best. */
7056                 if (PL_bufend-PL_bufptr > 2
7057                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7058                 {
7059                     Off_t loc = 0;
7060                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7061                         loc = PerlIO_tell(PL_rsfp);
7062                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7063                     }
7064 #ifdef NETWARE
7065                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7066 #else
7067                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7068 #endif  /* NETWARE */
7069                         if (loc > 0)
7070                             PerlIO_seek(PL_rsfp, loc, 0);
7071                     }
7072                 }
7073 #endif
7074 #ifdef PERLIO_LAYERS
7075                 if (!IN_BYTES) {
7076                     if (UTF)
7077                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7078                     else if (PL_encoding) {
7079                         SV *name;
7080                         dSP;
7081                         ENTER;
7082                         SAVETMPS;
7083                         PUSHMARK(sp);
7084                         EXTEND(SP, 1);
7085                         XPUSHs(PL_encoding);
7086                         PUTBACK;
7087                         call_method("name", G_SCALAR);
7088                         SPAGAIN;
7089                         name = POPs;
7090                         PUTBACK;
7091                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7092                                             Perl_form(aTHX_ ":encoding(%"SVf")",
7093                                                       SVfARG(name)));
7094                         FREETMPS;
7095                         LEAVE;
7096                     }
7097                 }
7098 #endif
7099 #ifdef PERL_MAD
7100                 if (PL_madskills) {
7101                     if (PL_realtokenstart >= 0) {
7102                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7103                         if (!PL_endwhite)
7104                             PL_endwhite = newSVpvs("");
7105                         sv_catsv(PL_endwhite, PL_thiswhite);
7106                         PL_thiswhite = 0;
7107                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7108                         PL_realtokenstart = -1;
7109                     }
7110                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7111                            != NULL) ;
7112                 }
7113 #endif
7114                 PL_rsfp = NULL;
7115             }
7116             goto fake_eof;
7117         }
7118
7119         case KEY_AUTOLOAD:
7120         case KEY_DESTROY:
7121         case KEY_BEGIN:
7122         case KEY_UNITCHECK:
7123         case KEY_CHECK:
7124         case KEY_INIT:
7125         case KEY_END:
7126             if (PL_expect == XSTATE) {
7127                 s = PL_bufptr;
7128                 goto really_sub;
7129             }
7130             goto just_a_word;
7131
7132         case KEY_CORE:
7133             if (*s == ':' && s[1] == ':') {
7134                 s += 2;
7135                 d = s;
7136                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7137                 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7138                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7139                 if (tmp < 0)
7140                     tmp = -tmp;
7141                 else if (tmp == KEY_require || tmp == KEY_do
7142                       || tmp == KEY_glob)
7143                     /* that's a way to remember we saw "CORE::" */
7144                     orig_keyword = tmp;
7145                 goto reserved_word;
7146             }
7147             goto just_a_word;
7148
7149         case KEY_abs:
7150             UNI(OP_ABS);
7151
7152         case KEY_alarm:
7153             UNI(OP_ALARM);
7154
7155         case KEY_accept:
7156             LOP(OP_ACCEPT,XTERM);
7157
7158         case KEY_and:
7159             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7160                 return REPORT(0);
7161             OPERATOR(ANDOP);
7162
7163         case KEY_atan2:
7164             LOP(OP_ATAN2,XTERM);
7165
7166         case KEY_bind:
7167             LOP(OP_BIND,XTERM);
7168
7169         case KEY_binmode:
7170             LOP(OP_BINMODE,XTERM);
7171
7172         case KEY_bless:
7173             LOP(OP_BLESS,XTERM);
7174
7175         case KEY_break:
7176             FUN0(OP_BREAK);
7177
7178         case KEY_chop:
7179             UNI(OP_CHOP);
7180
7181         case KEY_continue:
7182                     /* We have to disambiguate the two senses of
7183                       "continue". If the next token is a '{' then
7184                       treat it as the start of a continue block;
7185                       otherwise treat it as a control operator.
7186                      */
7187                     s = skipspace(s);
7188                     if (*s == '{')
7189             PREBLOCK(CONTINUE);
7190                     else
7191                         FUN0(OP_CONTINUE);
7192
7193         case KEY_chdir:
7194             /* may use HOME */
7195             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7196             UNI(OP_CHDIR);
7197
7198         case KEY_close:
7199             UNI(OP_CLOSE);
7200
7201         case KEY_closedir:
7202             UNI(OP_CLOSEDIR);
7203
7204         case KEY_cmp:
7205             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7206                 return REPORT(0);
7207             Eop(OP_SCMP);
7208
7209         case KEY_caller:
7210             UNI(OP_CALLER);
7211
7212         case KEY_crypt:
7213 #ifdef FCRYPT
7214             if (!PL_cryptseen) {
7215                 PL_cryptseen = TRUE;
7216                 init_des();
7217             }
7218 #endif
7219             LOP(OP_CRYPT,XTERM);
7220
7221         case KEY_chmod:
7222             LOP(OP_CHMOD,XTERM);
7223
7224         case KEY_chown:
7225             LOP(OP_CHOWN,XTERM);
7226
7227         case KEY_connect:
7228             LOP(OP_CONNECT,XTERM);
7229
7230         case KEY_chr:
7231             UNI(OP_CHR);
7232
7233         case KEY_cos:
7234             UNI(OP_COS);
7235
7236         case KEY_chroot:
7237             UNI(OP_CHROOT);
7238
7239         case KEY_default:
7240             PREBLOCK(DEFAULT);
7241
7242         case KEY_do:
7243             s = SKIPSPACE1(s);
7244             if (*s == '{')
7245                 PRETERMBLOCK(DO);
7246             if (*s != '\'')
7247                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7248             if (orig_keyword == KEY_do) {
7249                 orig_keyword = 0;
7250                 pl_yylval.ival = 1;
7251             }
7252             else
7253                 pl_yylval.ival = 0;
7254             OPERATOR(DO);
7255
7256         case KEY_die:
7257             PL_hints |= HINT_BLOCK_SCOPE;
7258             LOP(OP_DIE,XTERM);
7259
7260         case KEY_defined:
7261             UNI(OP_DEFINED);
7262
7263         case KEY_delete:
7264             UNI(OP_DELETE);
7265
7266         case KEY_dbmopen:
7267             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7268                               STR_WITH_LEN("NDBM_File::"),
7269                               STR_WITH_LEN("DB_File::"),
7270                               STR_WITH_LEN("GDBM_File::"),
7271                               STR_WITH_LEN("SDBM_File::"),
7272                               STR_WITH_LEN("ODBM_File::"),
7273                               NULL);
7274             LOP(OP_DBMOPEN,XTERM);
7275
7276         case KEY_dbmclose:
7277             UNI(OP_DBMCLOSE);
7278
7279         case KEY_dump:
7280             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7281             LOOPX(OP_DUMP);
7282
7283         case KEY_else:
7284             PREBLOCK(ELSE);
7285
7286         case KEY_elsif:
7287             pl_yylval.ival = CopLINE(PL_curcop);
7288             OPERATOR(ELSIF);
7289
7290         case KEY_eq:
7291             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7292                 return REPORT(0);
7293             Eop(OP_SEQ);
7294
7295         case KEY_exists:
7296             UNI(OP_EXISTS);
7297
7298         case KEY_exit:
7299             if (PL_madskills)
7300                 UNI(OP_INT);
7301             UNI(OP_EXIT);
7302
7303         case KEY_eval:
7304             s = SKIPSPACE1(s);
7305             if (*s == '{') { /* block eval */
7306                 PL_expect = XTERMBLOCK;
7307                 UNIBRACK(OP_ENTERTRY);
7308             }
7309             else { /* string eval */
7310                 PL_expect = XTERM;
7311                 UNIBRACK(OP_ENTEREVAL);
7312             }
7313
7314         case KEY_evalbytes:
7315             PL_expect = XTERM;
7316             UNIBRACK(-OP_ENTEREVAL);
7317
7318         case KEY_eof:
7319             UNI(OP_EOF);
7320
7321         case KEY_exp:
7322             UNI(OP_EXP);
7323
7324         case KEY_each:
7325             UNI(OP_EACH);
7326
7327         case KEY_exec:
7328             LOP(OP_EXEC,XREF);
7329
7330         case KEY_endhostent:
7331             FUN0(OP_EHOSTENT);
7332
7333         case KEY_endnetent:
7334             FUN0(OP_ENETENT);
7335
7336         case KEY_endservent:
7337             FUN0(OP_ESERVENT);
7338
7339         case KEY_endprotoent:
7340             FUN0(OP_EPROTOENT);
7341
7342         case KEY_endpwent:
7343             FUN0(OP_EPWENT);
7344
7345         case KEY_endgrent:
7346             FUN0(OP_EGRENT);
7347
7348         case KEY_for:
7349         case KEY_foreach:
7350             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7351                 return REPORT(0);
7352             pl_yylval.ival = CopLINE(PL_curcop);
7353             s = SKIPSPACE1(s);
7354             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7355                 char *p = s;
7356 #ifdef PERL_MAD
7357                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7358 #endif
7359
7360                 if ((PL_bufend - p) >= 3 &&
7361                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7362                     p += 2;
7363                 else if ((PL_bufend - p) >= 4 &&
7364                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7365                     p += 3;
7366                 p = PEEKSPACE(p);
7367                 if (isIDFIRST_lazy_if(p,UTF)) {
7368                     p = scan_ident(p, PL_bufend,
7369                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7370                     p = PEEKSPACE(p);
7371                 }
7372                 if (*p != '$')
7373                     Perl_croak(aTHX_ "Missing $ on loop variable");
7374 #ifdef PERL_MAD
7375                 s = SvPVX(PL_linestr) + soff;
7376 #endif
7377             }
7378             OPERATOR(FOR);
7379
7380         case KEY_formline:
7381             LOP(OP_FORMLINE,XTERM);
7382
7383         case KEY_fork:
7384             FUN0(OP_FORK);
7385
7386         case KEY_fcntl:
7387             LOP(OP_FCNTL,XTERM);
7388
7389         case KEY_fileno:
7390             UNI(OP_FILENO);
7391
7392         case KEY_flock:
7393             LOP(OP_FLOCK,XTERM);
7394
7395         case KEY_gt:
7396             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7397                 return REPORT(0);
7398             Rop(OP_SGT);
7399
7400         case KEY_ge:
7401             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7402                 return REPORT(0);
7403             Rop(OP_SGE);
7404
7405         case KEY_grep:
7406             LOP(OP_GREPSTART, XREF);
7407
7408         case KEY_goto:
7409             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7410             LOOPX(OP_GOTO);
7411
7412         case KEY_gmtime:
7413             UNI(OP_GMTIME);
7414
7415         case KEY_getc:
7416             UNIDOR(OP_GETC);
7417
7418         case KEY_getppid:
7419             FUN0(OP_GETPPID);
7420
7421         case KEY_getpgrp:
7422             UNI(OP_GETPGRP);
7423
7424         case KEY_getpriority:
7425             LOP(OP_GETPRIORITY,XTERM);
7426
7427         case KEY_getprotobyname:
7428             UNI(OP_GPBYNAME);
7429
7430         case KEY_getprotobynumber:
7431             LOP(OP_GPBYNUMBER,XTERM);
7432
7433         case KEY_getprotoent:
7434             FUN0(OP_GPROTOENT);
7435
7436         case KEY_getpwent:
7437             FUN0(OP_GPWENT);
7438
7439         case KEY_getpwnam:
7440             UNI(OP_GPWNAM);
7441
7442         case KEY_getpwuid:
7443             UNI(OP_GPWUID);
7444
7445         case KEY_getpeername:
7446             UNI(OP_GETPEERNAME);
7447
7448         case KEY_gethostbyname:
7449             UNI(OP_GHBYNAME);
7450
7451         case KEY_gethostbyaddr:
7452             LOP(OP_GHBYADDR,XTERM);
7453
7454         case KEY_gethostent:
7455             FUN0(OP_GHOSTENT);
7456
7457         case KEY_getnetbyname:
7458             UNI(OP_GNBYNAME);
7459
7460         case KEY_getnetbyaddr:
7461             LOP(OP_GNBYADDR,XTERM);
7462
7463         case KEY_getnetent:
7464             FUN0(OP_GNETENT);
7465
7466         case KEY_getservbyname:
7467             LOP(OP_GSBYNAME,XTERM);
7468
7469         case KEY_getservbyport:
7470             LOP(OP_GSBYPORT,XTERM);
7471
7472         case KEY_getservent:
7473             FUN0(OP_GSERVENT);
7474
7475         case KEY_getsockname:
7476             UNI(OP_GETSOCKNAME);
7477
7478         case KEY_getsockopt:
7479             LOP(OP_GSOCKOPT,XTERM);
7480
7481         case KEY_getgrent:
7482             FUN0(OP_GGRENT);
7483
7484         case KEY_getgrnam:
7485             UNI(OP_GGRNAM);
7486
7487         case KEY_getgrgid:
7488             UNI(OP_GGRGID);
7489
7490         case KEY_getlogin:
7491             FUN0(OP_GETLOGIN);
7492
7493         case KEY_given:
7494             pl_yylval.ival = CopLINE(PL_curcop);
7495             OPERATOR(GIVEN);
7496
7497         case KEY_glob:
7498             LOP(
7499              orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7500              XTERM
7501             );
7502
7503         case KEY_hex:
7504             UNI(OP_HEX);
7505
7506         case KEY_if:
7507             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7508                 return REPORT(0);
7509             pl_yylval.ival = CopLINE(PL_curcop);
7510             OPERATOR(IF);
7511
7512         case KEY_index:
7513             LOP(OP_INDEX,XTERM);
7514
7515         case KEY_int:
7516             UNI(OP_INT);
7517
7518         case KEY_ioctl:
7519             LOP(OP_IOCTL,XTERM);
7520
7521         case KEY_join:
7522             LOP(OP_JOIN,XTERM);
7523
7524         case KEY_keys:
7525             UNI(OP_KEYS);
7526
7527         case KEY_kill:
7528             LOP(OP_KILL,XTERM);
7529
7530         case KEY_last:
7531             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7532             LOOPX(OP_LAST);
7533
7534         case KEY_lc:
7535             UNI(OP_LC);
7536
7537         case KEY_lcfirst:
7538             UNI(OP_LCFIRST);
7539
7540         case KEY_local:
7541             pl_yylval.ival = 0;
7542             OPERATOR(LOCAL);
7543
7544         case KEY_length:
7545             UNI(OP_LENGTH);
7546
7547         case KEY_lt:
7548             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7549                 return REPORT(0);
7550             Rop(OP_SLT);
7551
7552         case KEY_le:
7553             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7554                 return REPORT(0);
7555             Rop(OP_SLE);
7556
7557         case KEY_localtime:
7558             UNI(OP_LOCALTIME);
7559
7560         case KEY_log:
7561             UNI(OP_LOG);
7562
7563         case KEY_link:
7564             LOP(OP_LINK,XTERM);
7565
7566         case KEY_listen:
7567             LOP(OP_LISTEN,XTERM);
7568
7569         case KEY_lock:
7570             UNI(OP_LOCK);
7571
7572         case KEY_lstat:
7573             UNI(OP_LSTAT);
7574
7575         case KEY_m:
7576             s = scan_pat(s,OP_MATCH);
7577             TERM(sublex_start());
7578
7579         case KEY_map:
7580             LOP(OP_MAPSTART, XREF);
7581
7582         case KEY_mkdir:
7583             LOP(OP_MKDIR,XTERM);
7584
7585         case KEY_msgctl:
7586             LOP(OP_MSGCTL,XTERM);
7587
7588         case KEY_msgget:
7589             LOP(OP_MSGGET,XTERM);
7590
7591         case KEY_msgrcv:
7592             LOP(OP_MSGRCV,XTERM);
7593
7594         case KEY_msgsnd:
7595             LOP(OP_MSGSND,XTERM);
7596
7597         case KEY_our:
7598         case KEY_my:
7599         case KEY_state:
7600             PL_in_my = (U16)tmp;
7601             s = SKIPSPACE1(s);
7602             if (isIDFIRST_lazy_if(s,UTF)) {
7603 #ifdef PERL_MAD
7604                 char* start = s;
7605 #endif
7606                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7607                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7608                     goto really_sub;
7609                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7610                 if (!PL_in_my_stash) {
7611                     char tmpbuf[1024];
7612                     PL_bufptr = s;
7613                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7614                     yyerror(tmpbuf);
7615                 }
7616 #ifdef PERL_MAD
7617                 if (PL_madskills) {     /* just add type to declarator token */
7618                     sv_catsv(PL_thistoken, PL_nextwhite);
7619                     PL_nextwhite = 0;
7620                     sv_catpvn(PL_thistoken, start, s - start);
7621                 }
7622 #endif
7623             }
7624             pl_yylval.ival = 1;
7625             OPERATOR(MY);
7626
7627         case KEY_next:
7628             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7629             LOOPX(OP_NEXT);
7630
7631         case KEY_ne:
7632             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7633                 return REPORT(0);
7634             Eop(OP_SNE);
7635
7636         case KEY_no:
7637             s = tokenize_use(0, s);
7638             OPERATOR(USE);
7639
7640         case KEY_not:
7641             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7642                 FUN1(OP_NOT);
7643             else {
7644                 if (!PL_lex_allbrackets &&
7645                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7646                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7647                 OPERATOR(NOTOP);
7648             }
7649
7650         case KEY_open:
7651             s = SKIPSPACE1(s);
7652             if (isIDFIRST_lazy_if(s,UTF)) {
7653                 const char *t;
7654                 for (d = s; isALNUM_lazy_if(d,UTF);)
7655                     d++;
7656                 for (t=d; isSPACE(*t);)
7657                     t++;
7658                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7659                     /* [perl #16184] */
7660                     && !(t[0] == '=' && t[1] == '>')
7661                 ) {
7662                     int parms_len = (int)(d-s);
7663                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7664                            "Precedence problem: open %.*s should be open(%.*s)",
7665                             parms_len, s, parms_len, s);
7666                 }
7667             }
7668             LOP(OP_OPEN,XTERM);
7669
7670         case KEY_or:
7671             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7672                 return REPORT(0);
7673             pl_yylval.ival = OP_OR;
7674             OPERATOR(OROP);
7675
7676         case KEY_ord:
7677             UNI(OP_ORD);
7678
7679         case KEY_oct:
7680             UNI(OP_OCT);
7681
7682         case KEY_opendir:
7683             LOP(OP_OPEN_DIR,XTERM);
7684
7685         case KEY_print:
7686             checkcomma(s,PL_tokenbuf,"filehandle");
7687             LOP(OP_PRINT,XREF);
7688
7689         case KEY_printf:
7690             checkcomma(s,PL_tokenbuf,"filehandle");
7691             LOP(OP_PRTF,XREF);
7692
7693         case KEY_prototype:
7694             UNI(OP_PROTOTYPE);
7695
7696         case KEY_push:
7697             LOP(OP_PUSH,XTERM);
7698
7699         case KEY_pop:
7700             UNIDOR(OP_POP);
7701
7702         case KEY_pos:
7703             UNIDOR(OP_POS);
7704
7705         case KEY_pack:
7706             LOP(OP_PACK,XTERM);
7707
7708         case KEY_package:
7709             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7710             s = SKIPSPACE1(s);
7711             s = force_strict_version(s);
7712             PL_lex_expect = XBLOCK;
7713             OPERATOR(PACKAGE);
7714
7715         case KEY_pipe:
7716             LOP(OP_PIPE_OP,XTERM);
7717
7718         case KEY_q:
7719             s = scan_str(s,!!PL_madskills,FALSE);
7720             if (!s)
7721                 missingterm(NULL);
7722             pl_yylval.ival = OP_CONST;
7723             TERM(sublex_start());
7724
7725         case KEY_quotemeta:
7726             UNI(OP_QUOTEMETA);
7727
7728         case KEY_qw: {
7729             OP *words = NULL;
7730             s = scan_str(s,!!PL_madskills,FALSE);
7731             if (!s)
7732                 missingterm(NULL);
7733             PL_expect = XOPERATOR;
7734             if (SvCUR(PL_lex_stuff)) {
7735                 int warned_comma = !ckWARN(WARN_QW);
7736                 int warned_comment = warned_comma;
7737                 d = SvPV_force(PL_lex_stuff, len);
7738                 while (len) {
7739                     for (; isSPACE(*d) && len; --len, ++d)
7740                         /**/;
7741                     if (len) {
7742                         SV *sv;
7743                         const char *b = d;
7744                         if (!warned_comma || !warned_comment) {
7745                             for (; !isSPACE(*d) && len; --len, ++d) {
7746                                 if (!warned_comma && *d == ',') {
7747                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7748                                         "Possible attempt to separate words with commas");
7749                                     ++warned_comma;
7750                                 }
7751                                 else if (!warned_comment && *d == '#') {
7752                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7753                                         "Possible attempt to put comments in qw() list");
7754                                     ++warned_comment;
7755                                 }
7756                             }
7757                         }
7758                         else {
7759                             for (; !isSPACE(*d) && len; --len, ++d)
7760                                 /**/;
7761                         }
7762                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7763                         words = op_append_elem(OP_LIST, words,
7764                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7765                     }
7766                 }
7767             }
7768             if (!words)
7769                 words = newNULLLIST();
7770             if (PL_lex_stuff) {
7771                 SvREFCNT_dec(PL_lex_stuff);
7772                 PL_lex_stuff = NULL;
7773             }
7774             PL_expect = XOPERATOR;
7775             pl_yylval.opval = sawparens(words);
7776             TOKEN(QWLIST);
7777         }
7778
7779         case KEY_qq:
7780             s = scan_str(s,!!PL_madskills,FALSE);
7781             if (!s)
7782                 missingterm(NULL);
7783             pl_yylval.ival = OP_STRINGIFY;
7784             if (SvIVX(PL_lex_stuff) == '\'')
7785                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7786             TERM(sublex_start());
7787
7788         case KEY_qr:
7789             s = scan_pat(s,OP_QR);
7790             TERM(sublex_start());
7791
7792         case KEY_qx:
7793             s = scan_str(s,!!PL_madskills,FALSE);
7794             if (!s)
7795                 missingterm(NULL);
7796             readpipe_override();
7797             TERM(sublex_start());
7798
7799         case KEY_return:
7800             OLDLOP(OP_RETURN);
7801
7802         case KEY_require:
7803             s = SKIPSPACE1(s);
7804             if (isDIGIT(*s)) {
7805                 s = force_version(s, FALSE);
7806             }
7807             else if (*s != 'v' || !isDIGIT(s[1])
7808                     || (s = force_version(s, TRUE), *s == 'v'))
7809             {
7810                 *PL_tokenbuf = '\0';
7811                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7812                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7813                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7814                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
7815                 else if (*s == '<')
7816                     yyerror("<> should be quotes");
7817             }
7818             if (orig_keyword == KEY_require) {
7819                 orig_keyword = 0;
7820                 pl_yylval.ival = 1;
7821             }
7822             else
7823                 pl_yylval.ival = 0;
7824             PL_expect = XTERM;
7825             PL_bufptr = s;
7826             PL_last_uni = PL_oldbufptr;
7827             PL_last_lop_op = OP_REQUIRE;
7828             s = skipspace(s);
7829             return REPORT( (int)REQUIRE );
7830
7831         case KEY_reset:
7832             UNI(OP_RESET);
7833
7834         case KEY_redo:
7835             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7836             LOOPX(OP_REDO);
7837
7838         case KEY_rename:
7839             LOP(OP_RENAME,XTERM);
7840
7841         case KEY_rand:
7842             UNI(OP_RAND);
7843
7844         case KEY_rmdir:
7845             UNI(OP_RMDIR);
7846
7847         case KEY_rindex:
7848             LOP(OP_RINDEX,XTERM);
7849
7850         case KEY_read:
7851             LOP(OP_READ,XTERM);
7852
7853         case KEY_readdir:
7854             UNI(OP_READDIR);
7855
7856         case KEY_readline:
7857             UNIDOR(OP_READLINE);
7858
7859         case KEY_readpipe:
7860             UNIDOR(OP_BACKTICK);
7861
7862         case KEY_rewinddir:
7863             UNI(OP_REWINDDIR);
7864
7865         case KEY_recv:
7866             LOP(OP_RECV,XTERM);
7867
7868         case KEY_reverse:
7869             LOP(OP_REVERSE,XTERM);
7870
7871         case KEY_readlink:
7872             UNIDOR(OP_READLINK);
7873
7874         case KEY_ref:
7875             UNI(OP_REF);
7876
7877         case KEY_s:
7878             s = scan_subst(s);
7879             if (pl_yylval.opval)
7880                 TERM(sublex_start());
7881             else
7882                 TOKEN(1);       /* force error */
7883
7884         case KEY_say:
7885             checkcomma(s,PL_tokenbuf,"filehandle");
7886             LOP(OP_SAY,XREF);
7887
7888         case KEY_chomp:
7889             UNI(OP_CHOMP);
7890
7891         case KEY_scalar:
7892             UNI(OP_SCALAR);
7893
7894         case KEY_select:
7895             LOP(OP_SELECT,XTERM);
7896
7897         case KEY_seek:
7898             LOP(OP_SEEK,XTERM);
7899
7900         case KEY_semctl:
7901             LOP(OP_SEMCTL,XTERM);
7902
7903         case KEY_semget:
7904             LOP(OP_SEMGET,XTERM);
7905
7906         case KEY_semop:
7907             LOP(OP_SEMOP,XTERM);
7908
7909         case KEY_send:
7910             LOP(OP_SEND,XTERM);
7911
7912         case KEY_setpgrp:
7913             LOP(OP_SETPGRP,XTERM);
7914
7915         case KEY_setpriority:
7916             LOP(OP_SETPRIORITY,XTERM);
7917
7918         case KEY_sethostent:
7919             UNI(OP_SHOSTENT);
7920
7921         case KEY_setnetent:
7922             UNI(OP_SNETENT);
7923
7924         case KEY_setservent:
7925             UNI(OP_SSERVENT);
7926
7927         case KEY_setprotoent:
7928             UNI(OP_SPROTOENT);
7929
7930         case KEY_setpwent:
7931             FUN0(OP_SPWENT);
7932
7933         case KEY_setgrent:
7934             FUN0(OP_SGRENT);
7935
7936         case KEY_seekdir:
7937             LOP(OP_SEEKDIR,XTERM);
7938
7939         case KEY_setsockopt:
7940             LOP(OP_SSOCKOPT,XTERM);
7941
7942         case KEY_shift:
7943             UNIDOR(OP_SHIFT);
7944
7945         case KEY_shmctl:
7946             LOP(OP_SHMCTL,XTERM);
7947
7948         case KEY_shmget:
7949             LOP(OP_SHMGET,XTERM);
7950
7951         case KEY_shmread:
7952             LOP(OP_SHMREAD,XTERM);
7953
7954         case KEY_shmwrite:
7955             LOP(OP_SHMWRITE,XTERM);
7956
7957         case KEY_shutdown:
7958             LOP(OP_SHUTDOWN,XTERM);
7959
7960         case KEY_sin:
7961             UNI(OP_SIN);
7962
7963         case KEY_sleep:
7964             UNI(OP_SLEEP);
7965
7966         case KEY_socket:
7967             LOP(OP_SOCKET,XTERM);
7968
7969         case KEY_socketpair:
7970             LOP(OP_SOCKPAIR,XTERM);
7971
7972         case KEY_sort:
7973             checkcomma(s,PL_tokenbuf,"subroutine name");
7974             s = SKIPSPACE1(s);
7975             if (*s == ';' || *s == ')')         /* probably a close */
7976                 Perl_croak(aTHX_ "sort is now a reserved word");
7977             PL_expect = XTERM;
7978             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7979             LOP(OP_SORT,XREF);
7980
7981         case KEY_split:
7982             LOP(OP_SPLIT,XTERM);
7983
7984         case KEY_sprintf:
7985             LOP(OP_SPRINTF,XTERM);
7986
7987         case KEY_splice:
7988             LOP(OP_SPLICE,XTERM);
7989
7990         case KEY_sqrt:
7991             UNI(OP_SQRT);
7992
7993         case KEY_srand:
7994             UNI(OP_SRAND);
7995
7996         case KEY_stat:
7997             UNI(OP_STAT);
7998
7999         case KEY_study:
8000             UNI(OP_STUDY);
8001
8002         case KEY_substr:
8003             LOP(OP_SUBSTR,XTERM);
8004
8005         case KEY_format:
8006         case KEY_sub:
8007           really_sub:
8008             {
8009                 char tmpbuf[sizeof PL_tokenbuf];
8010                 SSize_t tboffset = 0;
8011                 expectation attrful;
8012                 bool have_name, have_proto;
8013                 const int key = tmp;
8014
8015 #ifdef PERL_MAD
8016                 SV *tmpwhite = 0;
8017
8018                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8019                 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8020                 PL_thistoken = 0;
8021
8022                 d = s;
8023                 s = SKIPSPACE2(s,tmpwhite);
8024 #else
8025                 s = skipspace(s);
8026 #endif
8027
8028                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8029                     (*s == ':' && s[1] == ':'))
8030                 {
8031 #ifdef PERL_MAD
8032                     SV *nametoke = NULL;
8033 #endif
8034
8035                     PL_expect = XBLOCK;
8036                     attrful = XATTRBLOCK;
8037                     /* remember buffer pos'n for later force_word */
8038                     tboffset = s - PL_oldbufptr;
8039                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
8040 #ifdef PERL_MAD
8041                     if (PL_madskills)
8042                         nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8043 #endif
8044                     if (memchr(tmpbuf, ':', len))
8045                         sv_setpvn(PL_subname, tmpbuf, len);
8046                     else {
8047                         sv_setsv(PL_subname,PL_curstname);
8048                         sv_catpvs(PL_subname,"::");
8049                         sv_catpvn(PL_subname,tmpbuf,len);
8050                     }
8051                     if (SvUTF8(PL_linestr))
8052                         SvUTF8_on(PL_subname);
8053                     have_name = TRUE;
8054
8055 #ifdef PERL_MAD
8056
8057                     start_force(0);
8058                     CURMAD('X', nametoke);
8059                     CURMAD('_', tmpwhite);
8060                     (void) force_word(PL_oldbufptr + tboffset, WORD,
8061                                       FALSE, TRUE, TRUE);
8062
8063                     s = SKIPSPACE2(d,tmpwhite);
8064 #else
8065                     s = skipspace(d);
8066 #endif
8067                 }
8068                 else {
8069                     if (key == KEY_my)
8070                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
8071                     PL_expect = XTERMBLOCK;
8072                     attrful = XATTRTERM;
8073                     sv_setpvs(PL_subname,"?");
8074                     have_name = FALSE;
8075                 }
8076
8077                 if (key == KEY_format) {
8078                     if (*s == '=')
8079                         PL_lex_formbrack = PL_lex_brackets + 1;
8080 #ifdef PERL_MAD
8081                     PL_thistoken = subtoken;
8082                     s = d;
8083 #else
8084                     if (have_name)
8085                         (void) force_word(PL_oldbufptr + tboffset, WORD,
8086                                           FALSE, TRUE, TRUE);
8087 #endif
8088                     OPERATOR(FORMAT);
8089                 }
8090
8091                 /* Look for a prototype */
8092                 if (*s == '(') {
8093                     char *p;
8094                     bool bad_proto = FALSE;
8095                     bool in_brackets = FALSE;
8096                     char greedy_proto = ' ';
8097                     bool proto_after_greedy_proto = FALSE;
8098                     bool must_be_last = FALSE;
8099                     bool underscore = FALSE;
8100                     bool seen_underscore = FALSE;
8101                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8102                     STRLEN tmplen;
8103
8104                     s = scan_str(s,!!PL_madskills,FALSE);
8105                     if (!s)
8106                         Perl_croak(aTHX_ "Prototype not terminated");
8107                     /* strip spaces and check for bad characters */
8108                     d = SvPV(PL_lex_stuff, tmplen);
8109                     tmp = 0;
8110                     for (p = d; tmplen; tmplen--, ++p) {
8111                         if (!isSPACE(*p)) {
8112                             d[tmp++] = *p;
8113
8114                             if (warnillegalproto) {
8115                                 if (must_be_last)
8116                                     proto_after_greedy_proto = TRUE;
8117                                 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8118                                     bad_proto = TRUE;
8119                                 }
8120                                 else {
8121                                     if ( underscore ) {
8122                                         if ( *p != ';' )
8123                                             bad_proto = TRUE;
8124                                         underscore = FALSE;
8125                                     }
8126                                     if ( *p == '[' ) {
8127                                         in_brackets = TRUE;
8128                                     }
8129                                     else if ( *p == ']' ) {
8130                                         in_brackets = FALSE;
8131                                     }
8132                                     else if ( (*p == '@' || *p == '%') &&
8133                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8134                                          !in_brackets ) {
8135                                         must_be_last = TRUE;
8136                                         greedy_proto = *p;
8137                                     }
8138                                     else if ( *p == '_' ) {
8139                                         underscore = seen_underscore = TRUE;
8140                                     }
8141                                 }
8142                             }
8143                         }
8144                     }
8145                     d[tmp] = '\0';
8146                     if (proto_after_greedy_proto)
8147                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8148                                     "Prototype after '%c' for %"SVf" : %s",
8149                                     greedy_proto, SVfARG(PL_subname), d);
8150                     if (bad_proto) {
8151                         SV *dsv = newSVpvs_flags("", SVs_TEMP);
8152                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8153                                     "Illegal character %sin prototype for %"SVf" : %s",
8154                                     seen_underscore ? "after '_' " : "",
8155                                     SVfARG(PL_subname),
8156                                     sv_uni_display(dsv,
8157                                          newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
8158                                          tmp, UNI_DISPLAY_ISPRINT));
8159                     }
8160                     SvCUR_set(PL_lex_stuff, tmp);
8161                     have_proto = TRUE;
8162
8163 #ifdef PERL_MAD
8164                     start_force(0);
8165                     CURMAD('q', PL_thisopen);
8166                     CURMAD('_', tmpwhite);
8167                     CURMAD('=', PL_thisstuff);
8168                     CURMAD('Q', PL_thisclose);
8169                     NEXTVAL_NEXTTOKE.opval =
8170                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8171                     PL_lex_stuff = NULL;
8172                     force_next(THING);
8173
8174                     s = SKIPSPACE2(s,tmpwhite);
8175 #else
8176                     s = skipspace(s);
8177 #endif
8178                 }
8179                 else
8180                     have_proto = FALSE;
8181
8182                 if (*s == ':' && s[1] != ':')
8183                     PL_expect = attrful;
8184                 else if (*s != '{' && key == KEY_sub) {
8185                     if (!have_name)
8186                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8187                     else if (*s != ';' && *s != '}')
8188                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8189                 }
8190
8191 #ifdef PERL_MAD
8192                 start_force(0);
8193                 if (tmpwhite) {
8194                     if (PL_madskills)
8195                         curmad('^', newSVpvs(""));
8196                     CURMAD('_', tmpwhite);
8197                 }
8198                 force_next(0);
8199
8200                 PL_thistoken = subtoken;
8201 #else
8202                 if (have_proto) {
8203                     NEXTVAL_NEXTTOKE.opval =
8204                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8205                     PL_lex_stuff = NULL;
8206                     force_next(THING);
8207                 }
8208 #endif
8209                 if (!have_name) {
8210                     if (PL_curstash)
8211                         sv_setpvs(PL_subname, "__ANON__");
8212                     else
8213                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8214                     TOKEN(ANONSUB);
8215                 }
8216 #ifndef PERL_MAD
8217                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8218                                   FALSE, TRUE, TRUE);
8219 #endif
8220                 if (key == KEY_my)
8221                     TOKEN(MYSUB);
8222                 TOKEN(SUB);
8223             }
8224
8225         case KEY_system:
8226             LOP(OP_SYSTEM,XREF);
8227
8228         case KEY_symlink:
8229             LOP(OP_SYMLINK,XTERM);
8230
8231         case KEY_syscall:
8232             LOP(OP_SYSCALL,XTERM);
8233
8234         case KEY_sysopen:
8235             LOP(OP_SYSOPEN,XTERM);
8236
8237         case KEY_sysseek:
8238             LOP(OP_SYSSEEK,XTERM);
8239
8240         case KEY_sysread:
8241             LOP(OP_SYSREAD,XTERM);
8242
8243         case KEY_syswrite:
8244             LOP(OP_SYSWRITE,XTERM);
8245
8246         case KEY_tr:
8247             s = scan_trans(s);
8248             TERM(sublex_start());
8249
8250         case KEY_tell:
8251             UNI(OP_TELL);
8252
8253         case KEY_telldir:
8254             UNI(OP_TELLDIR);
8255
8256         case KEY_tie:
8257             LOP(OP_TIE,XTERM);
8258
8259         case KEY_tied:
8260             UNI(OP_TIED);
8261
8262         case KEY_time:
8263             FUN0(OP_TIME);
8264
8265         case KEY_times:
8266             FUN0(OP_TMS);
8267
8268         case KEY_truncate:
8269             LOP(OP_TRUNCATE,XTERM);
8270
8271         case KEY_uc:
8272             UNI(OP_UC);
8273
8274         case KEY_ucfirst:
8275             UNI(OP_UCFIRST);
8276
8277         case KEY_untie:
8278             UNI(OP_UNTIE);
8279
8280         case KEY_until:
8281             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8282                 return REPORT(0);
8283             pl_yylval.ival = CopLINE(PL_curcop);
8284             OPERATOR(UNTIL);
8285
8286         case KEY_unless:
8287             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8288                 return REPORT(0);
8289             pl_yylval.ival = CopLINE(PL_curcop);
8290             OPERATOR(UNLESS);
8291
8292         case KEY_unlink:
8293             LOP(OP_UNLINK,XTERM);
8294
8295         case KEY_undef:
8296             UNIDOR(OP_UNDEF);
8297
8298         case KEY_unpack:
8299             LOP(OP_UNPACK,XTERM);
8300
8301         case KEY_utime:
8302             LOP(OP_UTIME,XTERM);
8303
8304         case KEY_umask:
8305             UNIDOR(OP_UMASK);
8306
8307         case KEY_unshift:
8308             LOP(OP_UNSHIFT,XTERM);
8309
8310         case KEY_use:
8311             s = tokenize_use(1, s);
8312             OPERATOR(USE);
8313
8314         case KEY_values:
8315             UNI(OP_VALUES);
8316
8317         case KEY_vec:
8318             LOP(OP_VEC,XTERM);
8319
8320         case KEY_when:
8321             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8322                 return REPORT(0);
8323             pl_yylval.ival = CopLINE(PL_curcop);
8324             OPERATOR(WHEN);
8325
8326         case KEY_while:
8327             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8328                 return REPORT(0);
8329             pl_yylval.ival = CopLINE(PL_curcop);
8330             OPERATOR(WHILE);
8331
8332         case KEY_warn:
8333             PL_hints |= HINT_BLOCK_SCOPE;
8334             LOP(OP_WARN,XTERM);
8335
8336         case KEY_wait:
8337             FUN0(OP_WAIT);
8338
8339         case KEY_waitpid:
8340             LOP(OP_WAITPID,XTERM);
8341
8342         case KEY_wantarray:
8343             FUN0(OP_WANTARRAY);
8344
8345         case KEY_write:
8346 #ifdef EBCDIC
8347         {
8348             char ctl_l[2];
8349             ctl_l[0] = toCTRL('L');
8350             ctl_l[1] = '\0';
8351             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8352         }
8353 #else
8354             /* Make sure $^L is defined */
8355             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8356 #endif
8357             UNI(OP_ENTERWRITE);
8358
8359         case KEY_x:
8360             if (PL_expect == XOPERATOR) {
8361                 if (*s == '=' && !PL_lex_allbrackets &&
8362                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8363                     return REPORT(0);
8364                 Mop(OP_REPEAT);
8365             }
8366             check_uni();
8367             goto just_a_word;
8368
8369         case KEY_xor:
8370             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8371                 return REPORT(0);
8372             pl_yylval.ival = OP_XOR;
8373             OPERATOR(OROP);
8374
8375         case KEY_y:
8376             s = scan_trans(s);
8377             TERM(sublex_start());
8378         }
8379     }}
8380 }
8381 #ifdef __SC__
8382 #pragma segment Main
8383 #endif
8384
8385 static int
8386 S_pending_ident(pTHX)
8387 {
8388     dVAR;
8389     register char *d;
8390     PADOFFSET tmp = 0;
8391     /* pit holds the identifier we read and pending_ident is reset */
8392     char pit = PL_pending_ident;
8393     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8394     /* All routes through this function want to know if there is a colon.  */
8395     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8396     PL_pending_ident = 0;
8397
8398     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8399     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8400           "### Pending identifier '%s'\n", PL_tokenbuf); });
8401
8402     /* if we're in a my(), we can't allow dynamics here.
8403        $foo'bar has already been turned into $foo::bar, so
8404        just check for colons.
8405
8406        if it's a legal name, the OP is a PADANY.
8407     */
8408     if (PL_in_my) {
8409         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8410             if (has_colon)
8411                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8412                                   "variable %s in \"our\"",
8413                                   PL_tokenbuf));
8414             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8415         }
8416         else {
8417             if (has_colon)
8418                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8419                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8420
8421             pl_yylval.opval = newOP(OP_PADANY, 0);
8422             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8423                                                         UTF ? SVf_UTF8 : 0);
8424             return PRIVATEREF;
8425         }
8426     }
8427
8428     /*
8429        build the ops for accesses to a my() variable.
8430
8431        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8432        then used in a comparison.  This catches most, but not
8433        all cases.  For instance, it catches
8434            sort { my($a); $a <=> $b }
8435        but not
8436            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8437        (although why you'd do that is anyone's guess).
8438     */
8439
8440     if (!has_colon) {
8441         if (!PL_in_my)
8442             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8443                                     UTF ? SVf_UTF8 : 0);
8444         if (tmp != NOT_IN_PAD) {
8445             /* might be an "our" variable" */
8446             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8447                 /* build ops for a bareword */
8448                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8449                 HEK * const stashname = HvNAME_HEK(stash);
8450                 SV *  const sym = newSVhek(stashname);
8451                 sv_catpvs(sym, "::");
8452                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8453                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8454                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8455                 gv_fetchsv(sym,
8456                     (PL_in_eval
8457                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8458                         : GV_ADDMULTI
8459                     ),
8460                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8461                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8462                      : SVt_PVHV));
8463                 return WORD;
8464             }
8465
8466             /* if it's a sort block and they're naming $a or $b */
8467             if (PL_last_lop_op == OP_SORT &&
8468                 PL_tokenbuf[0] == '$' &&
8469                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8470                 && !PL_tokenbuf[2])
8471             {
8472                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8473                      d < PL_bufend && *d != '\n';
8474                      d++)
8475                 {
8476                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8477                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8478                               PL_tokenbuf);
8479                     }
8480                 }
8481             }
8482
8483             pl_yylval.opval = newOP(OP_PADANY, 0);
8484             pl_yylval.opval->op_targ = tmp;
8485             return PRIVATEREF;
8486         }
8487     }
8488
8489     /*
8490        Whine if they've said @foo in a doublequoted string,
8491        and @foo isn't a variable we can find in the symbol
8492        table.
8493     */
8494     if (ckWARN(WARN_AMBIGUOUS) &&
8495         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8496         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8497                                         ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8498         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8499                 /* DO NOT warn for @- and @+ */
8500                 && !( PL_tokenbuf[2] == '\0' &&
8501                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8502            )
8503         {
8504             /* Downgraded from fatal to warning 20000522 mjd */
8505             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8506                         "Possible unintended interpolation of %s in string",
8507                         PL_tokenbuf);
8508         }
8509     }
8510
8511     /* build ops for a bareword */
8512     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8513                                                       tokenbuf_len - 1,
8514                                                       UTF ? SVf_UTF8 : 0 ));
8515     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8516     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8517                      (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8518                      | ( UTF ? SVf_UTF8 : 0 ),
8519                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8520                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8521                       : SVt_PVHV));
8522     return WORD;
8523 }
8524
8525 STATIC void
8526 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8527 {
8528     dVAR;
8529
8530     PERL_ARGS_ASSERT_CHECKCOMMA;
8531
8532     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8533         if (ckWARN(WARN_SYNTAX)) {
8534             int level = 1;
8535             const char *w;
8536             for (w = s+2; *w && level; w++) {
8537                 if (*w == '(')
8538                     ++level;
8539                 else if (*w == ')')
8540                     --level;
8541             }
8542             while (isSPACE(*w))
8543                 ++w;
8544             /* the list of chars below is for end of statements or
8545              * block / parens, boolean operators (&&, ||, //) and branch
8546              * constructs (or, and, if, until, unless, while, err, for).
8547              * Not a very solid hack... */
8548             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8549                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8550                             "%s (...) interpreted as function",name);
8551         }
8552     }
8553     while (s < PL_bufend && isSPACE(*s))
8554         s++;
8555     if (*s == '(')
8556         s++;
8557     while (s < PL_bufend && isSPACE(*s))
8558         s++;
8559     if (isIDFIRST_lazy_if(s,UTF)) {
8560         const char * const w = s++;
8561         while (isALNUM_lazy_if(s,UTF))
8562             s++;
8563         while (s < PL_bufend && isSPACE(*s))
8564             s++;
8565         if (*s == ',') {
8566             GV* gv;
8567             if (keyword(w, s - w, 0))
8568                 return;
8569
8570             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8571             if (gv && GvCVu(gv))
8572                 return;
8573             Perl_croak(aTHX_ "No comma allowed after %s", what);
8574         }
8575     }
8576 }
8577
8578 /* Either returns sv, or mortalizes sv and returns a new SV*.
8579    Best used as sv=new_constant(..., sv, ...).
8580    If s, pv are NULL, calls subroutine with one argument,
8581    and type is used with error messages only. */
8582
8583 STATIC SV *
8584 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8585                SV *sv, SV *pv, const char *type, STRLEN typelen)
8586 {
8587     dVAR; dSP;
8588     HV * const table = GvHV(PL_hintgv);          /* ^H */
8589     SV *res;
8590     SV **cvp;
8591     SV *cv, *typesv;
8592     const char *why1 = "", *why2 = "", *why3 = "";
8593
8594     PERL_ARGS_ASSERT_NEW_CONSTANT;
8595
8596     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8597         SV *msg;
8598
8599         why2 = (const char *)
8600             (strEQ(key,"charnames")
8601              ? "(possibly a missing \"use charnames ...\")"
8602              : "");
8603         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8604                             (type ? type: "undef"), why2);
8605
8606         /* This is convoluted and evil ("goto considered harmful")
8607          * but I do not understand the intricacies of all the different
8608          * failure modes of %^H in here.  The goal here is to make
8609          * the most probable error message user-friendly. --jhi */
8610
8611         goto msgdone;
8612
8613     report:
8614         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8615                             (type ? type: "undef"), why1, why2, why3);
8616     msgdone:
8617         yyerror(SvPVX_const(msg));
8618         SvREFCNT_dec(msg);
8619         return sv;
8620     }
8621
8622     /* charnames doesn't work well if there have been errors found */
8623     if (PL_error_count > 0 && strEQ(key,"charnames"))
8624         return &PL_sv_undef;
8625
8626     cvp = hv_fetch(table, key, keylen, FALSE);
8627     if (!cvp || !SvOK(*cvp)) {
8628         why1 = "$^H{";
8629         why2 = key;
8630         why3 = "} is not defined";
8631         goto report;
8632     }
8633     sv_2mortal(sv);                     /* Parent created it permanently */
8634     cv = *cvp;
8635     if (!pv && s)
8636         pv = newSVpvn_flags(s, len, SVs_TEMP);
8637     if (type && pv)
8638         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8639     else
8640         typesv = &PL_sv_undef;
8641
8642     PUSHSTACKi(PERLSI_OVERLOAD);
8643     ENTER ;
8644     SAVETMPS;
8645
8646     PUSHMARK(SP) ;
8647     EXTEND(sp, 3);
8648     if (pv)
8649         PUSHs(pv);
8650     PUSHs(sv);
8651     if (pv)
8652         PUSHs(typesv);
8653     PUTBACK;
8654     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8655
8656     SPAGAIN ;
8657
8658     /* Check the eval first */
8659     if (!PL_in_eval && SvTRUE(ERRSV)) {
8660         sv_catpvs(ERRSV, "Propagated");
8661         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8662         (void)POPs;
8663         res = SvREFCNT_inc_simple(sv);
8664     }
8665     else {
8666         res = POPs;
8667         SvREFCNT_inc_simple_void(res);
8668     }
8669
8670     PUTBACK ;
8671     FREETMPS ;
8672     LEAVE ;
8673     POPSTACK;
8674
8675     if (!SvOK(res)) {
8676         why1 = "Call to &{$^H{";
8677         why2 = key;
8678         why3 = "}} did not return a defined value";
8679         sv = res;
8680         goto report;
8681     }
8682
8683     return res;
8684 }
8685
8686 /* Returns a NUL terminated string, with the length of the string written to
8687    *slp
8688    */
8689 STATIC char *
8690 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8691 {
8692     dVAR;
8693     register char *d = dest;
8694     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
8695
8696     PERL_ARGS_ASSERT_SCAN_WORD;
8697
8698     for (;;) {
8699         if (d >= e)
8700             Perl_croak(aTHX_ ident_too_long);
8701         if (isALNUM(*s))        /* UTF handled below */
8702             *d++ = *s++;
8703         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8704             *d++ = ':';
8705             *d++ = ':';
8706             s++;
8707         }
8708         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8709             *d++ = *s++;
8710             *d++ = *s++;
8711         }
8712         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8713             char *t = s + UTF8SKIP(s);
8714             size_t len;
8715             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8716                 t += UTF8SKIP(t);
8717             len = t - s;
8718             if (d + len > e)
8719                 Perl_croak(aTHX_ ident_too_long);
8720             Copy(s, d, len, char);
8721             d += len;
8722             s = t;
8723         }
8724         else {
8725             *d = '\0';
8726             *slp = d - dest;
8727             return s;
8728         }
8729     }
8730 }
8731
8732 STATIC char *
8733 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8734 {
8735     dVAR;
8736     char *bracket = NULL;
8737     char funny = *s++;
8738     register char *d = dest;
8739     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
8740
8741     PERL_ARGS_ASSERT_SCAN_IDENT;
8742
8743     if (isSPACE(*s))
8744         s = PEEKSPACE(s);
8745     if (isDIGIT(*s)) {
8746         while (isDIGIT(*s)) {
8747             if (d >= e)
8748                 Perl_croak(aTHX_ ident_too_long);
8749             *d++ = *s++;
8750         }
8751     }
8752     else {
8753         for (;;) {
8754             if (d >= e)
8755                 Perl_croak(aTHX_ ident_too_long);
8756             if (isALNUM(*s))    /* UTF handled below */
8757                 *d++ = *s++;
8758             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8759                 *d++ = ':';
8760                 *d++ = ':';
8761                 s++;
8762             }
8763             else if (*s == ':' && s[1] == ':') {
8764                 *d++ = *s++;
8765                 *d++ = *s++;
8766             }
8767             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8768                 char *t = s + UTF8SKIP(s);
8769                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8770                     t += UTF8SKIP(t);
8771                 if (d + (t - s) > e)
8772                     Perl_croak(aTHX_ ident_too_long);
8773                 Copy(s, d, t - s, char);
8774                 d += t - s;
8775                 s = t;
8776             }
8777             else
8778                 break;
8779         }
8780     }
8781     *d = '\0';
8782     d = dest;
8783     if (*d) {
8784         if (PL_lex_state != LEX_NORMAL)
8785             PL_lex_state = LEX_INTERPENDMAYBE;
8786         return s;
8787     }
8788     if (*s == '$' && s[1] &&
8789         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8790     {
8791         return s;
8792     }
8793     if (*s == '{') {
8794         bracket = s;
8795         s++;
8796     }
8797     else if (ck_uni)
8798         check_uni();
8799     if (s < send) {
8800         if (UTF) {
8801             const STRLEN skip = UTF8SKIP(s);
8802             STRLEN i;
8803             d[skip] = '\0';
8804             for ( i = 0; i < skip; i++ )
8805                 d[i] = *s++;
8806         }
8807         else {
8808             *d = *s++;
8809             d[1] = '\0';
8810         }
8811     }
8812     if (*d == '^' && *s && isCONTROLVAR(*s)) {
8813         *d = toCTRL(*s);
8814         s++;
8815     }
8816     if (bracket) {
8817         if (isSPACE(s[-1])) {
8818             while (s < send) {
8819                 const char ch = *s++;
8820                 if (!SPACE_OR_TAB(ch)) {
8821                     *d = ch;
8822                     break;
8823                 }
8824             }
8825         }
8826         if (isIDFIRST_lazy_if(d,UTF)) {
8827             d += UTF8SKIP(d);
8828             if (UTF) {
8829                 char *end = s;
8830                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8831                     end += UTF8SKIP(end);
8832                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8833                         end += UTF8SKIP(end);
8834                 }
8835                 Copy(s, d, end - s, char);
8836                 d += end - s;
8837                 s = end;
8838             }
8839             else {
8840                 while ((isALNUM(*s) || *s == ':') && d < e)
8841                     *d++ = *s++;
8842                 if (d >= e)
8843                     Perl_croak(aTHX_ ident_too_long);
8844             }
8845             *d = '\0';
8846             while (s < send && SPACE_OR_TAB(*s))
8847                 s++;
8848             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8849                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8850                     const char * const brack =
8851                         (const char *)
8852                         ((*s == '[') ? "[...]" : "{...}");
8853    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8854                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8855                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8856                         funny, dest, brack, funny, dest, brack);
8857                 }
8858                 bracket++;
8859                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8860                 PL_lex_allbrackets++;
8861                 return s;
8862             }
8863         }
8864         /* Handle extended ${^Foo} variables
8865          * 1999-02-27 mjd-perl-patch@plover.com */
8866         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8867                  && isALNUM(*s))
8868         {
8869             d++;
8870             while (isALNUM(*s) && d < e) {
8871                 *d++ = *s++;
8872             }
8873             if (d >= e)
8874                 Perl_croak(aTHX_ ident_too_long);
8875             *d = '\0';
8876         }
8877         if (*s == '}') {
8878             s++;
8879             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8880                 PL_lex_state = LEX_INTERPEND;
8881                 PL_expect = XREF;
8882             }
8883             if (PL_lex_state == LEX_NORMAL) {
8884                 if (ckWARN(WARN_AMBIGUOUS) &&
8885                     (keyword(dest, d - dest, 0)
8886                      || get_cvn_flags(dest, d - dest, 0)))
8887                 {
8888                     if (funny == '#')
8889                         funny = '@';
8890                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8891                         "Ambiguous use of %c{%s} resolved to %c%s",
8892                         funny, dest, funny, dest);
8893                 }
8894             }
8895         }
8896         else {
8897             s = bracket;                /* let the parser handle it */
8898             *dest = '\0';
8899         }
8900     }
8901     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8902         PL_lex_state = LEX_INTERPEND;
8903     return s;
8904 }
8905
8906 static bool
8907 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8908
8909     /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8910      * the parse starting at 's', based on the subset that are valid in this
8911      * context input to this routine in 'valid_flags'. Advances s.  Returns
8912      * TRUE if the input was a valid flag, so the next char may be as well;
8913      * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8914      * current regex.  This routine will set it to any charset modifier found.
8915      * The caller shouldn't change it.  This way, another charset modifier
8916      * encountered in the parse can be detected as an error, as we have decided
8917      * allow only one */
8918
8919     const char c = **s;
8920
8921     if (! strchr(valid_flags, c)) {
8922         if (isALNUM(c)) {
8923             goto deprecate;
8924         }
8925         return FALSE;
8926     }
8927
8928     switch (c) {
8929
8930         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8931         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
8932         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
8933         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
8934         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
8935         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8936         case LOCALE_PAT_MOD:
8937
8938             /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8939              * can't be regex modifiers.
8940              * In 5.14, s///le is legal and ambiguous.  Try to disambiguate as
8941              * much as easily done.  s///lei, for example, has to mean regex
8942              * modifiers if it's not an error (as does any word character
8943              * following the 'e').  Otherwise, we resolve to the backwards-
8944              * compatible, but less likely 's/// le ...', i.e. as meaning
8945              * less-than-or-equal.  The reason it's not likely is that s//
8946              * returns a number for code in the field (/r returns a string, but
8947              * that wasn't added until the 5.13 series), and so '<=' should be
8948              * used for comparing, not 'le'. */
8949             if (*((*s) + 1) == 't') {
8950                 goto deprecate;
8951             }
8952             else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8953
8954                 /* 'e' is valid only for substitutes, s///e.  If it is not
8955                  * valid in the current context, then 'm//le' must mean the
8956                  * comparison operator, so use the regular deprecation message.
8957                  */
8958                 if (! strchr(valid_flags, 'e')) {
8959                     goto deprecate;
8960                 }
8961                 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8962                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
8963                 return FALSE;
8964             }
8965             if (*charset) {
8966                 goto multiple_charsets;
8967             }
8968             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8969             *charset = c;
8970             break;
8971         case UNICODE_PAT_MOD:
8972             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8973              * 'n' means they can't be regex modifiers */
8974             if (*((*s) + 1) == 'n') {
8975                 goto deprecate;
8976             }
8977             if (*charset) {
8978                 goto multiple_charsets;
8979             }
8980             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8981             *charset = c;
8982             break;
8983         case ASCII_RESTRICT_PAT_MOD:
8984             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8985              * can't be regex modifiers */
8986             if (*((*s) + 1) == 'n') {
8987                 goto deprecate;
8988             }
8989
8990             if (! *charset) {
8991                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8992             }
8993             else {
8994
8995                 /* Error if previous modifier wasn't an 'a', but if it was, see
8996                  * if, and accept, a second occurrence (only) */
8997                 if (*charset != 'a'
8998                     || get_regex_charset(*pmfl)
8999                         != REGEX_ASCII_RESTRICTED_CHARSET)
9000                 {
9001                         goto multiple_charsets;
9002                 }
9003                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9004             }
9005             *charset = c;
9006             break;
9007         case DEPENDS_PAT_MOD:
9008             if (*charset) {
9009                 goto multiple_charsets;
9010             }
9011             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9012             *charset = c;
9013             break;
9014     }
9015
9016     (*s)++;
9017     return TRUE;
9018
9019     deprecate:
9020         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
9021             "Having no space between pattern and following word is deprecated");
9022         return FALSE;
9023
9024     multiple_charsets:
9025         if (*charset != c) {
9026             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9027         }
9028         else if (c == 'a') {
9029             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9030         }
9031         else {
9032             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9033         }
9034
9035         /* Pretend that it worked, so will continue processing before dieing */
9036         (*s)++;
9037         return TRUE;
9038 }
9039
9040 STATIC char *
9041 S_scan_pat(pTHX_ char *start, I32 type)
9042 {
9043     dVAR;
9044     PMOP *pm;
9045     char *s = scan_str(start,!!PL_madskills,FALSE);
9046     const char * const valid_flags =
9047         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9048     char charset = '\0';    /* character set modifier */
9049 #ifdef PERL_MAD
9050     char *modstart;
9051 #endif
9052
9053     PERL_ARGS_ASSERT_SCAN_PAT;
9054
9055     if (!s) {
9056         const char * const delimiter = skipspace(start);
9057         Perl_croak(aTHX_
9058                    (const char *)
9059                    (*delimiter == '?'
9060                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
9061                     : "Search pattern not terminated" ));
9062     }
9063
9064     pm = (PMOP*)newPMOP(type, 0);
9065     if (PL_multi_open == '?') {
9066         /* This is the only point in the code that sets PMf_ONCE:  */
9067         pm->op_pmflags |= PMf_ONCE;
9068
9069         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9070            allows us to restrict the list needed by reset to just the ??
9071            matches.  */
9072         assert(type != OP_TRANS);
9073         if (PL_curstash) {
9074             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9075             U32 elements;
9076             if (!mg) {
9077                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9078                                  0);
9079             }
9080             elements = mg->mg_len / sizeof(PMOP**);
9081             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9082             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9083             mg->mg_len = elements * sizeof(PMOP**);
9084             PmopSTASH_set(pm,PL_curstash);
9085         }
9086     }
9087 #ifdef PERL_MAD
9088     modstart = s;
9089 #endif
9090     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9091 #ifdef PERL_MAD
9092     if (PL_madskills && modstart != s) {
9093         SV* tmptoken = newSVpvn(modstart, s - modstart);
9094         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9095     }
9096 #endif
9097     /* issue a warning if /c is specified,but /g is not */
9098     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9099     {
9100         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9101                        "Use of /c modifier is meaningless without /g" );
9102     }
9103
9104     PL_lex_op = (OP*)pm;
9105     pl_yylval.ival = OP_MATCH;
9106     return s;
9107 }
9108
9109 STATIC char *
9110 S_scan_subst(pTHX_ char *start)
9111 {
9112     dVAR;
9113     char *s;
9114     register PMOP *pm;
9115     I32 first_start;
9116     I32 es = 0;
9117     char charset = '\0';    /* character set modifier */
9118 #ifdef PERL_MAD
9119     char *modstart;
9120 #endif
9121
9122     PERL_ARGS_ASSERT_SCAN_SUBST;
9123
9124     pl_yylval.ival = OP_NULL;
9125
9126     s = scan_str(start,!!PL_madskills,FALSE);
9127
9128     if (!s)
9129         Perl_croak(aTHX_ "Substitution pattern not terminated");
9130
9131     if (s[-1] == PL_multi_open)
9132         s--;
9133 #ifdef PERL_MAD
9134     if (PL_madskills) {
9135         CURMAD('q', PL_thisopen);
9136         CURMAD('_', PL_thiswhite);
9137         CURMAD('E', PL_thisstuff);
9138         CURMAD('Q', PL_thisclose);
9139         PL_realtokenstart = s - SvPVX(PL_linestr);
9140     }
9141 #endif
9142
9143     first_start = PL_multi_start;
9144     s = scan_str(s,!!PL_madskills,FALSE);
9145     if (!s) {
9146         if (PL_lex_stuff) {
9147             SvREFCNT_dec(PL_lex_stuff);
9148             PL_lex_stuff = NULL;
9149         }
9150         Perl_croak(aTHX_ "Substitution replacement not terminated");
9151     }
9152     PL_multi_start = first_start;       /* so whole substitution is taken together */
9153
9154     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9155
9156 #ifdef PERL_MAD
9157     if (PL_madskills) {
9158         CURMAD('z', PL_thisopen);
9159         CURMAD('R', PL_thisstuff);
9160         CURMAD('Z', PL_thisclose);
9161     }
9162     modstart = s;
9163 #endif
9164
9165     while (*s) {
9166         if (*s == EXEC_PAT_MOD) {
9167             s++;
9168             es++;
9169         }
9170         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9171         {
9172             break;
9173         }
9174     }
9175
9176 #ifdef PERL_MAD
9177     if (PL_madskills) {
9178         if (modstart != s)
9179             curmad('m', newSVpvn(modstart, s - modstart));
9180         append_madprops(PL_thismad, (OP*)pm, 0);
9181         PL_thismad = 0;
9182     }
9183 #endif
9184     if ((pm->op_pmflags & PMf_CONTINUE)) {
9185         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9186     }
9187
9188     if (es) {
9189         SV * const repl = newSVpvs("");
9190
9191         PL_sublex_info.super_bufptr = s;
9192         PL_sublex_info.super_bufend = PL_bufend;
9193         PL_multi_end = 0;
9194         pm->op_pmflags |= PMf_EVAL;
9195         while (es-- > 0) {
9196             if (es)
9197                 sv_catpvs(repl, "eval ");
9198             else
9199                 sv_catpvs(repl, "do ");
9200         }
9201         sv_catpvs(repl, "{");
9202         sv_catsv(repl, PL_lex_repl);
9203         if (strchr(SvPVX(PL_lex_repl), '#'))
9204             sv_catpvs(repl, "\n");
9205         sv_catpvs(repl, "}");
9206         SvEVALED_on(repl);
9207         SvREFCNT_dec(PL_lex_repl);
9208         PL_lex_repl = repl;
9209     }
9210
9211     PL_lex_op = (OP*)pm;
9212     pl_yylval.ival = OP_SUBST;
9213     return s;
9214 }
9215
9216 STATIC char *
9217 S_scan_trans(pTHX_ char *start)
9218 {
9219     dVAR;
9220     register char* s;
9221     OP *o;
9222     short *tbl;
9223     U8 squash;
9224     U8 del;
9225     U8 complement;
9226     bool nondestruct = 0;
9227 #ifdef PERL_MAD
9228     char *modstart;
9229 #endif
9230
9231     PERL_ARGS_ASSERT_SCAN_TRANS;
9232
9233     pl_yylval.ival = OP_NULL;
9234
9235     s = scan_str(start,!!PL_madskills,FALSE);
9236     if (!s)
9237         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9238
9239     if (s[-1] == PL_multi_open)
9240         s--;
9241 #ifdef PERL_MAD
9242     if (PL_madskills) {
9243         CURMAD('q', PL_thisopen);
9244         CURMAD('_', PL_thiswhite);
9245         CURMAD('E', PL_thisstuff);
9246         CURMAD('Q', PL_thisclose);
9247         PL_realtokenstart = s - SvPVX(PL_linestr);
9248     }
9249 #endif
9250
9251     s = scan_str(s,!!PL_madskills,FALSE);
9252     if (!s) {
9253         if (PL_lex_stuff) {
9254             SvREFCNT_dec(PL_lex_stuff);
9255             PL_lex_stuff = NULL;
9256         }
9257         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9258     }
9259     if (PL_madskills) {
9260         CURMAD('z', PL_thisopen);
9261         CURMAD('R', PL_thisstuff);
9262         CURMAD('Z', PL_thisclose);
9263     }
9264
9265     complement = del = squash = 0;
9266 #ifdef PERL_MAD
9267     modstart = s;
9268 #endif
9269     while (1) {
9270         switch (*s) {
9271         case 'c':
9272             complement = OPpTRANS_COMPLEMENT;
9273             break;
9274         case 'd':
9275             del = OPpTRANS_DELETE;
9276             break;
9277         case 's':
9278             squash = OPpTRANS_SQUASH;
9279             break;
9280         case 'r':
9281             nondestruct = 1;
9282             break;
9283         default:
9284             goto no_more;
9285         }
9286         s++;
9287     }
9288   no_more:
9289
9290     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9291     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9292     o->op_private &= ~OPpTRANS_ALL;
9293     o->op_private |= del|squash|complement|
9294       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9295       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
9296
9297     PL_lex_op = o;
9298     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9299
9300 #ifdef PERL_MAD
9301     if (PL_madskills) {
9302         if (modstart != s)
9303             curmad('m', newSVpvn(modstart, s - modstart));
9304         append_madprops(PL_thismad, o, 0);
9305         PL_thismad = 0;
9306     }
9307 #endif
9308
9309     return s;
9310 }
9311
9312 STATIC char *
9313 S_scan_heredoc(pTHX_ register char *s)
9314 {
9315     dVAR;
9316     SV *herewas;
9317     I32 op_type = OP_SCALAR;
9318     I32 len;
9319     SV *tmpstr;
9320     char term;
9321     const char *found_newline;
9322     register char *d;
9323     register char *e;
9324     char *peek;
9325     const int outer = (PL_rsfp || PL_parser->filtered)
9326                    && !(PL_lex_inwhat == OP_SCALAR);
9327 #ifdef PERL_MAD
9328     I32 stuffstart = s - SvPVX(PL_linestr);
9329     char *tstart;
9330
9331     PL_realtokenstart = -1;
9332 #endif
9333
9334     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9335
9336     s += 2;
9337     d = PL_tokenbuf;
9338     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9339     if (!outer)
9340         *d++ = '\n';
9341     peek = s;
9342     while (SPACE_OR_TAB(*peek))
9343         peek++;
9344     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9345         s = peek;
9346         term = *s++;
9347         s = delimcpy(d, e, s, PL_bufend, term, &len);
9348         d += len;
9349         if (s < PL_bufend)
9350             s++;
9351     }
9352     else {
9353         if (*s == '\\')
9354             s++, term = '\'';
9355         else
9356             term = '"';
9357         if (!isALNUM_lazy_if(s,UTF))
9358             deprecate("bare << to mean <<\"\"");
9359         for (; isALNUM_lazy_if(s,UTF); s++) {
9360             if (d < e)
9361                 *d++ = *s;
9362         }
9363     }
9364     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9365         Perl_croak(aTHX_ "Delimiter for here document is too long");
9366     *d++ = '\n';
9367     *d = '\0';
9368     len = d - PL_tokenbuf;
9369
9370 #ifdef PERL_MAD
9371     if (PL_madskills) {
9372         tstart = PL_tokenbuf + !outer;
9373         PL_thisclose = newSVpvn(tstart, len - !outer);
9374         tstart = SvPVX(PL_linestr) + stuffstart;
9375         PL_thisopen = newSVpvn(tstart, s - tstart);
9376         stuffstart = s - SvPVX(PL_linestr);
9377     }
9378 #endif
9379 #ifndef PERL_STRICT_CR
9380     d = strchr(s, '\r');
9381     if (d) {
9382         char * const olds = s;
9383         s = d;
9384         while (s < PL_bufend) {
9385             if (*s == '\r') {
9386                 *d++ = '\n';
9387                 if (*++s == '\n')
9388                     s++;
9389             }
9390             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9391                 *d++ = *s++;
9392                 s++;
9393             }
9394             else
9395                 *d++ = *s++;
9396         }
9397         *d = '\0';
9398         PL_bufend = d;
9399         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9400         s = olds;
9401     }
9402 #endif
9403 #ifdef PERL_MAD
9404     found_newline = 0;
9405 #endif
9406     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9407         herewas = newSVpvn(s,PL_bufend-s);
9408     }
9409     else {
9410 #ifdef PERL_MAD
9411         herewas = newSVpvn(s-1,found_newline-s+1);
9412 #else
9413         s--;
9414         herewas = newSVpvn(s,found_newline-s);
9415 #endif
9416     }
9417 #ifdef PERL_MAD
9418     if (PL_madskills) {
9419         tstart = SvPVX(PL_linestr) + stuffstart;
9420         if (PL_thisstuff)
9421             sv_catpvn(PL_thisstuff, tstart, s - tstart);
9422         else
9423             PL_thisstuff = newSVpvn(tstart, s - tstart);
9424     }
9425 #endif
9426     s += SvCUR(herewas);
9427
9428 #ifdef PERL_MAD
9429     stuffstart = s - SvPVX(PL_linestr);
9430
9431     if (found_newline)
9432         s--;
9433 #endif
9434
9435     tmpstr = newSV_type(SVt_PVIV);
9436     SvGROW(tmpstr, 80);
9437     if (term == '\'') {
9438         op_type = OP_CONST;
9439         SvIV_set(tmpstr, -1);
9440     }
9441     else if (term == '`') {
9442         op_type = OP_BACKTICK;
9443         SvIV_set(tmpstr, '\\');
9444     }
9445
9446     CLINE;
9447     PL_multi_start = CopLINE(PL_curcop);
9448     PL_multi_open = PL_multi_close = '<';
9449     term = *PL_tokenbuf;
9450     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
9451      && !PL_parser->filtered) {
9452         char * const bufptr = PL_sublex_info.super_bufptr;
9453         char * const bufend = PL_sublex_info.super_bufend;
9454         char * const olds = s - SvCUR(herewas);
9455         s = strchr(bufptr, '\n');
9456         if (!s)
9457             s = bufend;
9458         d = s;
9459         while (s < bufend &&
9460           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9461             if (*s++ == '\n')
9462                 CopLINE_inc(PL_curcop);
9463         }
9464         if (s >= bufend) {
9465             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9466             missingterm(PL_tokenbuf);
9467         }
9468         sv_setpvn(herewas,bufptr,d-bufptr+1);
9469         sv_setpvn(tmpstr,d+1,s-d);
9470         s += len - 1;
9471         sv_catpvn(herewas,s,bufend-s);
9472         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9473
9474         s = olds;
9475         goto retval;
9476     }
9477     else if (!outer) {
9478         d = s;
9479         while (s < PL_bufend &&
9480           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9481             if (*s++ == '\n')
9482                 CopLINE_inc(PL_curcop);
9483         }
9484         if (s >= PL_bufend) {
9485             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9486             missingterm(PL_tokenbuf);
9487         }
9488         sv_setpvn(tmpstr,d+1,s-d);
9489 #ifdef PERL_MAD
9490         if (PL_madskills) {
9491             if (PL_thisstuff)
9492                 sv_catpvn(PL_thisstuff, d + 1, s - d);
9493             else
9494                 PL_thisstuff = newSVpvn(d + 1, s - d);
9495             stuffstart = s - SvPVX(PL_linestr);
9496         }
9497 #endif
9498         s += len - 1;
9499         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9500
9501         sv_catpvn(herewas,s,PL_bufend-s);
9502         sv_setsv(PL_linestr,herewas);
9503         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9504         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9505         PL_last_lop = PL_last_uni = NULL;
9506     }
9507     else
9508         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
9509     while (s >= PL_bufend) {    /* multiple line string? */
9510 #ifdef PERL_MAD
9511         if (PL_madskills) {
9512             tstart = SvPVX(PL_linestr) + stuffstart;
9513             if (PL_thisstuff)
9514                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9515             else
9516                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9517         }
9518 #endif
9519         PL_bufptr = s;
9520         CopLINE_inc(PL_curcop);
9521         if (!outer || !lex_next_chunk(0)) {
9522             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9523             missingterm(PL_tokenbuf);
9524         }
9525         CopLINE_dec(PL_curcop);
9526         s = PL_bufptr;
9527 #ifdef PERL_MAD
9528         stuffstart = s - SvPVX(PL_linestr);
9529 #endif
9530         CopLINE_inc(PL_curcop);
9531         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9532         PL_last_lop = PL_last_uni = NULL;
9533 #ifndef PERL_STRICT_CR
9534         if (PL_bufend - PL_linestart >= 2) {
9535             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9536                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9537             {
9538                 PL_bufend[-2] = '\n';
9539                 PL_bufend--;
9540                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9541             }
9542             else if (PL_bufend[-1] == '\r')
9543                 PL_bufend[-1] = '\n';
9544         }
9545         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9546             PL_bufend[-1] = '\n';
9547 #endif
9548         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9549             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9550             *(SvPVX(PL_linestr) + off ) = ' ';
9551             lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9552             sv_catsv(PL_linestr,herewas);
9553             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9554             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9555         }
9556         else {
9557             s = PL_bufend;
9558             sv_catsv(tmpstr,PL_linestr);
9559         }
9560     }
9561     s++;
9562 retval:
9563     PL_multi_end = CopLINE(PL_curcop);
9564     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9565         SvPV_shrink_to_cur(tmpstr);
9566     }
9567     SvREFCNT_dec(herewas);
9568     if (!IN_BYTES) {
9569         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9570             SvUTF8_on(tmpstr);
9571         else if (PL_encoding)
9572             sv_recode_to_utf8(tmpstr, PL_encoding);
9573     }
9574     PL_lex_stuff = tmpstr;
9575     pl_yylval.ival = op_type;
9576     return s;
9577 }
9578
9579 /* scan_inputsymbol
9580    takes: current position in input buffer
9581    returns: new position in input buffer
9582    side-effects: pl_yylval and lex_op are set.
9583
9584    This code handles:
9585
9586    <>           read from ARGV
9587    <FH>         read from filehandle
9588    <pkg::FH>    read from package qualified filehandle
9589    <pkg'FH>     read from package qualified filehandle
9590    <$fh>        read from filehandle in $fh
9591    <*.h>        filename glob
9592
9593 */
9594
9595 STATIC char *
9596 S_scan_inputsymbol(pTHX_ char *start)
9597 {
9598     dVAR;
9599     register char *s = start;           /* current position in buffer */
9600     char *end;
9601     I32 len;
9602     char *d = PL_tokenbuf;                                      /* start of temp holding space */
9603     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
9604
9605     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9606
9607     end = strchr(s, '\n');
9608     if (!end)
9609         end = PL_bufend;
9610     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
9611
9612     /* die if we didn't have space for the contents of the <>,
9613        or if it didn't end, or if we see a newline
9614     */
9615
9616     if (len >= (I32)sizeof PL_tokenbuf)
9617         Perl_croak(aTHX_ "Excessively long <> operator");
9618     if (s >= end)
9619         Perl_croak(aTHX_ "Unterminated <> operator");
9620
9621     s++;
9622
9623     /* check for <$fh>
9624        Remember, only scalar variables are interpreted as filehandles by
9625        this code.  Anything more complex (e.g., <$fh{$num}>) will be
9626        treated as a glob() call.
9627        This code makes use of the fact that except for the $ at the front,
9628        a scalar variable and a filehandle look the same.
9629     */
9630     if (*d == '$' && d[1]) d++;
9631
9632     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9633     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9634         d += UTF ? UTF8SKIP(d) : 1;
9635
9636     /* If we've tried to read what we allow filehandles to look like, and
9637        there's still text left, then it must be a glob() and not a getline.
9638        Use scan_str to pull out the stuff between the <> and treat it
9639        as nothing more than a string.
9640     */
9641
9642     if (d - PL_tokenbuf != len) {
9643         pl_yylval.ival = OP_GLOB;
9644         s = scan_str(start,!!PL_madskills,FALSE);
9645         if (!s)
9646            Perl_croak(aTHX_ "Glob not terminated");
9647         return s;
9648     }
9649     else {
9650         bool readline_overriden = FALSE;
9651         GV *gv_readline;
9652         GV **gvp;
9653         /* we're in a filehandle read situation */
9654         d = PL_tokenbuf;
9655
9656         /* turn <> into <ARGV> */
9657         if (!len)
9658             Copy("ARGV",d,5,char);
9659
9660         /* Check whether readline() is overriden */
9661         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9662         if ((gv_readline
9663                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9664                 ||
9665                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9666                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9667                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9668             readline_overriden = TRUE;
9669
9670         /* if <$fh>, create the ops to turn the variable into a
9671            filehandle
9672         */
9673         if (*d == '$') {
9674             /* try to find it in the pad for this block, otherwise find
9675                add symbol table ops
9676             */
9677             const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9678             if (tmp != NOT_IN_PAD) {
9679                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9680                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9681                     HEK * const stashname = HvNAME_HEK(stash);
9682                     SV * const sym = sv_2mortal(newSVhek(stashname));
9683                     sv_catpvs(sym, "::");
9684                     sv_catpv(sym, d+1);
9685                     d = SvPVX(sym);
9686                     goto intro_sym;
9687                 }
9688                 else {
9689                     OP * const o = newOP(OP_PADSV, 0);
9690                     o->op_targ = tmp;
9691                     PL_lex_op = readline_overriden
9692                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9693                                 op_append_elem(OP_LIST, o,
9694                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9695                         : (OP*)newUNOP(OP_READLINE, 0, o);
9696                 }
9697             }
9698             else {
9699                 GV *gv;
9700                 ++d;
9701 intro_sym:
9702                 gv = gv_fetchpv(d,
9703                                 (PL_in_eval
9704                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
9705                                  : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9706                                 SVt_PV);
9707                 PL_lex_op = readline_overriden
9708                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9709                             op_append_elem(OP_LIST,
9710                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9711                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9712                     : (OP*)newUNOP(OP_READLINE, 0,
9713                             newUNOP(OP_RV2SV, 0,
9714                                 newGVOP(OP_GV, 0, gv)));
9715             }
9716             if (!readline_overriden)
9717                 PL_lex_op->op_flags |= OPf_SPECIAL;
9718             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9719             pl_yylval.ival = OP_NULL;
9720         }
9721
9722         /* If it's none of the above, it must be a literal filehandle
9723            (<Foo::BAR> or <FOO>) so build a simple readline OP */
9724         else {
9725             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9726             PL_lex_op = readline_overriden
9727                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9728                         op_append_elem(OP_LIST,
9729                             newGVOP(OP_GV, 0, gv),
9730                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9731                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9732             pl_yylval.ival = OP_NULL;
9733         }
9734     }
9735
9736     return s;
9737 }
9738
9739
9740 /* scan_str
9741    takes: start position in buffer
9742           keep_quoted preserve \ on the embedded delimiter(s)
9743           keep_delims preserve the delimiters around the string
9744    returns: position to continue reading from buffer
9745    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9746         updates the read buffer.
9747
9748    This subroutine pulls a string out of the input.  It is called for:
9749         q               single quotes           q(literal text)
9750         '               single quotes           'literal text'
9751         qq              double quotes           qq(interpolate $here please)
9752         "               double quotes           "interpolate $here please"
9753         qx              backticks               qx(/bin/ls -l)
9754         `               backticks               `/bin/ls -l`
9755         qw              quote words             @EXPORT_OK = qw( func() $spam )
9756         m//             regexp match            m/this/
9757         s///            regexp substitute       s/this/that/
9758         tr///           string transliterate    tr/this/that/
9759         y///            string transliterate    y/this/that/
9760         ($*@)           sub prototypes          sub foo ($)
9761         (stuff)         sub attr parameters     sub foo : attr(stuff)
9762         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
9763
9764    In most of these cases (all but <>, patterns and transliterate)
9765    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
9766    calls scan_str().  s/// makes yylex() call scan_subst() which calls
9767    scan_str().  tr/// and y/// make yylex() call scan_trans() which
9768    calls scan_str().
9769
9770    It skips whitespace before the string starts, and treats the first
9771    character as the delimiter.  If the delimiter is one of ([{< then
9772    the corresponding "close" character )]}> is used as the closing
9773    delimiter.  It allows quoting of delimiters, and if the string has
9774    balanced delimiters ([{<>}]) it allows nesting.
9775
9776    On success, the SV with the resulting string is put into lex_stuff or,
9777    if that is already non-NULL, into lex_repl. The second case occurs only
9778    when parsing the RHS of the special constructs s/// and tr/// (y///).
9779    For convenience, the terminating delimiter character is stuffed into
9780    SvIVX of the SV.
9781 */
9782
9783 STATIC char *
9784 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9785 {
9786     dVAR;
9787     SV *sv;                             /* scalar value: string */
9788     const char *tmps;                   /* temp string, used for delimiter matching */
9789     register char *s = start;           /* current position in the buffer */
9790     register char term;                 /* terminating character */
9791     register char *to;                  /* current position in the sv's data */
9792     I32 brackets = 1;                   /* bracket nesting level */
9793     bool has_utf8 = FALSE;              /* is there any utf8 content? */
9794     I32 termcode;                       /* terminating char. code */
9795     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
9796     STRLEN termlen;                     /* length of terminating string */
9797     int last_off = 0;                   /* last position for nesting bracket */
9798 #ifdef PERL_MAD
9799     int stuffstart;
9800     char *tstart;
9801 #endif
9802
9803     PERL_ARGS_ASSERT_SCAN_STR;
9804
9805     /* skip space before the delimiter */
9806     if (isSPACE(*s)) {
9807         s = PEEKSPACE(s);
9808     }
9809
9810 #ifdef PERL_MAD
9811     if (PL_realtokenstart >= 0) {
9812         stuffstart = PL_realtokenstart;
9813         PL_realtokenstart = -1;
9814     }
9815     else
9816         stuffstart = start - SvPVX(PL_linestr);
9817 #endif
9818     /* mark where we are, in case we need to report errors */
9819     CLINE;
9820
9821     /* after skipping whitespace, the next character is the terminator */
9822     term = *s;
9823     if (!UTF) {
9824         termcode = termstr[0] = term;
9825         termlen = 1;
9826     }
9827     else {
9828         termcode = utf8_to_uvchr((U8*)s, &termlen);
9829         Copy(s, termstr, termlen, U8);
9830         if (!UTF8_IS_INVARIANT(term))
9831             has_utf8 = TRUE;
9832     }
9833
9834     /* mark where we are */
9835     PL_multi_start = CopLINE(PL_curcop);
9836     PL_multi_open = term;
9837
9838     /* find corresponding closing delimiter */
9839     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9840         termcode = termstr[0] = term = tmps[5];
9841
9842     PL_multi_close = term;
9843
9844     /* create a new SV to hold the contents.  79 is the SV's initial length.
9845        What a random number. */
9846     sv = newSV_type(SVt_PVIV);
9847     SvGROW(sv, 80);
9848     SvIV_set(sv, termcode);
9849     (void)SvPOK_only(sv);               /* validate pointer */
9850
9851     /* move past delimiter and try to read a complete string */
9852     if (keep_delims)
9853         sv_catpvn(sv, s, termlen);
9854     s += termlen;
9855 #ifdef PERL_MAD
9856     tstart = SvPVX(PL_linestr) + stuffstart;
9857     if (!PL_thisopen && !keep_delims) {
9858         PL_thisopen = newSVpvn(tstart, s - tstart);
9859         stuffstart = s - SvPVX(PL_linestr);
9860     }
9861 #endif
9862     for (;;) {
9863         if (PL_encoding && !UTF) {
9864             bool cont = TRUE;
9865
9866             while (cont) {
9867                 int offset = s - SvPVX_const(PL_linestr);
9868                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9869                                            &offset, (char*)termstr, termlen);
9870                 const char * const ns = SvPVX_const(PL_linestr) + offset;
9871                 char * const svlast = SvEND(sv) - 1;
9872
9873                 for (; s < ns; s++) {
9874                     if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9875                         CopLINE_inc(PL_curcop);
9876                 }
9877                 if (!found)
9878                     goto read_more_line;
9879                 else {
9880                     /* handle quoted delimiters */
9881                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9882                         const char *t;
9883                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9884                             t--;
9885                         if ((svlast-1 - t) % 2) {
9886                             if (!keep_quoted) {
9887                                 *(svlast-1) = term;
9888                                 *svlast = '\0';
9889                                 SvCUR_set(sv, SvCUR(sv) - 1);
9890                             }
9891                             continue;
9892                         }
9893                     }
9894                     if (PL_multi_open == PL_multi_close) {
9895                         cont = FALSE;
9896                     }
9897                     else {
9898                         const char *t;
9899                         char *w;
9900                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9901                             /* At here, all closes are "was quoted" one,
9902                                so we don't check PL_multi_close. */
9903                             if (*t == '\\') {
9904                                 if (!keep_quoted && *(t+1) == PL_multi_open)
9905                                     t++;
9906                                 else
9907                                     *w++ = *t++;
9908                             }
9909                             else if (*t == PL_multi_open)
9910                                 brackets++;
9911
9912                             *w = *t;
9913                         }
9914                         if (w < t) {
9915                             *w++ = term;
9916                             *w = '\0';
9917                             SvCUR_set(sv, w - SvPVX_const(sv));
9918                         }
9919                         last_off = w - SvPVX(sv);
9920                         if (--brackets <= 0)
9921                             cont = FALSE;
9922                     }
9923                 }
9924             }
9925             if (!keep_delims) {
9926                 SvCUR_set(sv, SvCUR(sv) - 1);
9927                 *SvEND(sv) = '\0';
9928             }
9929             break;
9930         }
9931
9932         /* extend sv if need be */
9933         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9934         /* set 'to' to the next character in the sv's string */
9935         to = SvPVX(sv)+SvCUR(sv);
9936
9937         /* if open delimiter is the close delimiter read unbridle */
9938         if (PL_multi_open == PL_multi_close) {
9939             for (; s < PL_bufend; s++,to++) {
9940                 /* embedded newlines increment the current line number */
9941                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9942                     CopLINE_inc(PL_curcop);
9943                 /* handle quoted delimiters */
9944                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9945                     if (!keep_quoted && s[1] == term)
9946                         s++;
9947                 /* any other quotes are simply copied straight through */
9948                     else
9949                         *to++ = *s++;
9950                 }
9951                 /* terminate when run out of buffer (the for() condition), or
9952                    have found the terminator */
9953                 else if (*s == term) {
9954                     if (termlen == 1)
9955                         break;
9956                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9957                         break;
9958                 }
9959                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9960                     has_utf8 = TRUE;
9961                 *to = *s;
9962             }
9963         }
9964
9965         /* if the terminator isn't the same as the start character (e.g.,
9966            matched brackets), we have to allow more in the quoting, and
9967            be prepared for nested brackets.
9968         */
9969         else {
9970             /* read until we run out of string, or we find the terminator */
9971             for (; s < PL_bufend; s++,to++) {
9972                 /* embedded newlines increment the line count */
9973                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9974                     CopLINE_inc(PL_curcop);
9975                 /* backslashes can escape the open or closing characters */
9976                 if (*s == '\\' && s+1 < PL_bufend) {
9977                     if (!keep_quoted &&
9978                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9979                         s++;
9980                     else
9981                         *to++ = *s++;
9982                 }
9983                 /* allow nested opens and closes */
9984                 else if (*s == PL_multi_close && --brackets <= 0)
9985                     break;
9986                 else if (*s == PL_multi_open)
9987                     brackets++;
9988                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9989                     has_utf8 = TRUE;
9990                 *to = *s;
9991             }
9992         }
9993         /* terminate the copied string and update the sv's end-of-string */
9994         *to = '\0';
9995         SvCUR_set(sv, to - SvPVX_const(sv));
9996
9997         /*
9998          * this next chunk reads more into the buffer if we're not done yet
9999          */
10000
10001         if (s < PL_bufend)
10002             break;              /* handle case where we are done yet :-) */
10003
10004 #ifndef PERL_STRICT_CR
10005         if (to - SvPVX_const(sv) >= 2) {
10006             if ((to[-2] == '\r' && to[-1] == '\n') ||
10007                 (to[-2] == '\n' && to[-1] == '\r'))
10008             {
10009                 to[-2] = '\n';
10010                 to--;
10011                 SvCUR_set(sv, to - SvPVX_const(sv));
10012             }
10013             else if (to[-1] == '\r')
10014                 to[-1] = '\n';
10015         }
10016         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10017             to[-1] = '\n';
10018 #endif
10019
10020      read_more_line:
10021         /* if we're out of file, or a read fails, bail and reset the current
10022            line marker so we can report where the unterminated string began
10023         */
10024 #ifdef PERL_MAD
10025         if (PL_madskills) {
10026             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10027             if (PL_thisstuff)
10028                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10029             else
10030                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10031         }
10032 #endif
10033         CopLINE_inc(PL_curcop);
10034         PL_bufptr = PL_bufend;
10035         if (!lex_next_chunk(0)) {
10036             sv_free(sv);
10037             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10038             return NULL;
10039         }
10040         s = PL_bufptr;
10041 #ifdef PERL_MAD
10042         stuffstart = 0;
10043 #endif
10044     }
10045
10046     /* at this point, we have successfully read the delimited string */
10047
10048     if (!PL_encoding || UTF) {
10049 #ifdef PERL_MAD
10050         if (PL_madskills) {
10051             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10052             const int len = s - tstart;
10053             if (PL_thisstuff)
10054                 sv_catpvn(PL_thisstuff, tstart, len);
10055             else
10056                 PL_thisstuff = newSVpvn(tstart, len);
10057             if (!PL_thisclose && !keep_delims)
10058                 PL_thisclose = newSVpvn(s,termlen);
10059         }
10060 #endif
10061
10062         if (keep_delims)
10063             sv_catpvn(sv, s, termlen);
10064         s += termlen;
10065     }
10066 #ifdef PERL_MAD
10067     else {
10068         if (PL_madskills) {
10069             char * const tstart = SvPVX(PL_linestr) + stuffstart;
10070             const int len = s - tstart - termlen;
10071             if (PL_thisstuff)
10072                 sv_catpvn(PL_thisstuff, tstart, len);
10073             else
10074                 PL_thisstuff = newSVpvn(tstart, len);
10075             if (!PL_thisclose && !keep_delims)
10076                 PL_thisclose = newSVpvn(s - termlen,termlen);
10077         }
10078     }
10079 #endif
10080     if (has_utf8 || PL_encoding)
10081         SvUTF8_on(sv);
10082
10083     PL_multi_end = CopLINE(PL_curcop);
10084
10085     /* if we allocated too much space, give some back */
10086     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10087         SvLEN_set(sv, SvCUR(sv) + 1);
10088         SvPV_renew(sv, SvLEN(sv));
10089     }
10090
10091     /* decide whether this is the first or second quoted string we've read
10092        for this op
10093     */
10094
10095     if (PL_lex_stuff)
10096         PL_lex_repl = sv;
10097     else
10098         PL_lex_stuff = sv;
10099     return s;
10100 }
10101
10102 /*
10103   scan_num
10104   takes: pointer to position in buffer
10105   returns: pointer to new position in buffer
10106   side-effects: builds ops for the constant in pl_yylval.op
10107
10108   Read a number in any of the formats that Perl accepts:
10109
10110   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10111   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10112   0b[01](_?[01])*
10113   0[0-7](_?[0-7])*
10114   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10115
10116   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10117   thing it reads.
10118
10119   If it reads a number without a decimal point or an exponent, it will
10120   try converting the number to an integer and see if it can do so
10121   without loss of precision.
10122 */
10123
10124 char *
10125 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10126 {
10127     dVAR;
10128     register const char *s = start;     /* current position in buffer */
10129     register char *d;                   /* destination in temp buffer */
10130     register char *e;                   /* end of temp buffer */
10131     NV nv;                              /* number read, as a double */
10132     SV *sv = NULL;                      /* place to put the converted number */
10133     bool floatit;                       /* boolean: int or float? */
10134     const char *lastub = NULL;          /* position of last underbar */
10135     static char const number_too_long[] = "Number too long";
10136
10137     PERL_ARGS_ASSERT_SCAN_NUM;
10138
10139     /* We use the first character to decide what type of number this is */
10140
10141     switch (*s) {
10142     default:
10143       Perl_croak(aTHX_ "panic: scan_num");
10144
10145     /* if it starts with a 0, it could be an octal number, a decimal in
10146        0.13 disguise, or a hexadecimal number, or a binary number. */
10147     case '0':
10148         {
10149           /* variables:
10150              u          holds the "number so far"
10151              shift      the power of 2 of the base
10152                         (hex == 4, octal == 3, binary == 1)
10153              overflowed was the number more than we can hold?
10154
10155              Shift is used when we add a digit.  It also serves as an "are
10156              we in octal/hex/binary?" indicator to disallow hex characters
10157              when in octal mode.
10158            */
10159             NV n = 0.0;
10160             UV u = 0;
10161             I32 shift;
10162             bool overflowed = FALSE;
10163             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10164             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10165             static const char* const bases[5] =
10166               { "", "binary", "", "octal", "hexadecimal" };
10167             static const char* const Bases[5] =
10168               { "", "Binary", "", "Octal", "Hexadecimal" };
10169             static const char* const maxima[5] =
10170               { "",
10171                 "0b11111111111111111111111111111111",
10172                 "",
10173                 "037777777777",
10174                 "0xffffffff" };
10175             const char *base, *Base, *max;
10176
10177             /* check for hex */
10178             if (s[1] == 'x' || s[1] == 'X') {
10179                 shift = 4;
10180                 s += 2;
10181                 just_zero = FALSE;
10182             } else if (s[1] == 'b' || s[1] == 'B') {
10183                 shift = 1;
10184                 s += 2;
10185                 just_zero = FALSE;
10186             }
10187             /* check for a decimal in disguise */
10188             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10189                 goto decimal;
10190             /* so it must be octal */
10191             else {
10192                 shift = 3;
10193                 s++;
10194             }
10195
10196             if (*s == '_') {
10197                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10198                                "Misplaced _ in number");
10199                lastub = s++;
10200             }
10201
10202             base = bases[shift];
10203             Base = Bases[shift];
10204             max  = maxima[shift];
10205
10206             /* read the rest of the number */
10207             for (;;) {
10208                 /* x is used in the overflow test,
10209                    b is the digit we're adding on. */
10210                 UV x, b;
10211
10212                 switch (*s) {
10213
10214                 /* if we don't mention it, we're done */
10215                 default:
10216                     goto out;
10217
10218                 /* _ are ignored -- but warned about if consecutive */
10219                 case '_':
10220                     if (lastub && s == lastub + 1)
10221                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10222                                        "Misplaced _ in number");
10223                     lastub = s++;
10224                     break;
10225
10226                 /* 8 and 9 are not octal */
10227                 case '8': case '9':
10228                     if (shift == 3)
10229                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10230                     /* FALL THROUGH */
10231
10232                 /* octal digits */
10233                 case '2': case '3': case '4':
10234                 case '5': case '6': case '7':
10235                     if (shift == 1)
10236                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10237                     /* FALL THROUGH */
10238
10239                 case '0': case '1':
10240                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10241                     goto digit;
10242
10243                 /* hex digits */
10244                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10245                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10246                     /* make sure they said 0x */
10247                     if (shift != 4)
10248                         goto out;
10249                     b = (*s++ & 7) + 9;
10250
10251                     /* Prepare to put the digit we have onto the end
10252                        of the number so far.  We check for overflows.
10253                     */
10254
10255                   digit:
10256                     just_zero = FALSE;
10257                     if (!overflowed) {
10258                         x = u << shift; /* make room for the digit */
10259
10260                         if ((x >> shift) != u
10261                             && !(PL_hints & HINT_NEW_BINARY)) {
10262                             overflowed = TRUE;
10263                             n = (NV) u;
10264                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10265                                              "Integer overflow in %s number",
10266                                              base);
10267                         } else
10268                             u = x | b;          /* add the digit to the end */
10269                     }
10270                     if (overflowed) {
10271                         n *= nvshift[shift];
10272                         /* If an NV has not enough bits in its
10273                          * mantissa to represent an UV this summing of
10274                          * small low-order numbers is a waste of time
10275                          * (because the NV cannot preserve the
10276                          * low-order bits anyway): we could just
10277                          * remember when did we overflow and in the
10278                          * end just multiply n by the right
10279                          * amount. */
10280                         n += (NV) b;
10281                     }
10282                     break;
10283                 }
10284             }
10285
10286           /* if we get here, we had success: make a scalar value from
10287              the number.
10288           */
10289           out:
10290
10291             /* final misplaced underbar check */
10292             if (s[-1] == '_') {
10293                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10294             }
10295
10296             if (overflowed) {
10297                 if (n > 4294967295.0)
10298                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10299                                    "%s number > %s non-portable",
10300                                    Base, max);
10301                 sv = newSVnv(n);
10302             }
10303             else {
10304 #if UVSIZE > 4
10305                 if (u > 0xffffffff)
10306                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10307                                    "%s number > %s non-portable",
10308                                    Base, max);
10309 #endif
10310                 sv = newSVuv(u);
10311             }
10312             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10313                 sv = new_constant(start, s - start, "integer",
10314                                   sv, NULL, NULL, 0);
10315             else if (PL_hints & HINT_NEW_BINARY)
10316                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10317         }
10318         break;
10319
10320     /*
10321       handle decimal numbers.
10322       we're also sent here when we read a 0 as the first digit
10323     */
10324     case '1': case '2': case '3': case '4': case '5':
10325     case '6': case '7': case '8': case '9': case '.':
10326       decimal:
10327         d = PL_tokenbuf;
10328         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10329         floatit = FALSE;
10330
10331         /* read next group of digits and _ and copy into d */
10332         while (isDIGIT(*s) || *s == '_') {
10333             /* skip underscores, checking for misplaced ones
10334                if -w is on
10335             */
10336             if (*s == '_') {
10337                 if (lastub && s == lastub + 1)
10338                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10339                                    "Misplaced _ in number");
10340                 lastub = s++;
10341             }
10342             else {
10343                 /* check for end of fixed-length buffer */
10344                 if (d >= e)
10345                     Perl_croak(aTHX_ number_too_long);
10346                 /* if we're ok, copy the character */
10347                 *d++ = *s++;
10348             }
10349         }
10350
10351         /* final misplaced underbar check */
10352         if (lastub && s == lastub + 1) {
10353             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10354         }
10355
10356         /* read a decimal portion if there is one.  avoid
10357            3..5 being interpreted as the number 3. followed
10358            by .5
10359         */
10360         if (*s == '.' && s[1] != '.') {
10361             floatit = TRUE;
10362             *d++ = *s++;
10363
10364             if (*s == '_') {
10365                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10366                                "Misplaced _ in number");
10367                 lastub = s;
10368             }
10369
10370             /* copy, ignoring underbars, until we run out of digits.
10371             */
10372             for (; isDIGIT(*s) || *s == '_'; s++) {
10373                 /* fixed length buffer check */
10374                 if (d >= e)
10375                     Perl_croak(aTHX_ number_too_long);
10376                 if (*s == '_') {
10377                    if (lastub && s == lastub + 1)
10378                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10379                                       "Misplaced _ in number");
10380                    lastub = s;
10381                 }
10382                 else
10383                     *d++ = *s;
10384             }
10385             /* fractional part ending in underbar? */
10386             if (s[-1] == '_') {
10387                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10388                                "Misplaced _ in number");
10389             }
10390             if (*s == '.' && isDIGIT(s[1])) {
10391                 /* oops, it's really a v-string, but without the "v" */
10392                 s = start;
10393                 goto vstring;
10394             }
10395         }
10396
10397         /* read exponent part, if present */
10398         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10399             floatit = TRUE;
10400             s++;
10401
10402             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10403             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
10404
10405             /* stray preinitial _ */
10406             if (*s == '_') {
10407                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10408                                "Misplaced _ in number");
10409                 lastub = s++;
10410             }
10411
10412             /* allow positive or negative exponent */
10413             if (*s == '+' || *s == '-')
10414                 *d++ = *s++;
10415
10416             /* stray initial _ */
10417             if (*s == '_') {
10418                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10419                                "Misplaced _ in number");
10420                 lastub = s++;
10421             }
10422
10423             /* read digits of exponent */
10424             while (isDIGIT(*s) || *s == '_') {
10425                 if (isDIGIT(*s)) {
10426                     if (d >= e)
10427                         Perl_croak(aTHX_ number_too_long);
10428                     *d++ = *s++;
10429                 }
10430                 else {
10431                    if (((lastub && s == lastub + 1) ||
10432                         (!isDIGIT(s[1]) && s[1] != '_')))
10433                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10434                                       "Misplaced _ in number");
10435                    lastub = s++;
10436                 }
10437             }
10438         }
10439
10440
10441         /*
10442            We try to do an integer conversion first if no characters
10443            indicating "float" have been found.
10444          */
10445
10446         if (!floatit) {
10447             UV uv;
10448             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10449
10450             if (flags == IS_NUMBER_IN_UV) {
10451               if (uv <= IV_MAX)
10452                 sv = newSViv(uv); /* Prefer IVs over UVs. */
10453               else
10454                 sv = newSVuv(uv);
10455             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10456               if (uv <= (UV) IV_MIN)
10457                 sv = newSViv(-(IV)uv);
10458               else
10459                 floatit = TRUE;
10460             } else
10461               floatit = TRUE;
10462         }
10463         if (floatit) {
10464             /* terminate the string */
10465             *d = '\0';
10466             nv = Atof(PL_tokenbuf);
10467             sv = newSVnv(nv);
10468         }
10469
10470         if ( floatit
10471              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10472             const char *const key = floatit ? "float" : "integer";
10473             const STRLEN keylen = floatit ? 5 : 7;
10474             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10475                                 key, keylen, sv, NULL, NULL, 0);
10476         }
10477         break;
10478
10479     /* if it starts with a v, it could be a v-string */
10480     case 'v':
10481 vstring:
10482                 sv = newSV(5); /* preallocate storage space */
10483                 s = scan_vstring(s, PL_bufend, sv);
10484         break;
10485     }
10486
10487     /* make the op for the constant and return */
10488
10489     if (sv)
10490         lvalp->opval = newSVOP(OP_CONST, 0, sv);
10491     else
10492         lvalp->opval = NULL;
10493
10494     return (char *)s;
10495 }
10496
10497 STATIC char *
10498 S_scan_formline(pTHX_ register char *s)
10499 {
10500     dVAR;
10501     register char *eol;
10502     register char *t;
10503     SV * const stuff = newSVpvs("");
10504     bool needargs = FALSE;
10505     bool eofmt = FALSE;
10506 #ifdef PERL_MAD
10507     char *tokenstart = s;
10508     SV* savewhite = NULL;
10509
10510     if (PL_madskills) {
10511         savewhite = PL_thiswhite;
10512         PL_thiswhite = 0;
10513     }
10514 #endif
10515
10516     PERL_ARGS_ASSERT_SCAN_FORMLINE;
10517
10518     while (!needargs) {
10519         if (*s == '.') {
10520             t = s+1;
10521 #ifdef PERL_STRICT_CR
10522             while (SPACE_OR_TAB(*t))
10523                 t++;
10524 #else
10525             while (SPACE_OR_TAB(*t) || *t == '\r')
10526                 t++;
10527 #endif
10528             if (*t == '\n' || t == PL_bufend) {
10529                 eofmt = TRUE;
10530                 break;
10531             }
10532         }
10533         if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
10534             eol = (char *) memchr(s,'\n',PL_bufend-s);
10535             if (!eol++)
10536                 eol = PL_bufend;
10537         }
10538         else
10539             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10540         if (*s != '#') {
10541             for (t = s; t < eol; t++) {
10542                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10543                     needargs = FALSE;
10544                     goto enough;        /* ~~ must be first line in formline */
10545                 }
10546                 if (*t == '@' || *t == '^')
10547                     needargs = TRUE;
10548             }
10549             if (eol > s) {
10550                 sv_catpvn(stuff, s, eol-s);
10551 #ifndef PERL_STRICT_CR
10552                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10553                     char *end = SvPVX(stuff) + SvCUR(stuff);
10554                     end[-2] = '\n';
10555                     end[-1] = '\0';
10556                     SvCUR_set(stuff, SvCUR(stuff) - 1);
10557                 }
10558 #endif
10559             }
10560             else
10561               break;
10562         }
10563         s = (char*)eol;
10564         if (PL_rsfp || PL_parser->filtered) {
10565             bool got_some;
10566 #ifdef PERL_MAD
10567             if (PL_madskills) {
10568                 if (PL_thistoken)
10569                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10570                 else
10571                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10572             }
10573 #endif
10574             PL_bufptr = PL_bufend;
10575             CopLINE_inc(PL_curcop);
10576             got_some = lex_next_chunk(0);
10577             CopLINE_dec(PL_curcop);
10578             s = PL_bufptr;
10579 #ifdef PERL_MAD
10580             tokenstart = PL_bufptr;
10581 #endif
10582             if (!got_some)
10583                 break;
10584         }
10585         incline(s);
10586     }
10587   enough:
10588     if (SvCUR(stuff)) {
10589         PL_expect = XTERM;
10590         if (needargs) {
10591             PL_lex_state = LEX_NORMAL;
10592             start_force(PL_curforce);
10593             NEXTVAL_NEXTTOKE.ival = 0;
10594             force_next(',');
10595         }
10596         else
10597             PL_lex_state = LEX_FORMLINE;
10598         if (!IN_BYTES) {
10599             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10600                 SvUTF8_on(stuff);
10601             else if (PL_encoding)
10602                 sv_recode_to_utf8(stuff, PL_encoding);
10603         }
10604         start_force(PL_curforce);
10605         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10606         force_next(THING);
10607         start_force(PL_curforce);
10608         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10609         force_next(LSTOP);
10610     }
10611     else {
10612         SvREFCNT_dec(stuff);
10613         if (eofmt)
10614             PL_lex_formbrack = 0;
10615         PL_bufptr = s;
10616     }
10617 #ifdef PERL_MAD
10618     if (PL_madskills) {
10619         if (PL_thistoken)
10620             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10621         else
10622             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10623         PL_thiswhite = savewhite;
10624     }
10625 #endif
10626     return s;
10627 }
10628
10629 I32
10630 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10631 {
10632     dVAR;
10633     const I32 oldsavestack_ix = PL_savestack_ix;
10634     CV* const outsidecv = PL_compcv;
10635
10636     if (PL_compcv) {
10637         assert(SvTYPE(PL_compcv) == SVt_PVCV);
10638     }
10639     SAVEI32(PL_subline);
10640     save_item(PL_subname);
10641     SAVESPTR(PL_compcv);
10642
10643     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10644     CvFLAGS(PL_compcv) |= flags;
10645
10646     PL_subline = CopLINE(PL_curcop);
10647     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10648     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10649     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10650
10651     return oldsavestack_ix;
10652 }
10653
10654 #ifdef __SC__
10655 #pragma segment Perl_yylex
10656 #endif
10657 static int
10658 S_yywarn(pTHX_ const char *const s)
10659 {
10660     dVAR;
10661
10662     PERL_ARGS_ASSERT_YYWARN;
10663
10664     PL_in_eval |= EVAL_WARNONLY;
10665     yyerror(s);
10666     PL_in_eval &= ~EVAL_WARNONLY;
10667     return 0;
10668 }
10669
10670 int
10671 Perl_yyerror(pTHX_ const char *const s)
10672 {
10673     dVAR;
10674     const char *where = NULL;
10675     const char *context = NULL;
10676     int contlen = -1;
10677     SV *msg;
10678     int yychar  = PL_parser->yychar;
10679
10680     PERL_ARGS_ASSERT_YYERROR;
10681
10682     if (!yychar || (yychar == ';' && !PL_rsfp))
10683         where = "at EOF";
10684     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10685       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10686       PL_oldbufptr != PL_bufptr) {
10687         /*
10688                 Only for NetWare:
10689                 The code below is removed for NetWare because it abends/crashes on NetWare
10690                 when the script has error such as not having the closing quotes like:
10691                     if ($var eq "value)
10692                 Checking of white spaces is anyway done in NetWare code.
10693         */
10694 #ifndef NETWARE
10695         while (isSPACE(*PL_oldoldbufptr))
10696             PL_oldoldbufptr++;
10697 #endif
10698         context = PL_oldoldbufptr;
10699         contlen = PL_bufptr - PL_oldoldbufptr;
10700     }
10701     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10702       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10703         /*
10704                 Only for NetWare:
10705                 The code below is removed for NetWare because it abends/crashes on NetWare
10706                 when the script has error such as not having the closing quotes like:
10707                     if ($var eq "value)
10708                 Checking of white spaces is anyway done in NetWare code.
10709         */
10710 #ifndef NETWARE
10711         while (isSPACE(*PL_oldbufptr))
10712             PL_oldbufptr++;
10713 #endif
10714         context = PL_oldbufptr;
10715         contlen = PL_bufptr - PL_oldbufptr;
10716     }
10717     else if (yychar > 255)
10718         where = "next token ???";
10719     else if (yychar == -2) { /* YYEMPTY */
10720         if (PL_lex_state == LEX_NORMAL ||
10721            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10722             where = "at end of line";
10723         else if (PL_lex_inpat)
10724             where = "within pattern";
10725         else
10726             where = "within string";
10727     }
10728     else {
10729         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10730         if (yychar < 32)
10731             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10732         else if (isPRINT_LC(yychar)) {
10733             const char string = yychar;
10734             sv_catpvn(where_sv, &string, 1);
10735         }
10736         else
10737             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10738         where = SvPVX_const(where_sv);
10739     }
10740     msg = sv_2mortal(newSVpv(s, 0));
10741     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10742         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10743     if (context)
10744         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10745     else
10746         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10747     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10748         Perl_sv_catpvf(aTHX_ msg,
10749         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10750                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10751         PL_multi_end = 0;
10752     }
10753     if (PL_in_eval & EVAL_WARNONLY) {
10754         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10755     }
10756     else
10757         qerror(msg);
10758     if (PL_error_count >= 10) {
10759         if (PL_in_eval && SvCUR(ERRSV))
10760             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10761                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
10762         else
10763             Perl_croak(aTHX_ "%s has too many errors.\n",
10764             OutCopFILE(PL_curcop));
10765     }
10766     PL_in_my = 0;
10767     PL_in_my_stash = NULL;
10768     return 0;
10769 }
10770 #ifdef __SC__
10771 #pragma segment Main
10772 #endif
10773
10774 STATIC char*
10775 S_swallow_bom(pTHX_ U8 *s)
10776 {
10777     dVAR;
10778     const STRLEN slen = SvCUR(PL_linestr);
10779
10780     PERL_ARGS_ASSERT_SWALLOW_BOM;
10781
10782     switch (s[0]) {
10783     case 0xFF:
10784         if (s[1] == 0xFE) {
10785             /* UTF-16 little-endian? (or UTF-32LE?) */
10786             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
10787                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10788 #ifndef PERL_NO_UTF16_FILTER
10789             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10790             s += 2;
10791             if (PL_bufend > (char*)s) {
10792                 s = add_utf16_textfilter(s, TRUE);
10793             }
10794 #else
10795             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10796 #endif
10797         }
10798         break;
10799     case 0xFE:
10800         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
10801 #ifndef PERL_NO_UTF16_FILTER
10802             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10803             s += 2;
10804             if (PL_bufend > (char *)s) {
10805                 s = add_utf16_textfilter(s, FALSE);
10806             }
10807 #else
10808             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10809 #endif
10810         }
10811         break;
10812     case 0xEF:
10813         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10814             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10815             s += 3;                      /* UTF-8 */
10816         }
10817         break;
10818     case 0:
10819         if (slen > 3) {
10820              if (s[1] == 0) {
10821                   if (s[2] == 0xFE && s[3] == 0xFF) {
10822                        /* UTF-32 big-endian */
10823                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10824                   }
10825              }
10826              else if (s[2] == 0 && s[3] != 0) {
10827                   /* Leading bytes
10828                    * 00 xx 00 xx
10829                    * are a good indicator of UTF-16BE. */
10830 #ifndef PERL_NO_UTF16_FILTER
10831                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10832                   s = add_utf16_textfilter(s, FALSE);
10833 #else
10834                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10835 #endif
10836              }
10837         }
10838 #ifdef EBCDIC
10839     case 0xDD:
10840         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10841             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10842             s += 4;                      /* UTF-8 */
10843         }
10844         break;
10845 #endif
10846
10847     default:
10848          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10849                   /* Leading bytes
10850                    * xx 00 xx 00
10851                    * are a good indicator of UTF-16LE. */
10852 #ifndef PERL_NO_UTF16_FILTER
10853               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10854               s = add_utf16_textfilter(s, TRUE);
10855 #else
10856               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10857 #endif
10858          }
10859     }
10860     return (char*)s;
10861 }
10862
10863
10864 #ifndef PERL_NO_UTF16_FILTER
10865 static I32
10866 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10867 {
10868     dVAR;
10869     SV *const filter = FILTER_DATA(idx);
10870     /* We re-use this each time round, throwing the contents away before we
10871        return.  */
10872     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10873     SV *const utf8_buffer = filter;
10874     IV status = IoPAGE(filter);
10875     const bool reverse = cBOOL(IoLINES(filter));
10876     I32 retval;
10877
10878     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10879
10880     /* As we're automatically added, at the lowest level, and hence only called
10881        from this file, we can be sure that we're not called in block mode. Hence
10882        don't bother writing code to deal with block mode.  */
10883     if (maxlen) {
10884         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10885     }
10886     if (status < 0) {
10887         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10888     }
10889     DEBUG_P(PerlIO_printf(Perl_debug_log,
10890                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10891                           FPTR2DPTR(void *, S_utf16_textfilter),
10892                           reverse ? 'l' : 'b', idx, maxlen, status,
10893                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10894
10895     while (1) {
10896         STRLEN chars;
10897         STRLEN have;
10898         I32 newlen;
10899         U8 *end;
10900         /* First, look in our buffer of existing UTF-8 data:  */
10901         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10902
10903         if (nl) {
10904             ++nl;
10905         } else if (status == 0) {
10906             /* EOF */
10907             IoPAGE(filter) = 0;
10908             nl = SvEND(utf8_buffer);
10909         }
10910         if (nl) {
10911             STRLEN got = nl - SvPVX(utf8_buffer);
10912             /* Did we have anything to append?  */
10913             retval = got != 0;
10914             sv_catpvn(sv, SvPVX(utf8_buffer), got);
10915             /* Everything else in this code works just fine if SVp_POK isn't
10916                set.  This, however, needs it, and we need it to work, else
10917                we loop infinitely because the buffer is never consumed.  */
10918             sv_chop(utf8_buffer, nl);
10919             break;
10920         }
10921
10922         /* OK, not a complete line there, so need to read some more UTF-16.
10923            Read an extra octect if the buffer currently has an odd number. */
10924         while (1) {
10925             if (status <= 0)
10926                 break;
10927             if (SvCUR(utf16_buffer) >= 2) {
10928                 /* Location of the high octet of the last complete code point.
10929                    Gosh, UTF-16 is a pain. All the benefits of variable length,
10930                    *coupled* with all the benefits of partial reads and
10931                    endianness.  */
10932                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10933                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10934
10935                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10936                     break;
10937                 }
10938
10939                 /* We have the first half of a surrogate. Read more.  */
10940                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10941             }
10942
10943             status = FILTER_READ(idx + 1, utf16_buffer,
10944                                  160 + (SvCUR(utf16_buffer) & 1));
10945             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10946             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10947             if (status < 0) {
10948                 /* Error */
10949                 IoPAGE(filter) = status;
10950                 return status;
10951             }
10952         }
10953
10954         chars = SvCUR(utf16_buffer) >> 1;
10955         have = SvCUR(utf8_buffer);
10956         SvGROW(utf8_buffer, have + chars * 3 + 1);
10957
10958         if (reverse) {
10959             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10960                                          (U8*)SvPVX_const(utf8_buffer) + have,
10961                                          chars * 2, &newlen);
10962         } else {
10963             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10964                                 (U8*)SvPVX_const(utf8_buffer) + have,
10965                                 chars * 2, &newlen);
10966         }
10967         SvCUR_set(utf8_buffer, have + newlen);
10968         *end = '\0';
10969
10970         /* No need to keep this SV "well-formed" with a '\0' after the end, as
10971            it's private to us, and utf16_to_utf8{,reversed} take a
10972            (pointer,length) pair, rather than a NUL-terminated string.  */
10973         if(SvCUR(utf16_buffer) & 1) {
10974             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10975             SvCUR_set(utf16_buffer, 1);
10976         } else {
10977             SvCUR_set(utf16_buffer, 0);
10978         }
10979     }
10980     DEBUG_P(PerlIO_printf(Perl_debug_log,
10981                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10982                           status,
10983                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10984     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10985     return retval;
10986 }
10987
10988 static U8 *
10989 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10990 {
10991     SV *filter = filter_add(S_utf16_textfilter, NULL);
10992
10993     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10994
10995     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10996     sv_setpvs(filter, "");
10997     IoLINES(filter) = reversed;
10998     IoPAGE(filter) = 1; /* Not EOF */
10999
11000     /* Sadly, we have to return a valid pointer, come what may, so we have to
11001        ignore any error return from this.  */
11002     SvCUR_set(PL_linestr, 0);
11003     if (FILTER_READ(0, PL_linestr, 0)) {
11004         SvUTF8_on(PL_linestr);
11005     } else {
11006         SvUTF8_on(PL_linestr);
11007     }
11008     PL_bufend = SvEND(PL_linestr);
11009     return (U8*)SvPVX(PL_linestr);
11010 }
11011 #endif
11012
11013 /*
11014 Returns a pointer to the next character after the parsed
11015 vstring, as well as updating the passed in sv.
11016
11017 Function must be called like
11018
11019         sv = newSV(5);
11020         s = scan_vstring(s,e,sv);
11021
11022 where s and e are the start and end of the string.
11023 The sv should already be large enough to store the vstring
11024 passed in, for performance reasons.
11025
11026 */
11027
11028 char *
11029 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11030 {
11031     dVAR;
11032     const char *pos = s;
11033     const char *start = s;
11034
11035     PERL_ARGS_ASSERT_SCAN_VSTRING;
11036
11037     if (*pos == 'v') pos++;  /* get past 'v' */
11038     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11039         pos++;
11040     if ( *pos != '.') {
11041         /* this may not be a v-string if followed by => */
11042         const char *next = pos;
11043         while (next < e && isSPACE(*next))
11044             ++next;
11045         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11046             /* return string not v-string */
11047             sv_setpvn(sv,(char *)s,pos-s);
11048             return (char *)pos;
11049         }
11050     }
11051
11052     if (!isALPHA(*pos)) {
11053         U8 tmpbuf[UTF8_MAXBYTES+1];
11054
11055         if (*s == 'v')
11056             s++;  /* get past 'v' */
11057
11058         sv_setpvs(sv, "");
11059
11060         for (;;) {
11061             /* this is atoi() that tolerates underscores */
11062             U8 *tmpend;
11063             UV rev = 0;
11064             const char *end = pos;
11065             UV mult = 1;
11066             while (--end >= s) {
11067                 if (*end != '_') {
11068                     const UV orev = rev;
11069                     rev += (*end - '0') * mult;
11070                     mult *= 10;
11071                     if (orev > rev)
11072                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11073                                          "Integer overflow in decimal number");
11074                 }
11075             }
11076 #ifdef EBCDIC
11077             if (rev > 0x7FFFFFFF)
11078                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11079 #endif
11080             /* Append native character for the rev point */
11081             tmpend = uvchr_to_utf8(tmpbuf, rev);
11082             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11083             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11084                  SvUTF8_on(sv);
11085             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11086                  s = ++pos;
11087             else {
11088                  s = pos;
11089                  break;
11090             }
11091             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11092                  pos++;
11093         }
11094         SvPOK_on(sv);
11095         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11096         SvRMAGICAL_on(sv);
11097     }
11098     return (char *)s;
11099 }
11100
11101 int
11102 Perl_keyword_plugin_standard(pTHX_
11103         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11104 {
11105     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11106     PERL_UNUSED_CONTEXT;
11107     PERL_UNUSED_ARG(keyword_ptr);
11108     PERL_UNUSED_ARG(keyword_len);
11109     PERL_UNUSED_ARG(op_ptr);
11110     return KEYWORD_PLUGIN_DECLINE;
11111 }
11112
11113 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11114 static void
11115 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11116 {
11117     SAVEI32(PL_lex_brackets);
11118     if (PL_lex_brackets > 100)
11119         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11120     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11121     SAVEI32(PL_lex_allbrackets);
11122     PL_lex_allbrackets = 0;
11123     SAVEI8(PL_lex_fakeeof);
11124     PL_lex_fakeeof = (U8)fakeeof;
11125     if(yyparse(gramtype) && !PL_parser->error_count)
11126         qerror(Perl_mess(aTHX_ "Parse error"));
11127 }
11128
11129 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11130 static OP *
11131 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11132 {
11133     OP *o;
11134     ENTER;
11135     SAVEVPTR(PL_eval_root);
11136     PL_eval_root = NULL;
11137     parse_recdescent(gramtype, fakeeof);
11138     o = PL_eval_root;
11139     LEAVE;
11140     return o;
11141 }
11142
11143 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11144 static OP *
11145 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11146 {
11147     OP *exprop;
11148     if (flags & ~PARSE_OPTIONAL)
11149         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11150     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11151     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11152         if (!PL_parser->error_count)
11153             qerror(Perl_mess(aTHX_ "Parse error"));
11154         exprop = newOP(OP_NULL, 0);
11155     }
11156     return exprop;
11157 }
11158
11159 /*
11160 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11161
11162 Parse a Perl arithmetic expression.  This may contain operators of precedence
11163 down to the bit shift operators.  The expression must be followed (and thus
11164 terminated) either by a comparison or lower-precedence operator or by
11165 something that would normally terminate an expression such as semicolon.
11166 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11167 otherwise it is mandatory.  It is up to the caller to ensure that the
11168 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11169 the source of the code to be parsed and the lexical context for the
11170 expression.
11171
11172 The op tree representing the expression is returned.  If an optional
11173 expression is absent, a null pointer is returned, otherwise the pointer
11174 will be non-null.
11175
11176 If an error occurs in parsing or compilation, in most cases a valid op
11177 tree is returned anyway.  The error is reflected in the parser state,
11178 normally resulting in a single exception at the top level of parsing
11179 which covers all the compilation errors that occurred.  Some compilation
11180 errors, however, will throw an exception immediately.
11181
11182 =cut
11183 */
11184
11185 OP *
11186 Perl_parse_arithexpr(pTHX_ U32 flags)
11187 {
11188     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11189 }
11190
11191 /*
11192 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11193
11194 Parse a Perl term expression.  This may contain operators of precedence
11195 down to the assignment operators.  The expression must be followed (and thus
11196 terminated) either by a comma or lower-precedence operator or by
11197 something that would normally terminate an expression such as semicolon.
11198 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11199 otherwise it is mandatory.  It is up to the caller to ensure that the
11200 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11201 the source of the code to be parsed and the lexical context for the
11202 expression.
11203
11204 The op tree representing the expression is returned.  If an optional
11205 expression is absent, a null pointer is returned, otherwise the pointer
11206 will be non-null.
11207
11208 If an error occurs in parsing or compilation, in most cases a valid op
11209 tree is returned anyway.  The error is reflected in the parser state,
11210 normally resulting in a single exception at the top level of parsing
11211 which covers all the compilation errors that occurred.  Some compilation
11212 errors, however, will throw an exception immediately.
11213
11214 =cut
11215 */
11216
11217 OP *
11218 Perl_parse_termexpr(pTHX_ U32 flags)
11219 {
11220     return parse_expr(LEX_FAKEEOF_COMMA, flags);
11221 }
11222
11223 /*
11224 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11225
11226 Parse a Perl list expression.  This may contain operators of precedence
11227 down to the comma operator.  The expression must be followed (and thus
11228 terminated) either by a low-precedence logic operator such as C<or> or by
11229 something that would normally terminate an expression such as semicolon.
11230 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11231 otherwise it is mandatory.  It is up to the caller to ensure that the
11232 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11233 the source of the code to be parsed and the lexical context for the
11234 expression.
11235
11236 The op tree representing the expression is returned.  If an optional
11237 expression is absent, a null pointer is returned, otherwise the pointer
11238 will be non-null.
11239
11240 If an error occurs in parsing or compilation, in most cases a valid op
11241 tree is returned anyway.  The error is reflected in the parser state,
11242 normally resulting in a single exception at the top level of parsing
11243 which covers all the compilation errors that occurred.  Some compilation
11244 errors, however, will throw an exception immediately.
11245
11246 =cut
11247 */
11248
11249 OP *
11250 Perl_parse_listexpr(pTHX_ U32 flags)
11251 {
11252     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11253 }
11254
11255 /*
11256 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11257
11258 Parse a single complete Perl expression.  This allows the full
11259 expression grammar, including the lowest-precedence operators such
11260 as C<or>.  The expression must be followed (and thus terminated) by a
11261 token that an expression would normally be terminated by: end-of-file,
11262 closing bracketing punctuation, semicolon, or one of the keywords that
11263 signals a postfix expression-statement modifier.  If I<flags> includes
11264 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11265 mandatory.  It is up to the caller to ensure that the dynamic parser
11266 state (L</PL_parser> et al) is correctly set to reflect the source of
11267 the code to be parsed and the lexical context for the expression.
11268
11269 The op tree representing the expression is returned.  If an optional
11270 expression is absent, a null pointer is returned, otherwise the pointer
11271 will be non-null.
11272
11273 If an error occurs in parsing or compilation, in most cases a valid op
11274 tree is returned anyway.  The error is reflected in the parser state,
11275 normally resulting in a single exception at the top level of parsing
11276 which covers all the compilation errors that occurred.  Some compilation
11277 errors, however, will throw an exception immediately.
11278
11279 =cut
11280 */
11281
11282 OP *
11283 Perl_parse_fullexpr(pTHX_ U32 flags)
11284 {
11285     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11286 }
11287
11288 /*
11289 =for apidoc Amx|OP *|parse_block|U32 flags
11290
11291 Parse a single complete Perl code block.  This consists of an opening
11292 brace, a sequence of statements, and a closing brace.  The block
11293 constitutes a lexical scope, so C<my> variables and various compile-time
11294 effects can be contained within it.  It is up to the caller to ensure
11295 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11296 reflect the source of the code to be parsed and the lexical context for
11297 the statement.
11298
11299 The op tree representing the code block is returned.  This is always a
11300 real op, never a null pointer.  It will normally be a C<lineseq> list,
11301 including C<nextstate> or equivalent ops.  No ops to construct any kind
11302 of runtime scope are included by virtue of it being a block.
11303
11304 If an error occurs in parsing or compilation, in most cases a valid op
11305 tree (most likely null) is returned anyway.  The error is reflected in
11306 the parser state, normally resulting in a single exception at the top
11307 level of parsing which covers all the compilation errors that occurred.
11308 Some compilation errors, however, will throw an exception immediately.
11309
11310 The I<flags> parameter is reserved for future use, and must always
11311 be zero.
11312
11313 =cut
11314 */
11315
11316 OP *
11317 Perl_parse_block(pTHX_ U32 flags)
11318 {
11319     if (flags)
11320         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11321     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11322 }
11323
11324 /*
11325 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11326
11327 Parse a single unadorned Perl statement.  This may be a normal imperative
11328 statement or a declaration that has compile-time effect.  It does not
11329 include any label or other affixture.  It is up to the caller to ensure
11330 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11331 reflect the source of the code to be parsed and the lexical context for
11332 the statement.
11333
11334 The op tree representing the statement is returned.  This may be a
11335 null pointer if the statement is null, for example if it was actually
11336 a subroutine definition (which has compile-time side effects).  If not
11337 null, it will be ops directly implementing the statement, suitable to
11338 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
11339 equivalent op (except for those embedded in a scope contained entirely
11340 within the statement).
11341
11342 If an error occurs in parsing or compilation, in most cases a valid op
11343 tree (most likely null) is returned anyway.  The error is reflected in
11344 the parser state, normally resulting in a single exception at the top
11345 level of parsing which covers all the compilation errors that occurred.
11346 Some compilation errors, however, will throw an exception immediately.
11347
11348 The I<flags> parameter is reserved for future use, and must always
11349 be zero.
11350
11351 =cut
11352 */
11353
11354 OP *
11355 Perl_parse_barestmt(pTHX_ U32 flags)
11356 {
11357     if (flags)
11358         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11359     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11360 }
11361
11362 /*
11363 =for apidoc Amx|SV *|parse_label|U32 flags
11364
11365 Parse a single label, possibly optional, of the type that may prefix a
11366 Perl statement.  It is up to the caller to ensure that the dynamic parser
11367 state (L</PL_parser> et al) is correctly set to reflect the source of
11368 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
11369 label is optional, otherwise it is mandatory.
11370
11371 The name of the label is returned in the form of a fresh scalar.  If an
11372 optional label is absent, a null pointer is returned.
11373
11374 If an error occurs in parsing, which can only occur if the label is
11375 mandatory, a valid label is returned anyway.  The error is reflected in
11376 the parser state, normally resulting in a single exception at the top
11377 level of parsing which covers all the compilation errors that occurred.
11378
11379 =cut
11380 */
11381
11382 SV *
11383 Perl_parse_label(pTHX_ U32 flags)
11384 {
11385     if (flags & ~PARSE_OPTIONAL)
11386         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11387     if (PL_lex_state == LEX_KNOWNEXT) {
11388         PL_parser->yychar = yylex();
11389         if (PL_parser->yychar == LABEL) {
11390             char *lpv = pl_yylval.pval;
11391             STRLEN llen = strlen(lpv);
11392             SV *lsv;
11393             PL_parser->yychar = YYEMPTY;
11394             lsv = newSV_type(SVt_PV);
11395             SvPV_set(lsv, lpv);
11396             SvCUR_set(lsv, llen);
11397             SvLEN_set(lsv, llen+1);
11398             SvPOK_on(lsv);
11399             return lsv;
11400         } else {
11401             yyunlex();
11402             goto no_label;
11403         }
11404     } else {
11405         char *s, *t;
11406         U8 c;
11407         STRLEN wlen, bufptr_pos;
11408         lex_read_space(0);
11409         t = s = PL_bufptr;
11410         c = (U8)*s;
11411         if (!isIDFIRST_A(c))
11412             goto no_label;
11413         do {
11414             c = (U8)*++t;
11415         } while(isWORDCHAR_A(c));
11416         wlen = t - s;
11417         if (word_takes_any_delimeter(s, wlen))
11418             goto no_label;
11419         bufptr_pos = s - SvPVX(PL_linestr);
11420         PL_bufptr = t;
11421         lex_read_space(LEX_KEEP_PREVIOUS);
11422         t = PL_bufptr;
11423         s = SvPVX(PL_linestr) + bufptr_pos;
11424         if (t[0] == ':' && t[1] != ':') {
11425             PL_oldoldbufptr = PL_oldbufptr;
11426             PL_oldbufptr = s;
11427             PL_bufptr = t+1;
11428             return newSVpvn(s, wlen);
11429         } else {
11430             PL_bufptr = s;
11431             no_label:
11432             if (flags & PARSE_OPTIONAL) {
11433                 return NULL;
11434             } else {
11435                 qerror(Perl_mess(aTHX_ "Parse error"));
11436                 return newSVpvs("x");
11437             }
11438         }
11439     }
11440 }
11441
11442 /*
11443 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11444
11445 Parse a single complete Perl statement.  This may be a normal imperative
11446 statement or a declaration that has compile-time effect, and may include
11447 optional labels.  It is up to the caller to ensure that the dynamic
11448 parser state (L</PL_parser> et al) is correctly set to reflect the source
11449 of the code to be parsed and the lexical context for the statement.
11450
11451 The op tree representing the statement is returned.  This may be a
11452 null pointer if the statement is null, for example if it was actually
11453 a subroutine definition (which has compile-time side effects).  If not
11454 null, it will be the result of a L</newSTATEOP> call, normally including
11455 a C<nextstate> or equivalent op.
11456
11457 If an error occurs in parsing or compilation, in most cases a valid op
11458 tree (most likely null) is returned anyway.  The error is reflected in
11459 the parser state, normally resulting in a single exception at the top
11460 level of parsing which covers all the compilation errors that occurred.
11461 Some compilation errors, however, will throw an exception immediately.
11462
11463 The I<flags> parameter is reserved for future use, and must always
11464 be zero.
11465
11466 =cut
11467 */
11468
11469 OP *
11470 Perl_parse_fullstmt(pTHX_ U32 flags)
11471 {
11472     if (flags)
11473         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11474     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11475 }
11476
11477 /*
11478 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11479
11480 Parse a sequence of zero or more Perl statements.  These may be normal
11481 imperative statements, including optional labels, or declarations
11482 that have compile-time effect, or any mixture thereof.  The statement
11483 sequence ends when a closing brace or end-of-file is encountered in a
11484 place where a new statement could have validly started.  It is up to
11485 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11486 is correctly set to reflect the source of the code to be parsed and the
11487 lexical context for the statements.
11488
11489 The op tree representing the statement sequence is returned.  This may
11490 be a null pointer if the statements were all null, for example if there
11491 were no statements or if there were only subroutine definitions (which
11492 have compile-time side effects).  If not null, it will be a C<lineseq>
11493 list, normally including C<nextstate> or equivalent ops.
11494
11495 If an error occurs in parsing or compilation, in most cases a valid op
11496 tree is returned anyway.  The error is reflected in the parser state,
11497 normally resulting in a single exception at the top level of parsing
11498 which covers all the compilation errors that occurred.  Some compilation
11499 errors, however, will throw an exception immediately.
11500
11501 The I<flags> parameter is reserved for future use, and must always
11502 be zero.
11503
11504 =cut
11505 */
11506
11507 OP *
11508 Perl_parse_stmtseq(pTHX_ U32 flags)
11509 {
11510     OP *stmtseqop;
11511     I32 c;
11512     if (flags)
11513         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11514     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11515     c = lex_peek_unichar(0);
11516     if (c != -1 && c != /*{*/'}')
11517         qerror(Perl_mess(aTHX_ "Parse error"));
11518     return stmtseqop;
11519 }
11520
11521 void
11522 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11523 {
11524     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11525     deprecate("qw(...) as parentheses");
11526     force_next((4<<24)|')');
11527     if (qwlist->op_type == OP_STUB) {
11528         op_free(qwlist);
11529     }
11530     else {
11531         start_force(PL_curforce);
11532         NEXTVAL_NEXTTOKE.opval = qwlist;
11533         force_next(THING);
11534     }
11535     force_next((2<<24)|'(');
11536 }
11537
11538 /*
11539  * Local variables:
11540  * c-indentation-style: bsd
11541  * c-basic-offset: 4
11542  * indent-tabs-mode: t
11543  * End:
11544  *
11545  * ex: set ts=8 sts=4 sw=4 noet:
11546  */