toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  52 #define PL_lex_casemods         (PL_parser->lex_casemods)
  53 #define PL_lex_casestack        (PL_parser->lex_casestack)
  54 #define PL_lex_defer            (PL_parser->lex_defer)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_expect           (PL_parser->lex_expect)
  57 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  58 #define PL_lex_inpat            (PL_parser->lex_inpat)
  59 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  60 #define PL_lex_op               (PL_parser->lex_op)
  61 #define PL_lex_repl             (PL_parser->lex_repl)
  62 #define PL_lex_starts           (PL_parser->lex_starts)
  63 #define PL_lex_stuff            (PL_parser->lex_stuff)
  64 #define PL_multi_start          (PL_parser->multi_start)
  65 #define PL_multi_open           (PL_parser->multi_open)
  66 #define PL_multi_close          (PL_parser->multi_close)
  67 #define PL_pending_ident        (PL_parser->pending_ident)
  68 #define PL_preambled            (PL_parser->preambled)
  69 #define PL_sublex_info          (PL_parser->sublex_info)
  70 #define PL_linestr              (PL_parser->linestr)
  71 #define PL_expect               (PL_parser->expect)
  72 #define PL_copline              (PL_parser->copline)
  73 #define PL_bufptr               (PL_parser->bufptr)
  74 #define PL_oldbufptr            (PL_parser->oldbufptr)
  75 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  76 #define PL_linestart            (PL_parser->linestart)
  77 #define PL_bufend               (PL_parser->bufend)
  78 #define PL_last_uni             (PL_parser->last_uni)
  79 #define PL_last_lop             (PL_parser->last_lop)
  80 #define PL_last_lop_op          (PL_parser->last_lop_op)
  81 #define PL_lex_state            (PL_parser->lex_state)
  82 #define PL_rsfp                 (PL_parser->rsfp)
  83 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  84 #define PL_in_my                (PL_parser->in_my)
  85 #define PL_in_my_stash          (PL_parser->in_my_stash)
  86 #define PL_tokenbuf             (PL_parser->tokenbuf)
  87 #define PL_multi_end            (PL_parser->multi_end)
  88 #define PL_error_count          (PL_parser->error_count)
  89
  90 #ifdef PERL_MAD
  91 #  define PL_endwhite           (PL_parser->endwhite)
  92 #  define PL_faketokens         (PL_parser->faketokens)
  93 #  define PL_lasttoke           (PL_parser->lasttoke)
  94 #  define PL_nextwhite          (PL_parser->nextwhite)
  95 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  96 #  define PL_skipwhite          (PL_parser->skipwhite)
  97 #  define PL_thisclose          (PL_parser->thisclose)
  98 #  define PL_thismad            (PL_parser->thismad)
  99 #  define PL_thisopen           (PL_parser->thisopen)
 100 #  define PL_thisstuff          (PL_parser->thisstuff)
 101 #  define PL_thistoken          (PL_parser->thistoken)
 102 #  define PL_thiswhite          (PL_parser->thiswhite)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_nexttoke           (PL_parser->nexttoke)
 105 #  define PL_curforce           (PL_parser->curforce)
 106 #else
 107 #  define PL_nexttoke           (PL_parser->nexttoke)
 108 #  define PL_nexttype           (PL_parser->nexttype)
 109 #  define PL_nextval            (PL_parser->nextval)
 110 #endif
 111
 112 /* This can't be done with embed.fnc, because struct yy_parser contains a
 113    member named pending_ident, which clashes with the generated #define  */
 114 static int
 115 S_pending_ident(pTHX);
 116
 117 static const char ident_too_long[] = "Identifier too long";
 118
 119 #ifdef PERL_MAD
 120 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 121 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 122 #else
 123 #  define CURMAD(slot,sv)
 124 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 125 #endif
 126
 127 #define XFAKEBRACK 128
 128 #define XENUMMASK 127
 129
 130 #ifdef USE_UTF8_SCRIPTS
 131 #   define UTF (!IN_BYTES)
 132 #else
 133 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 134 #endif
 135
 136 /* The maximum number of characters preceding the unrecognized one to display */
 137 #define UNRECOGNIZED_PRECEDE_COUNT 10
 138
 139 /* In variables named $^X, these are the legal values for X.
 140  * 1999-02-27 mjd-perl-patch@plover.com */
 141 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 142
 143 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 144
 145 /* LEX_* are values for PL_lex_state, the state of the lexer.
 146  * They are arranged oddly so that the guard on the switch statement
 147  * can get by with a single comparison (if the compiler is smart enough).
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #ifdef ff_next
 186 #undef ff_next
 187 #endif
 188
 189 #include "keywords.h"
 190
 191 /* CLINE is a macro that ensures PL_copline has a sane value */
 192
 193 #ifdef CLINE
 194 #undef CLINE
 195 #endif
 196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 197
 198 #ifdef PERL_MAD
 199 #  define SKIPSPACE0(s) skipspace0(s)
 200 #  define SKIPSPACE1(s) skipspace1(s)
 201 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 202 #  define PEEKSPACE(s) skipspace2(s,0)
 203 #else
 204 #  define SKIPSPACE0(s) skipspace(s)
 205 #  define SKIPSPACE1(s) skipspace(s)
 206 #  define SKIPSPACE2(s,tsv) skipspace(s)
 207 #  define PEEKSPACE(s) skipspace(s)
 208 #endif
 209
 210 /*
 211  * Convenience functions to return different tokens and prime the
 212  * lexer for the next token.  They all take an argument.
 213  *
 214  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 215  * OPERATOR     : generic operator
 216  * AOPERATOR    : assignment operator
 217  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 218  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 219  * PREREF       : *EXPR where EXPR is not a simple identifier
 220  * TERM         : expression term
 221  * LOOPX        : loop exiting command (goto, last, dump, etc)
 222  * FTST         : file test operator
 223  * FUN0         : zero-argument function
 224  * FUN1         : not used, except for not, which isn't a UNIOP
 225  * BOop         : bitwise or or xor
 226  * BAop         : bitwise and
 227  * SHop         : shift operator
 228  * PWop         : power operator
 229  * PMop         : pattern-matching operator
 230  * Aop          : addition-level operator
 231  * Mop          : multiplication-level operator
 232  * Eop          : equality-testing operator
 233  * Rop          : relational operator <= != gt
 234  *
 235  * Also see LOP and lop() below.
 236  */
 237
 238 #ifdef DEBUGGING /* Serve -DT. */
 239 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 240 #else
 241 #   define REPORT(retval) (retval)
 242 #endif
 243
 244 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 245 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 246 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 247 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 248 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 249 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 250 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 251 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 252 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 253 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 254 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 255 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 256 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 257 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 258 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 259 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 260 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 261 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 262 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 263 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 264
 265 /* This bit of chicanery makes a unary function followed by
 266  * a parenthesis into a function with one argument, highest precedence.
 267  * The UNIDOR macro is for unary functions that can be followed by the //
 268  * operator (such as C<shift // 0>).
 269  */
 270 #define UNI2(f,x) { \
 271         pl_yylval.ival = f; \
 272         PL_expect = x; \
 273         PL_bufptr = s; \
 274         PL_last_uni = PL_oldbufptr; \
 275         PL_last_lop_op = f; \
 276         if (*s == '(') \
 277             return REPORT( (int)FUNC1 ); \
 278         s = PEEKSPACE(s); \
 279         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 280         }
 281 #define UNI(f)    UNI2(f,XTERM)
 282 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 283
 284 #define UNIBRACK(f) { \
 285         pl_yylval.ival = f; \
 286         PL_bufptr = s; \
 287         PL_last_uni = PL_oldbufptr; \
 288         if (*s == '(') \
 289             return REPORT( (int)FUNC1 ); \
 290         s = PEEKSPACE(s); \
 291         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 292         }
 293
 294 /* grandfather return to old style */
 295 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 296
 297 #ifdef DEBUGGING
 298
 299 /* how to interpret the pl_yylval associated with the token */
 300 enum token_type {
 301     TOKENTYPE_NONE,
 302     TOKENTYPE_IVAL,
 303     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 304     TOKENTYPE_PVAL,
 305     TOKENTYPE_OPVAL,
 306     TOKENTYPE_GVVAL
 307 };
 308
 309 static struct debug_tokens {
 310     const int token;
 311     enum token_type type;
 312     const char *name;
 313 } const debug_tokens[] =
 314 {
 315     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 316     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 317     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 318     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 319     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 320     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 321     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 322     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 323     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 324     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 325     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 326     { DO,               TOKENTYPE_NONE,         "DO" },
 327     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 328     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 329     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 330     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 331     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 332     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 333     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 334     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 335     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 336     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 337     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 338     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 339     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 340     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 341     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 342     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 343     { IF,               TOKENTYPE_IVAL,         "IF" },
 344     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 345     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 346     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 347     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 348     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 349     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 350     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 351     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 352     { MY,               TOKENTYPE_IVAL,         "MY" },
 353     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 354     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 355     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 356     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 357     { OROR,             TOKENTYPE_NONE,         "OROR" },
 358     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 359     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 360     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 361     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 362     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 363     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 364     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 365     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 366     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 367     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 368     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 369     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 370     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 371     { SUB,              TOKENTYPE_NONE,         "SUB" },
 372     { THING,            TOKENTYPE_OPVAL,        "THING" },
 373     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 374     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 375     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 376     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 377     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 378     { USE,              TOKENTYPE_IVAL,         "USE" },
 379     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 380     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 381     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 382     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 383     { 0,                TOKENTYPE_NONE,         NULL }
 384 };
 385
 386 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 387
 388 STATIC int
 389 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 390 {
 391     dVAR;
 392
 393     PERL_ARGS_ASSERT_TOKEREPORT;
 394
 395     if (DEBUG_T_TEST) {
 396         const char *name = NULL;
 397         enum token_type type = TOKENTYPE_NONE;
 398         const struct debug_tokens *p;
 399         SV* const report = newSVpvs("<== ");
 400
 401         for (p = debug_tokens; p->token; p++) {
 402             if (p->token == (int)rv) {
 403                 name = p->name;
 404                 type = p->type;
 405                 break;
 406             }
 407         }
 408         if (name)
 409             Perl_sv_catpv(aTHX_ report, name);
 410         else if ((char)rv > ' ' && (char)rv < '~')
 411             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 412         else if (!rv)
 413             sv_catpvs(report, "EOF");
 414         else
 415             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 416         switch (type) {
 417         case TOKENTYPE_NONE:
 418         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 419             break;
 420         case TOKENTYPE_IVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 422             break;
 423         case TOKENTYPE_OPNUM:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 425                                     PL_op_name[lvalp->ival]);
 426             break;
 427         case TOKENTYPE_PVAL:
 428             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 429             break;
 430         case TOKENTYPE_OPVAL:
 431             if (lvalp->opval) {
 432                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 433                                     PL_op_name[lvalp->opval->op_type]);
 434                 if (lvalp->opval->op_type == OP_CONST) {
 435                     Perl_sv_catpvf(aTHX_ report, " %s",
 436                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 437                 }
 438
 439             }
 440             else
 441                 sv_catpvs(report, "(opval=null)");
 442             break;
 443         }
 444         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 445     };
 446     return (int)rv;
 447 }
 448
 449
 450 /* print the buffer with suitable escapes */
 451
 452 STATIC void
 453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 454 {
 455     SV* const tmp = newSVpvs("");
 456
 457     PERL_ARGS_ASSERT_PRINTBUF;
 458
 459     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 460     SvREFCNT_dec(tmp);
 461 }
 462
 463 #endif
 464
 465 static int
 466 S_deprecate_commaless_var_list(pTHX) {
 467     PL_expect = XTERM;
 468     deprecate("comma-less variable list");
 469     return REPORT(','); /* grandfather non-comma-format format */
 470 }
 471
 472 /*
 473  * S_ao
 474  *
 475  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 476  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 477  */
 478
 479 STATIC int
 480 S_ao(pTHX_ int toketype)
 481 {
 482     dVAR;
 483     if (*PL_bufptr == '=') {
 484         PL_bufptr++;
 485         if (toketype == ANDAND)
 486             pl_yylval.ival = OP_ANDASSIGN;
 487         else if (toketype == OROR)
 488             pl_yylval.ival = OP_ORASSIGN;
 489         else if (toketype == DORDOR)
 490             pl_yylval.ival = OP_DORASSIGN;
 491         toketype = ASSIGNOP;
 492     }
 493     return toketype;
 494 }
 495
 496 /*
 497  * S_no_op
 498  * When Perl expects an operator and finds something else, no_op
 499  * prints the warning.  It always prints "<something> found where
 500  * operator expected.  It prints "Missing semicolon on previous line?"
 501  * if the surprise occurs at the start of the line.  "do you need to
 502  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 503  * where the compiler doesn't know if foo is a method call or a function.
 504  * It prints "Missing operator before end of line" if there's nothing
 505  * after the missing operator, or "... before <...>" if there is something
 506  * after the missing operator.
 507  */
 508
 509 STATIC void
 510 S_no_op(pTHX_ const char *const what, char *s)
 511 {
 512     dVAR;
 513     char * const oldbp = PL_bufptr;
 514     const bool is_first = (PL_oldbufptr == PL_linestart);
 515
 516     PERL_ARGS_ASSERT_NO_OP;
 517
 518     if (!s)
 519         s = oldbp;
 520     else
 521         PL_bufptr = s;
 522     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 523     if (ckWARN_d(WARN_SYNTAX)) {
 524         if (is_first)
 525             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 526                     "\t(Missing semicolon on previous line?)\n");
 527         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 528             const char *t;
 529             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 530                 NOOP;
 531             if (t < PL_bufptr && isSPACE(*t))
 532                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 533                         "\t(Do you need to predeclare %.*s?)\n",
 534                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 535         }
 536         else {
 537             assert(s >= oldbp);
 538             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 539                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 540         }
 541     }
 542     PL_bufptr = oldbp;
 543 }
 544
 545 /*
 546  * S_missingterm
 547  * Complain about missing quote/regexp/heredoc terminator.
 548  * If it's called with NULL then it cauterizes the line buffer.
 549  * If we're in a delimited string and the delimiter is a control
 550  * character, it's reformatted into a two-char sequence like ^C.
 551  * This is fatal.
 552  */
 553
 554 STATIC void
 555 S_missingterm(pTHX_ char *s)
 556 {
 557     dVAR;
 558     char tmpbuf[3];
 559     char q;
 560     if (s) {
 561         char * const nl = strrchr(s,'\n');
 562         if (nl)
 563             *nl = '\0';
 564     }
 565     else if (isCNTRL(PL_multi_close)) {
 566         *tmpbuf = '^';
 567         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 568         tmpbuf[2] = '\0';
 569         s = tmpbuf;
 570     }
 571     else {
 572         *tmpbuf = (char)PL_multi_close;
 573         tmpbuf[1] = '\0';
 574         s = tmpbuf;
 575     }
 576     q = strchr(s,'"') ? '\'' : '"';
 577     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 578 }
 579
 580 #define FEATURE_IS_ENABLED(name)                                        \
 581         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 582             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 583 /* The longest string we pass in.  */
 584 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 585
 586 /*
 587  * S_feature_is_enabled
 588  * Check whether the named feature is enabled.
 589  */
 590 STATIC bool
 591 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 592 {
 593     dVAR;
 594     HV * const hinthv = GvHV(PL_hintgv);
 595     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 596
 597     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 598
 599     assert(namelen <= MAX_FEATURE_LEN);
 600     memcpy(&he_name[8], name, namelen);
 601
 602     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 603 }
 604
 605 /*
 606  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 607  * utf16-to-utf8-reversed.
 608  */
 609
 610 #ifdef PERL_CR_FILTER
 611 static void
 612 strip_return(SV *sv)
 613 {
 614     register const char *s = SvPVX_const(sv);
 615     register const char * const e = s + SvCUR(sv);
 616
 617     PERL_ARGS_ASSERT_STRIP_RETURN;
 618
 619     /* outer loop optimized to do nothing if there are no CR-LFs */
 620     while (s < e) {
 621         if (*s++ == '\r' && *s == '\n') {
 622             /* hit a CR-LF, need to copy the rest */
 623             register char *d = s - 1;
 624             *d++ = *s++;
 625             while (s < e) {
 626                 if (*s == '\r' && s[1] == '\n')
 627                     s++;
 628                 *d++ = *s++;
 629             }
 630             SvCUR(sv) -= s - d;
 631             return;
 632         }
 633     }
 634 }
 635
 636 STATIC I32
 637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 638 {
 639     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 640     if (count > 0 && !maxlen)
 641         strip_return(sv);
 642     return count;
 643 }
 644 #endif
 645
 646
 647
 648 /*
 649  * Perl_lex_start
 650  *
 651  * Create a parser object and initialise its parser and lexer fields
 652  *
 653  * rsfp       is the opened file handle to read from (if any),
 654  *
 655  * line       holds any initial content already read from the file (or in
 656  *            the case of no file, such as an eval, the whole contents);
 657  *
 658  * new_filter indicates that this is a new file and it shouldn't inherit
 659  *            the filters from the current parser (ie require).
 660  */
 661
 662 void
 663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 664 {
 665     dVAR;
 666     const char *s = NULL;
 667     STRLEN len;
 668     yy_parser *parser, *oparser;
 669
 670     /* create and initialise a parser */
 671
 672     Newxz(parser, 1, yy_parser);
 673     parser->old_parser = oparser = PL_parser;
 674     PL_parser = parser;
 675
 676     parser->stack = NULL;
 677     parser->ps = NULL;
 678     parser->stack_size = 0;
 679
 680     /* on scope exit, free this parser and restore any outer one */
 681     SAVEPARSER(parser);
 682     parser->saved_curcop = PL_curcop;
 683
 684     /* initialise lexer state */
 685
 686 #ifdef PERL_MAD
 687     parser->curforce = -1;
 688 #else
 689     parser->nexttoke = 0;
 690 #endif
 691     parser->error_count = oparser ? oparser->error_count : 0;
 692     parser->copline = NOLINE;
 693     parser->lex_state = LEX_NORMAL;
 694     parser->expect = XSTATE;
 695     parser->rsfp = rsfp;
 696     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 697                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 698
 699     Newx(parser->lex_brackstack, 120, char);
 700     Newx(parser->lex_casestack, 12, char);
 701     *parser->lex_casestack = '\0';
 702
 703     if (line) {
 704         s = SvPV_const(line, len);
 705     } else {
 706         len = 0;
 707     }
 708
 709     if (!len) {
 710         parser->linestr = newSVpvs("\n;");
 711     } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
 712         /* avoid tie/overload weirdness */
 713         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 714         if (s[len-1] != ';')
 715             sv_catpvs(parser->linestr, "\n;");
 716     } else {
 717         SvTEMP_off(line);
 718         SvREFCNT_inc_simple_void_NN(line);
 719         parser->linestr = line;
 720     }
 721     parser->oldoldbufptr =
 722         parser->oldbufptr =
 723         parser->bufptr =
 724         parser->linestart = SvPVX(parser->linestr);
 725     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 726     parser->last_lop = parser->last_uni = NULL;
 727 }
 728
 729
 730 /* delete a parser object */
 731
 732 void
 733 Perl_parser_free(pTHX_  const yy_parser *parser)
 734 {
 735     PERL_ARGS_ASSERT_PARSER_FREE;
 736
 737     PL_curcop = parser->saved_curcop;
 738     SvREFCNT_dec(parser->linestr);
 739
 740     if (parser->rsfp == PerlIO_stdin())
 741         PerlIO_clearerr(parser->rsfp);
 742     else if (parser->rsfp && (!parser->old_parser ||
 743                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 744         PerlIO_close(parser->rsfp);
 745     SvREFCNT_dec(parser->rsfp_filters);
 746
 747     Safefree(parser->lex_brackstack);
 748     Safefree(parser->lex_casestack);
 749     PL_parser = parser->old_parser;
 750     Safefree(parser);
 751 }
 752
 753
 754 /*
 755  * Perl_lex_end
 756  * Finalizer for lexing operations.  Must be called when the parser is
 757  * done with the lexer.
 758  */
 759
 760 void
 761 Perl_lex_end(pTHX)
 762 {
 763     dVAR;
 764     PL_doextract = FALSE;
 765 }
 766
 767 /*
 768 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 769
 770 Buffer scalar containing the chunk currently under consideration of the
 771 text currently being lexed.  This is always a plain string scalar (for
 772 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 773 normal scalar means; instead refer to the buffer directly by the pointer
 774 variables described below.
 775
 776 The lexer maintains various C<char*> pointers to things in the
 777 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 778 reallocated, all of these pointers must be updated.  Don't attempt to
 779 do this manually, but rather use L</lex_grow_linestr> if you need to
 780 reallocate the buffer.
 781
 782 The content of the text chunk in the buffer is commonly exactly one
 783 complete line of input, up to and including a newline terminator,
 784 but there are situations where it is otherwise.  The octets of the
 785 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 786 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 787 flag on this scalar, which may disagree with it.
 788
 789 For direct examination of the buffer, the variable
 790 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 791 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 792 of these pointers is usually preferable to examination of the scalar
 793 through normal scalar means.
 794
 795 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 796
 797 Direct pointer to the end of the chunk of text currently being lexed, the
 798 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 799 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 800 always located at the end of the buffer, and does not count as part of
 801 the buffer's contents.
 802
 803 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 804
 805 Points to the current position of lexing inside the lexer buffer.
 806 Characters around this point may be freely examined, within
 807 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 808 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 809 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 810
 811 Lexing code (whether in the Perl core or not) moves this pointer past
 812 the characters that it consumes.  It is also expected to perform some
 813 bookkeeping whenever a newline character is consumed.  This movement
 814 can be more conveniently performed by the function L</lex_read_to>,
 815 which handles newlines appropriately.
 816
 817 Interpretation of the buffer's octets can be abstracted out by
 818 using the slightly higher-level functions L</lex_peek_unichar> and
 819 L</lex_read_unichar>.
 820
 821 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 822
 823 Points to the start of the current line inside the lexer buffer.
 824 This is useful for indicating at which column an error occurred, and
 825 not much else.  This must be updated by any lexing code that consumes
 826 a newline; the function L</lex_read_to> handles this detail.
 827
 828 =cut
 829 */
 830
 831 /*
 832 =for apidoc Amx|bool|lex_bufutf8
 833
 834 Indicates whether the octets in the lexer buffer
 835 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 836 of Unicode characters.  If not, they should be interpreted as Latin-1
 837 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 838
 839 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 840 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 841 encoding.
 842
 843 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 844 is significant, but not the whole story regarding the input character
 845 encoding.  Normally, when a file is being read, the scalar contains octets
 846 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 847 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 848 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 849 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 850 is in effect.  This logic may change in the future; use this function
 851 instead of implementing the logic yourself.
 852
 853 =cut
 854 */
 855
 856 bool
 857 Perl_lex_bufutf8(pTHX)
 858 {
 859     return UTF;
 860 }
 861
 862 /*
 863 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 864
 865 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 866 at least I<len> octets (including terminating NUL).  Returns a
 867 pointer to the reallocated buffer.  This is necessary before making
 868 any direct modification of the buffer that would increase its length.
 869 L</lex_stuff_pvn> provides a more convenient way to insert text into
 870 the buffer.
 871
 872 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 873 this function updates all of the lexer's variables that point directly
 874 into the buffer.
 875
 876 =cut
 877 */
 878
 879 char *
 880 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 881 {
 882     SV *linestr;
 883     char *buf;
 884     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 885     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 886     linestr = PL_parser->linestr;
 887     buf = SvPVX(linestr);
 888     if (len <= SvLEN(linestr))
 889         return buf;
 890     bufend_pos = PL_parser->bufend - buf;
 891     bufptr_pos = PL_parser->bufptr - buf;
 892     oldbufptr_pos = PL_parser->oldbufptr - buf;
 893     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 894     linestart_pos = PL_parser->linestart - buf;
 895     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 896     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 897     buf = sv_grow(linestr, len);
 898     PL_parser->bufend = buf + bufend_pos;
 899     PL_parser->bufptr = buf + bufptr_pos;
 900     PL_parser->oldbufptr = buf + oldbufptr_pos;
 901     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 902     PL_parser->linestart = buf + linestart_pos;
 903     if (PL_parser->last_uni)
 904         PL_parser->last_uni = buf + last_uni_pos;
 905     if (PL_parser->last_lop)
 906         PL_parser->last_lop = buf + last_lop_pos;
 907     return buf;
 908 }
 909
 910 /*
 911 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 912
 913 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 914 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 915 reallocating the buffer if necessary.  This means that lexing code that
 916 runs later will see the characters as if they had appeared in the input.
 917 It is not recommended to do this as part of normal parsing, and most
 918 uses of this facility run the risk of the inserted characters being
 919 interpreted in an unintended manner.
 920
 921 The string to be inserted is represented by I<len> octets starting
 922 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 923 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 924 The characters are recoded for the lexer buffer, according to how the
 925 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 926 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 927 function is more convenient.
 928
 929 =cut
 930 */
 931
 932 void
 933 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 934 {
 935     dVAR;
 936     char *bufptr;
 937     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 938     if (flags & ~(LEX_STUFF_UTF8))
 939         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 940     if (UTF) {
 941         if (flags & LEX_STUFF_UTF8) {
 942             goto plain_copy;
 943         } else {
 944             STRLEN highhalf = 0;
 945             const char *p, *e = pv+len;
 946             for (p = pv; p != e; p++)
 947                 highhalf += !!(((U8)*p) & 0x80);
 948             if (!highhalf)
 949                 goto plain_copy;
 950             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 951             bufptr = PL_parser->bufptr;
 952             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 953             SvCUR_set(PL_parser->linestr,
 954                 SvCUR(PL_parser->linestr) + len+highhalf);
 955             PL_parser->bufend += len+highhalf;
 956             for (p = pv; p != e; p++) {
 957                 U8 c = (U8)*p;
 958                 if (c & 0x80) {
 959                     *bufptr++ = (char)(0xc0 | (c >> 6));
 960                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 961                 } else {
 962                     *bufptr++ = (char)c;
 963                 }
 964             }
 965         }
 966     } else {
 967         if (flags & LEX_STUFF_UTF8) {
 968             STRLEN highhalf = 0;
 969             const char *p, *e = pv+len;
 970             for (p = pv; p != e; p++) {
 971                 U8 c = (U8)*p;
 972                 if (c >= 0xc4) {
 973                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 974                                 "non-Latin-1 character into Latin-1 input");
 975                 } else if (c >= 0xc2 && p+1 != e &&
 976                             (((U8)p[1]) & 0xc0) == 0x80) {
 977                     p++;
 978                     highhalf++;
 979                 } else if (c >= 0x80) {
 980                     /* malformed UTF-8 */
 981                     ENTER;
 982                     SAVESPTR(PL_warnhook);
 983                     PL_warnhook = PERL_WARNHOOK_FATAL;
 984                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 985                     LEAVE;
 986                 }
 987             }
 988             if (!highhalf)
 989                 goto plain_copy;
 990             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 991             bufptr = PL_parser->bufptr;
 992             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 993             SvCUR_set(PL_parser->linestr,
 994                 SvCUR(PL_parser->linestr) + len-highhalf);
 995             PL_parser->bufend += len-highhalf;
 996             for (p = pv; p != e; p++) {
 997                 U8 c = (U8)*p;
 998                 if (c & 0x80) {
 999                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1000                     p++;
1001                 } else {
1002                     *bufptr++ = (char)c;
1003                 }
1004             }
1005         } else {
1006             plain_copy:
1007             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1008             bufptr = PL_parser->bufptr;
1009             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1010             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is represented by octets starting at I<pv>
1029 and continuing to the first nul.  These octets are interpreted as either
1030 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1031 in I<flags>.  The characters are recoded for the lexer buffer, according
1032 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1033 If it is not convenient to nul-terminate a string to be inserted, the
1034 L</lex_stuff_pvn> function is more appropriate.
1035
1036 =cut
1037 */
1038
1039 void
1040 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1041 {
1042     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1043     lex_stuff_pvn(pv, strlen(pv), flags);
1044 }
1045
1046 /*
1047 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1048
1049 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1050 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1051 reallocating the buffer if necessary.  This means that lexing code that
1052 runs later will see the characters as if they had appeared in the input.
1053 It is not recommended to do this as part of normal parsing, and most
1054 uses of this facility run the risk of the inserted characters being
1055 interpreted in an unintended manner.
1056
1057 The string to be inserted is the string value of I<sv>.  The characters
1058 are recoded for the lexer buffer, according to how the buffer is currently
1059 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1060 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1061 need to construct a scalar.
1062
1063 =cut
1064 */
1065
1066 void
1067 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1068 {
1069     char *pv;
1070     STRLEN len;
1071     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1072     if (flags)
1073         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1074     pv = SvPV(sv, len);
1075     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1076 }
1077
1078 /*
1079 =for apidoc Amx|void|lex_unstuff|char *ptr
1080
1081 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1082 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1083 This hides the discarded text from any lexing code that runs later,
1084 as if the text had never appeared.
1085
1086 This is not the normal way to consume lexed text.  For that, use
1087 L</lex_read_to>.
1088
1089 =cut
1090 */
1091
1092 void
1093 Perl_lex_unstuff(pTHX_ char *ptr)
1094 {
1095     char *buf, *bufend;
1096     STRLEN unstuff_len;
1097     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1098     buf = PL_parser->bufptr;
1099     if (ptr < buf)
1100         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1101     if (ptr == buf)
1102         return;
1103     bufend = PL_parser->bufend;
1104     if (ptr > bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1106     unstuff_len = ptr - buf;
1107     Move(ptr, buf, bufend+1-ptr, char);
1108     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1109     PL_parser->bufend = bufend - unstuff_len;
1110 }
1111
1112 /*
1113 =for apidoc Amx|void|lex_read_to|char *ptr
1114
1115 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1116 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1117 performing the correct bookkeeping whenever a newline character is passed.
1118 This is the normal way to consume lexed text.
1119
1120 Interpretation of the buffer's octets can be abstracted out by
1121 using the slightly higher-level functions L</lex_peek_unichar> and
1122 L</lex_read_unichar>.
1123
1124 =cut
1125 */
1126
1127 void
1128 Perl_lex_read_to(pTHX_ char *ptr)
1129 {
1130     char *s;
1131     PERL_ARGS_ASSERT_LEX_READ_TO;
1132     s = PL_parser->bufptr;
1133     if (ptr < s || ptr > PL_parser->bufend)
1134         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1135     for (; s != ptr; s++)
1136         if (*s == '\n') {
1137             CopLINE_inc(PL_curcop);
1138             PL_parser->linestart = s+1;
1139         }
1140     PL_parser->bufptr = ptr;
1141 }
1142
1143 /*
1144 =for apidoc Amx|void|lex_discard_to|char *ptr
1145
1146 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1147 up to I<ptr>.  The remaining content of the buffer will be moved, and
1148 all pointers into the buffer updated appropriately.  I<ptr> must not
1149 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1150 it is not permitted to discard text that has yet to be lexed.
1151
1152 Normally it is not necessarily to do this directly, because it suffices to
1153 use the implicit discarding behaviour of L</lex_next_chunk> and things
1154 based on it.  However, if a token stretches across multiple lines,
1155 and the lexing code has kept multiple lines of text in the buffer for
1156 that purpose, then after completion of the token it would be wise to
1157 explicitly discard the now-unneeded earlier lines, to avoid future
1158 multi-line tokens growing the buffer without bound.
1159
1160 =cut
1161 */
1162
1163 void
1164 Perl_lex_discard_to(pTHX_ char *ptr)
1165 {
1166     char *buf;
1167     STRLEN discard_len;
1168     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1169     buf = SvPVX(PL_parser->linestr);
1170     if (ptr < buf)
1171         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1172     if (ptr == buf)
1173         return;
1174     if (ptr > PL_parser->bufptr)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1176     discard_len = ptr - buf;
1177     if (PL_parser->oldbufptr < ptr)
1178         PL_parser->oldbufptr = ptr;
1179     if (PL_parser->oldoldbufptr < ptr)
1180         PL_parser->oldoldbufptr = ptr;
1181     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1182         PL_parser->last_uni = NULL;
1183     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1184         PL_parser->last_lop = NULL;
1185     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1186     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1187     PL_parser->bufend -= discard_len;
1188     PL_parser->bufptr -= discard_len;
1189     PL_parser->oldbufptr -= discard_len;
1190     PL_parser->oldoldbufptr -= discard_len;
1191     if (PL_parser->last_uni)
1192         PL_parser->last_uni -= discard_len;
1193     if (PL_parser->last_lop)
1194         PL_parser->last_lop -= discard_len;
1195 }
1196
1197 /*
1198 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1199
1200 Reads in the next chunk of text to be lexed, appending it to
1201 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1202 looked to the end of the current chunk and wants to know more.  It is
1203 usual, but not necessary, for lexing to have consumed the entirety of
1204 the current chunk at this time.
1205
1206 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1207 chunk (i.e., the current chunk has been entirely consumed), normally the
1208 current chunk will be discarded at the same time that the new chunk is
1209 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1210 will not be discarded.  If the current chunk has not been entirely
1211 consumed, then it will not be discarded regardless of the flag.
1212
1213 Returns true if some new text was added to the buffer, or false if the
1214 buffer has reached the end of the input text.
1215
1216 =cut
1217 */
1218
1219 #define LEX_FAKE_EOF 0x80000000
1220
1221 bool
1222 Perl_lex_next_chunk(pTHX_ U32 flags)
1223 {
1224     SV *linestr;
1225     char *buf;
1226     STRLEN old_bufend_pos, new_bufend_pos;
1227     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1228     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1229     bool got_some_for_debugger = 0;
1230     bool got_some;
1231     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1232         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1233     linestr = PL_parser->linestr;
1234     buf = SvPVX(linestr);
1235     if (!(flags & LEX_KEEP_PREVIOUS) &&
1236             PL_parser->bufptr == PL_parser->bufend) {
1237         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1238         linestart_pos = 0;
1239         if (PL_parser->last_uni != PL_parser->bufend)
1240             PL_parser->last_uni = NULL;
1241         if (PL_parser->last_lop != PL_parser->bufend)
1242             PL_parser->last_lop = NULL;
1243         last_uni_pos = last_lop_pos = 0;
1244         *buf = 0;
1245         SvCUR(linestr) = 0;
1246     } else {
1247         old_bufend_pos = PL_parser->bufend - buf;
1248         bufptr_pos = PL_parser->bufptr - buf;
1249         oldbufptr_pos = PL_parser->oldbufptr - buf;
1250         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1251         linestart_pos = PL_parser->linestart - buf;
1252         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1253         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1254     }
1255     if (flags & LEX_FAKE_EOF) {
1256         goto eof;
1257     } else if (!PL_parser->rsfp) {
1258         got_some = 0;
1259     } else if (filter_gets(linestr, old_bufend_pos)) {
1260         got_some = 1;
1261         got_some_for_debugger = 1;
1262     } else {
1263         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1264             sv_setpvs(linestr, "");
1265         eof:
1266         /* End of real input.  Close filehandle (unless it was STDIN),
1267          * then add implicit termination.
1268          */
1269         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1270             PerlIO_clearerr(PL_parser->rsfp);
1271         else if (PL_parser->rsfp)
1272             (void)PerlIO_close(PL_parser->rsfp);
1273         PL_parser->rsfp = NULL;
1274         PL_doextract = FALSE;
1275 #ifdef PERL_MAD
1276         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1277             PL_faketokens = 1;
1278 #endif
1279         if (!PL_in_eval && PL_minus_p) {
1280             sv_catpvs(linestr,
1281                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1282             PL_minus_n = PL_minus_p = 0;
1283         } else if (!PL_in_eval && PL_minus_n) {
1284             sv_catpvs(linestr, /*{*/";}");
1285             PL_minus_n = 0;
1286         } else
1287             sv_catpvs(linestr, ";");
1288         got_some = 1;
1289     }
1290     buf = SvPVX(linestr);
1291     new_bufend_pos = SvCUR(linestr);
1292     PL_parser->bufend = buf + new_bufend_pos;
1293     PL_parser->bufptr = buf + bufptr_pos;
1294     PL_parser->oldbufptr = buf + oldbufptr_pos;
1295     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1296     PL_parser->linestart = buf + linestart_pos;
1297     if (PL_parser->last_uni)
1298         PL_parser->last_uni = buf + last_uni_pos;
1299     if (PL_parser->last_lop)
1300         PL_parser->last_lop = buf + last_lop_pos;
1301     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1302             PL_curstash != PL_debstash) {
1303         /* debugger active and we're not compiling the debugger code,
1304          * so store the line into the debugger's array of lines
1305          */
1306         update_debugger_info(NULL, buf+old_bufend_pos,
1307             new_bufend_pos-old_bufend_pos);
1308     }
1309     return got_some;
1310 }
1311
1312 /*
1313 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1314
1315 Looks ahead one (Unicode) character in the text currently being lexed.
1316 Returns the codepoint (unsigned integer value) of the next character,
1317 or -1 if lexing has reached the end of the input text.  To consume the
1318 peeked character, use L</lex_read_unichar>.
1319
1320 If the next character is in (or extends into) the next chunk of input
1321 text, the next chunk will be read in.  Normally the current chunk will be
1322 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1323 then the current chunk will not be discarded.
1324
1325 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1326 is encountered, an exception is generated.
1327
1328 =cut
1329 */
1330
1331 I32
1332 Perl_lex_peek_unichar(pTHX_ U32 flags)
1333 {
1334     dVAR;
1335     char *s, *bufend;
1336     if (flags & ~(LEX_KEEP_PREVIOUS))
1337         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1338     s = PL_parser->bufptr;
1339     bufend = PL_parser->bufend;
1340     if (UTF) {
1341         U8 head;
1342         I32 unichar;
1343         STRLEN len, retlen;
1344         if (s == bufend) {
1345             if (!lex_next_chunk(flags))
1346                 return -1;
1347             s = PL_parser->bufptr;
1348             bufend = PL_parser->bufend;
1349         }
1350         head = (U8)*s;
1351         if (!(head & 0x80))
1352             return head;
1353         if (head & 0x40) {
1354             len = PL_utf8skip[head];
1355             while ((STRLEN)(bufend-s) < len) {
1356                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1357                     break;
1358                 s = PL_parser->bufptr;
1359                 bufend = PL_parser->bufend;
1360             }
1361         }
1362         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1363         if (retlen == (STRLEN)-1) {
1364             /* malformed UTF-8 */
1365             ENTER;
1366             SAVESPTR(PL_warnhook);
1367             PL_warnhook = PERL_WARNHOOK_FATAL;
1368             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1369             LEAVE;
1370         }
1371         return unichar;
1372     } else {
1373         if (s == bufend) {
1374             if (!lex_next_chunk(flags))
1375                 return -1;
1376             s = PL_parser->bufptr;
1377         }
1378         return (U8)*s;
1379     }
1380 }
1381
1382 /*
1383 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1384
1385 Reads the next (Unicode) character in the text currently being lexed.
1386 Returns the codepoint (unsigned integer value) of the character read,
1387 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1388 if lexing has reached the end of the input text.  To non-destructively
1389 examine the next character, use L</lex_peek_unichar> instead.
1390
1391 If the next character is in (or extends into) the next chunk of input
1392 text, the next chunk will be read in.  Normally the current chunk will be
1393 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1394 then the current chunk will not be discarded.
1395
1396 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1397 is encountered, an exception is generated.
1398
1399 =cut
1400 */
1401
1402 I32
1403 Perl_lex_read_unichar(pTHX_ U32 flags)
1404 {
1405     I32 c;
1406     if (flags & ~(LEX_KEEP_PREVIOUS))
1407         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1408     c = lex_peek_unichar(flags);
1409     if (c != -1) {
1410         if (c == '\n')
1411             CopLINE_inc(PL_curcop);
1412         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1413     }
1414     return c;
1415 }
1416
1417 /*
1418 =for apidoc Amx|void|lex_read_space|U32 flags
1419
1420 Reads optional spaces, in Perl style, in the text currently being
1421 lexed.  The spaces may include ordinary whitespace characters and
1422 Perl-style comments.  C<#line> directives are processed if encountered.
1423 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1424 at a non-space character (or the end of the input text).
1425
1426 If spaces extend into the next chunk of input text, the next chunk will
1427 be read in.  Normally the current chunk will be discarded at the same
1428 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1429 chunk will not be discarded.
1430
1431 =cut
1432 */
1433
1434 #define LEX_NO_NEXT_CHUNK 0x80000000
1435
1436 void
1437 Perl_lex_read_space(pTHX_ U32 flags)
1438 {
1439     char *s, *bufend;
1440     bool need_incline = 0;
1441     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1442         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1443 #ifdef PERL_MAD
1444     if (PL_skipwhite) {
1445         sv_free(PL_skipwhite);
1446         PL_skipwhite = NULL;
1447     }
1448     if (PL_madskills)
1449         PL_skipwhite = newSVpvs("");
1450 #endif /* PERL_MAD */
1451     s = PL_parser->bufptr;
1452     bufend = PL_parser->bufend;
1453     while (1) {
1454         char c = *s;
1455         if (c == '#') {
1456             do {
1457                 c = *++s;
1458             } while (!(c == '\n' || (c == 0 && s == bufend)));
1459         } else if (c == '\n') {
1460             s++;
1461             PL_parser->linestart = s;
1462             if (s == bufend)
1463                 need_incline = 1;
1464             else
1465                 incline(s);
1466         } else if (isSPACE(c)) {
1467             s++;
1468         } else if (c == 0 && s == bufend) {
1469             bool got_more;
1470 #ifdef PERL_MAD
1471             if (PL_madskills)
1472                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1473 #endif /* PERL_MAD */
1474             if (flags & LEX_NO_NEXT_CHUNK)
1475                 break;
1476             PL_parser->bufptr = s;
1477             CopLINE_inc(PL_curcop);
1478             got_more = lex_next_chunk(flags);
1479             CopLINE_dec(PL_curcop);
1480             s = PL_parser->bufptr;
1481             bufend = PL_parser->bufend;
1482             if (!got_more)
1483                 break;
1484             if (need_incline && PL_parser->rsfp) {
1485                 incline(s);
1486                 need_incline = 0;
1487             }
1488         } else {
1489             break;
1490         }
1491     }
1492 #ifdef PERL_MAD
1493     if (PL_madskills)
1494         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1495 #endif /* PERL_MAD */
1496     PL_parser->bufptr = s;
1497 }
1498
1499 /*
1500  * S_incline
1501  * This subroutine has nothing to do with tilting, whether at windmills
1502  * or pinball tables.  Its name is short for "increment line".  It
1503  * increments the current line number in CopLINE(PL_curcop) and checks
1504  * to see whether the line starts with a comment of the form
1505  *    # line 500 "foo.pm"
1506  * If so, it sets the current line number and file to the values in the comment.
1507  */
1508
1509 STATIC void
1510 S_incline(pTHX_ const char *s)
1511 {
1512     dVAR;
1513     const char *t;
1514     const char *n;
1515     const char *e;
1516
1517     PERL_ARGS_ASSERT_INCLINE;
1518
1519     CopLINE_inc(PL_curcop);
1520     if (*s++ != '#')
1521         return;
1522     while (SPACE_OR_TAB(*s))
1523         s++;
1524     if (strnEQ(s, "line", 4))
1525         s += 4;
1526     else
1527         return;
1528     if (SPACE_OR_TAB(*s))
1529         s++;
1530     else
1531         return;
1532     while (SPACE_OR_TAB(*s))
1533         s++;
1534     if (!isDIGIT(*s))
1535         return;
1536
1537     n = s;
1538     while (isDIGIT(*s))
1539         s++;
1540     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1541         return;
1542     while (SPACE_OR_TAB(*s))
1543         s++;
1544     if (*s == '"' && (t = strchr(s+1, '"'))) {
1545         s++;
1546         e = t + 1;
1547     }
1548     else {
1549         t = s;
1550         while (!isSPACE(*t))
1551             t++;
1552         e = t;
1553     }
1554     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1555         e++;
1556     if (*e != '\n' && *e != '\0')
1557         return;         /* false alarm */
1558
1559     if (t - s > 0) {
1560         const STRLEN len = t - s;
1561 #ifndef USE_ITHREADS
1562         SV *const temp_sv = CopFILESV(PL_curcop);
1563         const char *cf;
1564         STRLEN tmplen;
1565
1566         if (temp_sv) {
1567             cf = SvPVX(temp_sv);
1568             tmplen = SvCUR(temp_sv);
1569         } else {
1570             cf = NULL;
1571             tmplen = 0;
1572         }
1573
1574         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1575             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1576              * to *{"::_<newfilename"} */
1577             /* However, the long form of evals is only turned on by the
1578                debugger - usually they're "(eval %lu)" */
1579             char smallbuf[128];
1580             char *tmpbuf;
1581             GV **gvp;
1582             STRLEN tmplen2 = len;
1583             if (tmplen + 2 <= sizeof smallbuf)
1584                 tmpbuf = smallbuf;
1585             else
1586                 Newx(tmpbuf, tmplen + 2, char);
1587             tmpbuf[0] = '_';
1588             tmpbuf[1] = '<';
1589             memcpy(tmpbuf + 2, cf, tmplen);
1590             tmplen += 2;
1591             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1592             if (gvp) {
1593                 char *tmpbuf2;
1594                 GV *gv2;
1595
1596                 if (tmplen2 + 2 <= sizeof smallbuf)
1597                     tmpbuf2 = smallbuf;
1598                 else
1599                     Newx(tmpbuf2, tmplen2 + 2, char);
1600
1601                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1602                     /* Either they malloc'd it, or we malloc'd it,
1603                        so no prefix is present in ours.  */
1604                     tmpbuf2[0] = '_';
1605                     tmpbuf2[1] = '<';
1606                 }
1607
1608                 memcpy(tmpbuf2 + 2, s, tmplen2);
1609                 tmplen2 += 2;
1610
1611                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1612                 if (!isGV(gv2)) {
1613                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1614                     /* adjust ${"::_<newfilename"} to store the new file name */
1615                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1616                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1617                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1618                 }
1619
1620                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1621             }
1622             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1623         }
1624 #endif
1625         CopFILE_free(PL_curcop);
1626         CopFILE_setn(PL_curcop, s, len);
1627     }
1628     CopLINE_set(PL_curcop, atoi(n)-1);
1629 }
1630
1631 #ifdef PERL_MAD
1632 /* skip space before PL_thistoken */
1633
1634 STATIC char *
1635 S_skipspace0(pTHX_ register char *s)
1636 {
1637     PERL_ARGS_ASSERT_SKIPSPACE0;
1638
1639     s = skipspace(s);
1640     if (!PL_madskills)
1641         return s;
1642     if (PL_skipwhite) {
1643         if (!PL_thiswhite)
1644             PL_thiswhite = newSVpvs("");
1645         sv_catsv(PL_thiswhite, PL_skipwhite);
1646         sv_free(PL_skipwhite);
1647         PL_skipwhite = 0;
1648     }
1649     PL_realtokenstart = s - SvPVX(PL_linestr);
1650     return s;
1651 }
1652
1653 /* skip space after PL_thistoken */
1654
1655 STATIC char *
1656 S_skipspace1(pTHX_ register char *s)
1657 {
1658     const char *start = s;
1659     I32 startoff = start - SvPVX(PL_linestr);
1660
1661     PERL_ARGS_ASSERT_SKIPSPACE1;
1662
1663     s = skipspace(s);
1664     if (!PL_madskills)
1665         return s;
1666     start = SvPVX(PL_linestr) + startoff;
1667     if (!PL_thistoken && PL_realtokenstart >= 0) {
1668         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1669         PL_thistoken = newSVpvn(tstart, start - tstart);
1670     }
1671     PL_realtokenstart = -1;
1672     if (PL_skipwhite) {
1673         if (!PL_nextwhite)
1674             PL_nextwhite = newSVpvs("");
1675         sv_catsv(PL_nextwhite, PL_skipwhite);
1676         sv_free(PL_skipwhite);
1677         PL_skipwhite = 0;
1678     }
1679     return s;
1680 }
1681
1682 STATIC char *
1683 S_skipspace2(pTHX_ register char *s, SV **svp)
1684 {
1685     char *start;
1686     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1687     const I32 startoff = s - SvPVX(PL_linestr);
1688
1689     PERL_ARGS_ASSERT_SKIPSPACE2;
1690
1691     s = skipspace(s);
1692     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1693     if (!PL_madskills || !svp)
1694         return s;
1695     start = SvPVX(PL_linestr) + startoff;
1696     if (!PL_thistoken && PL_realtokenstart >= 0) {
1697         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1698         PL_thistoken = newSVpvn(tstart, start - tstart);
1699         PL_realtokenstart = -1;
1700     }
1701     if (PL_skipwhite) {
1702         if (!*svp)
1703             *svp = newSVpvs("");
1704         sv_setsv(*svp, PL_skipwhite);
1705         sv_free(PL_skipwhite);
1706         PL_skipwhite = 0;
1707     }
1708
1709     return s;
1710 }
1711 #endif
1712
1713 STATIC void
1714 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1715 {
1716     AV *av = CopFILEAVx(PL_curcop);
1717     if (av) {
1718         SV * const sv = newSV_type(SVt_PVMG);
1719         if (orig_sv)
1720             sv_setsv(sv, orig_sv);
1721         else
1722             sv_setpvn(sv, buf, len);
1723         (void)SvIOK_on(sv);
1724         SvIV_set(sv, 0);
1725         av_store(av, (I32)CopLINE(PL_curcop), sv);
1726     }
1727 }
1728
1729 /*
1730  * S_skipspace
1731  * Called to gobble the appropriate amount and type of whitespace.
1732  * Skips comments as well.
1733  */
1734
1735 STATIC char *
1736 S_skipspace(pTHX_ register char *s)
1737 {
1738 #ifdef PERL_MAD
1739     char *start = s;
1740 #endif /* PERL_MAD */
1741     PERL_ARGS_ASSERT_SKIPSPACE;
1742 #ifdef PERL_MAD
1743     if (PL_skipwhite) {
1744         sv_free(PL_skipwhite);
1745         PL_skipwhite = NULL;
1746     }
1747 #endif /* PERL_MAD */
1748     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1749         while (s < PL_bufend && SPACE_OR_TAB(*s))
1750             s++;
1751     } else {
1752         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1753         PL_bufptr = s;
1754         lex_read_space(LEX_KEEP_PREVIOUS |
1755                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1756                     LEX_NO_NEXT_CHUNK : 0));
1757         s = PL_bufptr;
1758         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1759         if (PL_linestart > PL_bufptr)
1760             PL_bufptr = PL_linestart;
1761         return s;
1762     }
1763 #ifdef PERL_MAD
1764     if (PL_madskills)
1765         PL_skipwhite = newSVpvn(start, s-start);
1766 #endif /* PERL_MAD */
1767     return s;
1768 }
1769
1770 /*
1771  * S_check_uni
1772  * Check the unary operators to ensure there's no ambiguity in how they're
1773  * used.  An ambiguous piece of code would be:
1774  *     rand + 5
1775  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1776  * the +5 is its argument.
1777  */
1778
1779 STATIC void
1780 S_check_uni(pTHX)
1781 {
1782     dVAR;
1783     const char *s;
1784     const char *t;
1785
1786     if (PL_oldoldbufptr != PL_last_uni)
1787         return;
1788     while (isSPACE(*PL_last_uni))
1789         PL_last_uni++;
1790     s = PL_last_uni;
1791     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1792         s++;
1793     if ((t = strchr(s, '(')) && t < PL_bufptr)
1794         return;
1795
1796     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1797                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1798                      (int)(s - PL_last_uni), PL_last_uni);
1799 }
1800
1801 /*
1802  * LOP : macro to build a list operator.  Its behaviour has been replaced
1803  * with a subroutine, S_lop() for which LOP is just another name.
1804  */
1805
1806 #define LOP(f,x) return lop(f,x,s)
1807
1808 /*
1809  * S_lop
1810  * Build a list operator (or something that might be one).  The rules:
1811  *  - if we have a next token, then it's a list operator [why?]
1812  *  - if the next thing is an opening paren, then it's a function
1813  *  - else it's a list operator
1814  */
1815
1816 STATIC I32
1817 S_lop(pTHX_ I32 f, int x, char *s)
1818 {
1819     dVAR;
1820
1821     PERL_ARGS_ASSERT_LOP;
1822
1823     pl_yylval.ival = f;
1824     CLINE;
1825     PL_expect = x;
1826     PL_bufptr = s;
1827     PL_last_lop = PL_oldbufptr;
1828     PL_last_lop_op = (OPCODE)f;
1829 #ifdef PERL_MAD
1830     if (PL_lasttoke)
1831         return REPORT(LSTOP);
1832 #else
1833     if (PL_nexttoke)
1834         return REPORT(LSTOP);
1835 #endif
1836     if (*s == '(')
1837         return REPORT(FUNC);
1838     s = PEEKSPACE(s);
1839     if (*s == '(')
1840         return REPORT(FUNC);
1841     else
1842         return REPORT(LSTOP);
1843 }
1844
1845 #ifdef PERL_MAD
1846  /*
1847  * S_start_force
1848  * Sets up for an eventual force_next().  start_force(0) basically does
1849  * an unshift, while start_force(-1) does a push.  yylex removes items
1850  * on the "pop" end.
1851  */
1852
1853 STATIC void
1854 S_start_force(pTHX_ int where)
1855 {
1856     int i;
1857
1858     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1859         where = PL_lasttoke;
1860     assert(PL_curforce < 0 || PL_curforce == where);
1861     if (PL_curforce != where) {
1862         for (i = PL_lasttoke; i > where; --i) {
1863             PL_nexttoke[i] = PL_nexttoke[i-1];
1864         }
1865         PL_lasttoke++;
1866     }
1867     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1868         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1869     PL_curforce = where;
1870     if (PL_nextwhite) {
1871         if (PL_madskills)
1872             curmad('^', newSVpvs(""));
1873         CURMAD('_', PL_nextwhite);
1874     }
1875 }
1876
1877 STATIC void
1878 S_curmad(pTHX_ char slot, SV *sv)
1879 {
1880     MADPROP **where;
1881
1882     if (!sv)
1883         return;
1884     if (PL_curforce < 0)
1885         where = &PL_thismad;
1886     else
1887         where = &PL_nexttoke[PL_curforce].next_mad;
1888
1889     if (PL_faketokens)
1890         sv_setpvs(sv, "");
1891     else {
1892         if (!IN_BYTES) {
1893             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1894                 SvUTF8_on(sv);
1895             else if (PL_encoding) {
1896                 sv_recode_to_utf8(sv, PL_encoding);
1897             }
1898         }
1899     }
1900
1901     /* keep a slot open for the head of the list? */
1902     if (slot != '_' && *where && (*where)->mad_key == '^') {
1903         (*where)->mad_key = slot;
1904         sv_free(MUTABLE_SV(((*where)->mad_val)));
1905         (*where)->mad_val = (void*)sv;
1906     }
1907     else
1908         addmad(newMADsv(slot, sv), where, 0);
1909 }
1910 #else
1911 #  define start_force(where)    NOOP
1912 #  define curmad(slot, sv)      NOOP
1913 #endif
1914
1915 /*
1916  * S_force_next
1917  * When the lexer realizes it knows the next token (for instance,
1918  * it is reordering tokens for the parser) then it can call S_force_next
1919  * to know what token to return the next time the lexer is called.  Caller
1920  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1921  * and possibly PL_expect to ensure the lexer handles the token correctly.
1922  */
1923
1924 STATIC void
1925 S_force_next(pTHX_ I32 type)
1926 {
1927     dVAR;
1928 #ifdef DEBUGGING
1929     if (DEBUG_T_TEST) {
1930         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1931         tokereport(type, &NEXTVAL_NEXTTOKE);
1932     }
1933 #endif
1934 #ifdef PERL_MAD
1935     if (PL_curforce < 0)
1936         start_force(PL_lasttoke);
1937     PL_nexttoke[PL_curforce].next_type = type;
1938     if (PL_lex_state != LEX_KNOWNEXT)
1939         PL_lex_defer = PL_lex_state;
1940     PL_lex_state = LEX_KNOWNEXT;
1941     PL_lex_expect = PL_expect;
1942     PL_curforce = -1;
1943 #else
1944     PL_nexttype[PL_nexttoke] = type;
1945     PL_nexttoke++;
1946     if (PL_lex_state != LEX_KNOWNEXT) {
1947         PL_lex_defer = PL_lex_state;
1948         PL_lex_expect = PL_expect;
1949         PL_lex_state = LEX_KNOWNEXT;
1950     }
1951 #endif
1952 }
1953
1954 void
1955 Perl_yyunlex(pTHX)
1956 {
1957     if (PL_parser->yychar != YYEMPTY) {
1958         start_force(-1);
1959         NEXTVAL_NEXTTOKE = PL_parser->yylval;
1960         force_next(PL_parser->yychar);
1961         PL_parser->yychar = YYEMPTY;
1962     }
1963 }
1964
1965 STATIC SV *
1966 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1967 {
1968     dVAR;
1969     SV * const sv = newSVpvn_utf8(start, len,
1970                                   !IN_BYTES
1971                                   && UTF
1972                                   && !is_ascii_string((const U8*)start, len)
1973                                   && is_utf8_string((const U8*)start, len));
1974     return sv;
1975 }
1976
1977 /*
1978  * S_force_word
1979  * When the lexer knows the next thing is a word (for instance, it has
1980  * just seen -> and it knows that the next char is a word char, then
1981  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1982  * lookahead.
1983  *
1984  * Arguments:
1985  *   char *start : buffer position (must be within PL_linestr)
1986  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1987  *   int check_keyword : if true, Perl checks to make sure the word isn't
1988  *       a keyword (do this if the word is a label, e.g. goto FOO)
1989  *   int allow_pack : if true, : characters will also be allowed (require,
1990  *       use, etc. do this)
1991  *   int allow_initial_tick : used by the "sub" lexer only.
1992  */
1993
1994 STATIC char *
1995 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1996 {
1997     dVAR;
1998     register char *s;
1999     STRLEN len;
2000
2001     PERL_ARGS_ASSERT_FORCE_WORD;
2002
2003     start = SKIPSPACE1(start);
2004     s = start;
2005     if (isIDFIRST_lazy_if(s,UTF) ||
2006         (allow_pack && *s == ':') ||
2007         (allow_initial_tick && *s == '\'') )
2008     {
2009         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2010         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2011             return start;
2012         start_force(PL_curforce);
2013         if (PL_madskills)
2014             curmad('X', newSVpvn(start,s-start));
2015         if (token == METHOD) {
2016             s = SKIPSPACE1(s);
2017             if (*s == '(')
2018                 PL_expect = XTERM;
2019             else {
2020                 PL_expect = XOPERATOR;
2021             }
2022         }
2023         if (PL_madskills)
2024             curmad('g', newSVpvs( "forced" ));
2025         NEXTVAL_NEXTTOKE.opval
2026             = (OP*)newSVOP(OP_CONST,0,
2027                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2028         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2029         force_next(token);
2030     }
2031     return s;
2032 }
2033
2034 /*
2035  * S_force_ident
2036  * Called when the lexer wants $foo *foo &foo etc, but the program
2037  * text only contains the "foo" portion.  The first argument is a pointer
2038  * to the "foo", and the second argument is the type symbol to prefix.
2039  * Forces the next token to be a "WORD".
2040  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2041  */
2042
2043 STATIC void
2044 S_force_ident(pTHX_ register const char *s, int kind)
2045 {
2046     dVAR;
2047
2048     PERL_ARGS_ASSERT_FORCE_IDENT;
2049
2050     if (*s) {
2051         const STRLEN len = strlen(s);
2052         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2053         start_force(PL_curforce);
2054         NEXTVAL_NEXTTOKE.opval = o;
2055         force_next(WORD);
2056         if (kind) {
2057             o->op_private = OPpCONST_ENTERED;
2058             /* XXX see note in pp_entereval() for why we forgo typo
2059                warnings if the symbol must be introduced in an eval.
2060                GSAR 96-10-12 */
2061             gv_fetchpvn_flags(s, len,
2062                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2063                               : GV_ADD,
2064                               kind == '$' ? SVt_PV :
2065                               kind == '@' ? SVt_PVAV :
2066                               kind == '%' ? SVt_PVHV :
2067                               SVt_PVGV
2068                               );
2069         }
2070     }
2071 }
2072
2073 NV
2074 Perl_str_to_version(pTHX_ SV *sv)
2075 {
2076     NV retval = 0.0;
2077     NV nshift = 1.0;
2078     STRLEN len;
2079     const char *start = SvPV_const(sv,len);
2080     const char * const end = start + len;
2081     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2082
2083     PERL_ARGS_ASSERT_STR_TO_VERSION;
2084
2085     while (start < end) {
2086         STRLEN skip;
2087         UV n;
2088         if (utf)
2089             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2090         else {
2091             n = *(U8*)start;
2092             skip = 1;
2093         }
2094         retval += ((NV)n)/nshift;
2095         start += skip;
2096         nshift *= 1000;
2097     }
2098     return retval;
2099 }
2100
2101 /*
2102  * S_force_version
2103  * Forces the next token to be a version number.
2104  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2105  * and if "guessing" is TRUE, then no new token is created (and the caller
2106  * must use an alternative parsing method).
2107  */
2108
2109 STATIC char *
2110 S_force_version(pTHX_ char *s, int guessing)
2111 {
2112     dVAR;
2113     OP *version = NULL;
2114     char *d;
2115 #ifdef PERL_MAD
2116     I32 startoff = s - SvPVX(PL_linestr);
2117 #endif
2118
2119     PERL_ARGS_ASSERT_FORCE_VERSION;
2120
2121     s = SKIPSPACE1(s);
2122
2123     d = s;
2124     if (*d == 'v')
2125         d++;
2126     if (isDIGIT(*d)) {
2127         while (isDIGIT(*d) || *d == '_' || *d == '.')
2128             d++;
2129 #ifdef PERL_MAD
2130         if (PL_madskills) {
2131             start_force(PL_curforce);
2132             curmad('X', newSVpvn(s,d-s));
2133         }
2134 #endif
2135         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2136             SV *ver;
2137 #ifdef USE_LOCALE_NUMERIC
2138             char *loc = setlocale(LC_NUMERIC, "C");
2139 #endif
2140             s = scan_num(s, &pl_yylval);
2141 #ifdef USE_LOCALE_NUMERIC
2142             setlocale(LC_NUMERIC, loc);
2143 #endif
2144             version = pl_yylval.opval;
2145             ver = cSVOPx(version)->op_sv;
2146             if (SvPOK(ver) && !SvNIOK(ver)) {
2147                 SvUPGRADE(ver, SVt_PVNV);
2148                 SvNV_set(ver, str_to_version(ver));
2149                 SvNOK_on(ver);          /* hint that it is a version */
2150             }
2151         }
2152         else if (guessing) {
2153 #ifdef PERL_MAD
2154             if (PL_madskills) {
2155                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2156                 PL_nextwhite = 0;
2157                 s = SvPVX(PL_linestr) + startoff;
2158             }
2159 #endif
2160             return s;
2161         }
2162     }
2163
2164 #ifdef PERL_MAD
2165     if (PL_madskills && !version) {
2166         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2167         PL_nextwhite = 0;
2168         s = SvPVX(PL_linestr) + startoff;
2169     }
2170 #endif
2171     /* NOTE: The parser sees the package name and the VERSION swapped */
2172     start_force(PL_curforce);
2173     NEXTVAL_NEXTTOKE.opval = version;
2174     force_next(WORD);
2175
2176     return s;
2177 }
2178
2179 /*
2180  * S_force_strict_version
2181  * Forces the next token to be a version number using strict syntax rules.
2182  */
2183
2184 STATIC char *
2185 S_force_strict_version(pTHX_ char *s)
2186 {
2187     dVAR;
2188     OP *version = NULL;
2189 #ifdef PERL_MAD
2190     I32 startoff = s - SvPVX(PL_linestr);
2191 #endif
2192     const char *errstr = NULL;
2193
2194     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2195
2196     while (isSPACE(*s)) /* leading whitespace */
2197         s++;
2198
2199     if (is_STRICT_VERSION(s,&errstr)) {
2200         SV *ver = newSV(0);
2201         s = (char *)scan_version(s, ver, 0);
2202         version = newSVOP(OP_CONST, 0, ver);
2203     }
2204     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2205             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2206     {
2207         PL_bufptr = s;
2208         if (errstr)
2209             yyerror(errstr); /* version required */
2210         return s;
2211     }
2212
2213 #ifdef PERL_MAD
2214     if (PL_madskills && !version) {
2215         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2216         PL_nextwhite = 0;
2217         s = SvPVX(PL_linestr) + startoff;
2218     }
2219 #endif
2220     /* NOTE: The parser sees the package name and the VERSION swapped */
2221     start_force(PL_curforce);
2222     NEXTVAL_NEXTTOKE.opval = version;
2223     force_next(WORD);
2224
2225     return s;
2226 }
2227
2228 /*
2229  * S_tokeq
2230  * Tokenize a quoted string passed in as an SV.  It finds the next
2231  * chunk, up to end of string or a backslash.  It may make a new
2232  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2233  * turns \\ into \.
2234  */
2235
2236 STATIC SV *
2237 S_tokeq(pTHX_ SV *sv)
2238 {
2239     dVAR;
2240     register char *s;
2241     register char *send;
2242     register char *d;
2243     STRLEN len = 0;
2244     SV *pv = sv;
2245
2246     PERL_ARGS_ASSERT_TOKEQ;
2247
2248     if (!SvLEN(sv))
2249         goto finish;
2250
2251     s = SvPV_force(sv, len);
2252     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2253         goto finish;
2254     send = s + len;
2255     while (s < send && *s != '\\')
2256         s++;
2257     if (s == send)
2258         goto finish;
2259     d = s;
2260     if ( PL_hints & HINT_NEW_STRING ) {
2261         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2262     }
2263     while (s < send) {
2264         if (*s == '\\') {
2265             if (s + 1 < send && (s[1] == '\\'))
2266                 s++;            /* all that, just for this */
2267         }
2268         *d++ = *s++;
2269     }
2270     *d = '\0';
2271     SvCUR_set(sv, d - SvPVX_const(sv));
2272   finish:
2273     if ( PL_hints & HINT_NEW_STRING )
2274        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2275     return sv;
2276 }
2277
2278 /*
2279  * Now come three functions related to double-quote context,
2280  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2281  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2282  * interact with PL_lex_state, and create fake ( ... ) argument lists
2283  * to handle functions and concatenation.
2284  * They assume that whoever calls them will be setting up a fake
2285  * join call, because each subthing puts a ',' after it.  This lets
2286  *   "lower \luPpEr"
2287  * become
2288  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2289  *
2290  * (I'm not sure whether the spurious commas at the end of lcfirst's
2291  * arguments and join's arguments are created or not).
2292  */
2293
2294 /*
2295  * S_sublex_start
2296  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2297  *
2298  * Pattern matching will set PL_lex_op to the pattern-matching op to
2299  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2300  *
2301  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2302  *
2303  * Everything else becomes a FUNC.
2304  *
2305  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2306  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2307  * call to S_sublex_push().
2308  */
2309
2310 STATIC I32
2311 S_sublex_start(pTHX)
2312 {
2313     dVAR;
2314     register const I32 op_type = pl_yylval.ival;
2315
2316     if (op_type == OP_NULL) {
2317         pl_yylval.opval = PL_lex_op;
2318         PL_lex_op = NULL;
2319         return THING;
2320     }
2321     if (op_type == OP_CONST || op_type == OP_READLINE) {
2322         SV *sv = tokeq(PL_lex_stuff);
2323
2324         if (SvTYPE(sv) == SVt_PVIV) {
2325             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2326             STRLEN len;
2327             const char * const p = SvPV_const(sv, len);
2328             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2329             SvREFCNT_dec(sv);
2330             sv = nsv;
2331         }
2332         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2333         PL_lex_stuff = NULL;
2334         /* Allow <FH> // "foo" */
2335         if (op_type == OP_READLINE)
2336             PL_expect = XTERMORDORDOR;
2337         return THING;
2338     }
2339     else if (op_type == OP_BACKTICK && PL_lex_op) {
2340         /* readpipe() vas overriden */
2341         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2342         pl_yylval.opval = PL_lex_op;
2343         PL_lex_op = NULL;
2344         PL_lex_stuff = NULL;
2345         return THING;
2346     }
2347
2348     PL_sublex_info.super_state = PL_lex_state;
2349     PL_sublex_info.sub_inwhat = (U16)op_type;
2350     PL_sublex_info.sub_op = PL_lex_op;
2351     PL_lex_state = LEX_INTERPPUSH;
2352
2353     PL_expect = XTERM;
2354     if (PL_lex_op) {
2355         pl_yylval.opval = PL_lex_op;
2356         PL_lex_op = NULL;
2357         return PMFUNC;
2358     }
2359     else
2360         return FUNC;
2361 }
2362
2363 /*
2364  * S_sublex_push
2365  * Create a new scope to save the lexing state.  The scope will be
2366  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2367  * to the uc, lc, etc. found before.
2368  * Sets PL_lex_state to LEX_INTERPCONCAT.
2369  */
2370
2371 STATIC I32
2372 S_sublex_push(pTHX)
2373 {
2374     dVAR;
2375     ENTER;
2376
2377     PL_lex_state = PL_sublex_info.super_state;
2378     SAVEBOOL(PL_lex_dojoin);
2379     SAVEI32(PL_lex_brackets);
2380     SAVEI32(PL_lex_casemods);
2381     SAVEI32(PL_lex_starts);
2382     SAVEI8(PL_lex_state);
2383     SAVEVPTR(PL_lex_inpat);
2384     SAVEI16(PL_lex_inwhat);
2385     SAVECOPLINE(PL_curcop);
2386     SAVEPPTR(PL_bufptr);
2387     SAVEPPTR(PL_bufend);
2388     SAVEPPTR(PL_oldbufptr);
2389     SAVEPPTR(PL_oldoldbufptr);
2390     SAVEPPTR(PL_last_lop);
2391     SAVEPPTR(PL_last_uni);
2392     SAVEPPTR(PL_linestart);
2393     SAVESPTR(PL_linestr);
2394     SAVEGENERICPV(PL_lex_brackstack);
2395     SAVEGENERICPV(PL_lex_casestack);
2396
2397     PL_linestr = PL_lex_stuff;
2398     PL_lex_stuff = NULL;
2399
2400     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2401         = SvPVX(PL_linestr);
2402     PL_bufend += SvCUR(PL_linestr);
2403     PL_last_lop = PL_last_uni = NULL;
2404     SAVEFREESV(PL_linestr);
2405
2406     PL_lex_dojoin = FALSE;
2407     PL_lex_brackets = 0;
2408     Newx(PL_lex_brackstack, 120, char);
2409     Newx(PL_lex_casestack, 12, char);
2410     PL_lex_casemods = 0;
2411     *PL_lex_casestack = '\0';
2412     PL_lex_starts = 0;
2413     PL_lex_state = LEX_INTERPCONCAT;
2414     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2415
2416     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2417     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2418         PL_lex_inpat = PL_sublex_info.sub_op;
2419     else
2420         PL_lex_inpat = NULL;
2421
2422     return '(';
2423 }
2424
2425 /*
2426  * S_sublex_done
2427  * Restores lexer state after a S_sublex_push.
2428  */
2429
2430 STATIC I32
2431 S_sublex_done(pTHX)
2432 {
2433     dVAR;
2434     if (!PL_lex_starts++) {
2435         SV * const sv = newSVpvs("");
2436         if (SvUTF8(PL_linestr))
2437             SvUTF8_on(sv);
2438         PL_expect = XOPERATOR;
2439         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2440         return THING;
2441     }
2442
2443     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2444         PL_lex_state = LEX_INTERPCASEMOD;
2445         return yylex();
2446     }
2447
2448     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2449     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2450         PL_linestr = PL_lex_repl;
2451         PL_lex_inpat = 0;
2452         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2453         PL_bufend += SvCUR(PL_linestr);
2454         PL_last_lop = PL_last_uni = NULL;
2455         SAVEFREESV(PL_linestr);
2456         PL_lex_dojoin = FALSE;
2457         PL_lex_brackets = 0;
2458         PL_lex_casemods = 0;
2459         *PL_lex_casestack = '\0';
2460         PL_lex_starts = 0;
2461         if (SvEVALED(PL_lex_repl)) {
2462             PL_lex_state = LEX_INTERPNORMAL;
2463             PL_lex_starts++;
2464             /*  we don't clear PL_lex_repl here, so that we can check later
2465                 whether this is an evalled subst; that means we rely on the
2466                 logic to ensure sublex_done() is called again only via the
2467                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2468         }
2469         else {
2470             PL_lex_state = LEX_INTERPCONCAT;
2471             PL_lex_repl = NULL;
2472         }
2473         return ',';
2474     }
2475     else {
2476 #ifdef PERL_MAD
2477         if (PL_madskills) {
2478             if (PL_thiswhite) {
2479                 if (!PL_endwhite)
2480                     PL_endwhite = newSVpvs("");
2481                 sv_catsv(PL_endwhite, PL_thiswhite);
2482                 PL_thiswhite = 0;
2483             }
2484             if (PL_thistoken)
2485                 sv_setpvs(PL_thistoken,"");
2486             else
2487                 PL_realtokenstart = -1;
2488         }
2489 #endif
2490         LEAVE;
2491         PL_bufend = SvPVX(PL_linestr);
2492         PL_bufend += SvCUR(PL_linestr);
2493         PL_expect = XOPERATOR;
2494         PL_sublex_info.sub_inwhat = 0;
2495         return ')';
2496     }
2497 }
2498
2499 /*
2500   scan_const
2501
2502   Extracts a pattern, double-quoted string, or transliteration.  This
2503   is terrifying code.
2504
2505   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2506   processing a pattern (PL_lex_inpat is true), a transliteration
2507   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2508
2509   Returns a pointer to the character scanned up to. If this is
2510   advanced from the start pointer supplied (i.e. if anything was
2511   successfully parsed), will leave an OP for the substring scanned
2512   in pl_yylval. Caller must intuit reason for not parsing further
2513   by looking at the next characters herself.
2514
2515   In patterns:
2516     backslashes:
2517       constants: \N{NAME} only
2518       case and quoting: \U \Q \E
2519     stops on @ and $, but not for $ as tail anchor
2520
2521   In transliterations:
2522     characters are VERY literal, except for - not at the start or end
2523     of the string, which indicates a range. If the range is in bytes,
2524     scan_const expands the range to the full set of intermediate
2525     characters. If the range is in utf8, the hyphen is replaced with
2526     a certain range mark which will be handled by pmtrans() in op.c.
2527
2528   In double-quoted strings:
2529     backslashes:
2530       double-quoted style: \r and \n
2531       constants: \x31, etc.
2532       deprecated backrefs: \1 (in substitution replacements)
2533       case and quoting: \U \Q \E
2534     stops on @ and $
2535
2536   scan_const does *not* construct ops to handle interpolated strings.
2537   It stops processing as soon as it finds an embedded $ or @ variable
2538   and leaves it to the caller to work out what's going on.
2539
2540   embedded arrays (whether in pattern or not) could be:
2541       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2542
2543   $ in double-quoted strings must be the symbol of an embedded scalar.
2544
2545   $ in pattern could be $foo or could be tail anchor.  Assumption:
2546   it's a tail anchor if $ is the last thing in the string, or if it's
2547   followed by one of "()| \r\n\t"
2548
2549   \1 (backreferences) are turned into $1
2550
2551   The structure of the code is
2552       while (there's a character to process) {
2553           handle transliteration ranges
2554           skip regexp comments /(?#comment)/ and codes /(?{code})/
2555           skip #-initiated comments in //x patterns
2556           check for embedded arrays
2557           check for embedded scalars
2558           if (backslash) {
2559               deprecate \1 in substitution replacements
2560               handle string-changing backslashes \l \U \Q \E, etc.
2561               switch (what was escaped) {
2562                   handle \- in a transliteration (becomes a literal -)
2563                   if a pattern and not \N{, go treat as regular character
2564                   handle \132 (octal characters)
2565                   handle \x15 and \x{1234} (hex characters)
2566                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2567                   handle \cV (control characters)
2568                   handle printf-style backslashes (\f, \r, \n, etc)
2569               } (end switch)
2570               continue
2571           } (end if backslash)
2572           handle regular character
2573     } (end while character to read)
2574
2575 */
2576
2577 STATIC char *
2578 S_scan_const(pTHX_ char *start)
2579 {
2580     dVAR;
2581     register char *send = PL_bufend;            /* end of the constant */
2582     SV *sv = newSV(send - start);               /* sv for the constant.  See
2583                                                    note below on sizing. */
2584     register char *s = start;                   /* start of the constant */
2585     register char *d = SvPVX(sv);               /* destination for copies */
2586     bool dorange = FALSE;                       /* are we in a translit range? */
2587     bool didrange = FALSE;                      /* did we just finish a range? */
2588     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2589     I32  this_utf8 = UTF;                       /* Is the source string assumed
2590                                                    to be UTF8?  But, this can
2591                                                    show as true when the source
2592                                                    isn't utf8, as for example
2593                                                    when it is entirely composed
2594                                                    of hex constants */
2595
2596     /* Note on sizing:  The scanned constant is placed into sv, which is
2597      * initialized by newSV() assuming one byte of output for every byte of
2598      * input.  This routine expects newSV() to allocate an extra byte for a
2599      * trailing NUL, which this routine will append if it gets to the end of
2600      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2601      * CAPITAL LETTER A}), or more output than input if the constant ends up
2602      * recoded to utf8, but each time a construct is found that might increase
2603      * the needed size, SvGROW() is called.  Its size parameter each time is
2604      * based on the best guess estimate at the time, namely the length used so
2605      * far, plus the length the current construct will occupy, plus room for
2606      * the trailing NUL, plus one byte for every input byte still unscanned */
2607
2608     UV uv;
2609 #ifdef EBCDIC
2610     UV literal_endpoint = 0;
2611     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2612 #endif
2613
2614     PERL_ARGS_ASSERT_SCAN_CONST;
2615
2616     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2617         /* If we are doing a trans and we know we want UTF8 set expectation */
2618         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2619         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2620     }
2621
2622
2623     while (s < send || dorange) {
2624
2625         /* get transliterations out of the way (they're most literal) */
2626         if (PL_lex_inwhat == OP_TRANS) {
2627             /* expand a range A-Z to the full set of characters.  AIE! */
2628             if (dorange) {
2629                 I32 i;                          /* current expanded character */
2630                 I32 min;                        /* first character in range */
2631                 I32 max;                        /* last character in range */
2632
2633 #ifdef EBCDIC
2634                 UV uvmax = 0;
2635 #endif
2636
2637                 if (has_utf8
2638 #ifdef EBCDIC
2639                     && !native_range
2640 #endif
2641                     ) {
2642                     char * const c = (char*)utf8_hop((U8*)d, -1);
2643                     char *e = d++;
2644                     while (e-- > c)
2645                         *(e + 1) = *e;
2646                     *c = (char)UTF_TO_NATIVE(0xff);
2647                     /* mark the range as done, and continue */
2648                     dorange = FALSE;
2649                     didrange = TRUE;
2650                     continue;
2651                 }
2652
2653                 i = d - SvPVX_const(sv);                /* remember current offset */
2654 #ifdef EBCDIC
2655                 SvGROW(sv,
2656                        SvLEN(sv) + (has_utf8 ?
2657                                     (512 - UTF_CONTINUATION_MARK +
2658                                      UNISKIP(0x100))
2659                                     : 256));
2660                 /* How many two-byte within 0..255: 128 in UTF-8,
2661                  * 96 in UTF-8-mod. */
2662 #else
2663                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2664 #endif
2665                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2666 #ifdef EBCDIC
2667                 if (has_utf8) {
2668                     int j;
2669                     for (j = 0; j <= 1; j++) {
2670                         char * const c = (char*)utf8_hop((U8*)d, -1);
2671                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2672                         if (j)
2673                             min = (U8)uv;
2674                         else if (uv < 256)
2675                             max = (U8)uv;
2676                         else {
2677                             max = (U8)0xff; /* only to \xff */
2678                             uvmax = uv; /* \x{100} to uvmax */
2679                         }
2680                         d = c; /* eat endpoint chars */
2681                      }
2682                 }
2683                else {
2684 #endif
2685                    d -= 2;              /* eat the first char and the - */
2686                    min = (U8)*d;        /* first char in range */
2687                    max = (U8)d[1];      /* last char in range  */
2688 #ifdef EBCDIC
2689                }
2690 #endif
2691
2692                 if (min > max) {
2693                     Perl_croak(aTHX_
2694                                "Invalid range \"%c-%c\" in transliteration operator",
2695                                (char)min, (char)max);
2696                 }
2697
2698 #ifdef EBCDIC
2699                 if (literal_endpoint == 2 &&
2700                     ((isLOWER(min) && isLOWER(max)) ||
2701                      (isUPPER(min) && isUPPER(max)))) {
2702                     if (isLOWER(min)) {
2703                         for (i = min; i <= max; i++)
2704                             if (isLOWER(i))
2705                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2706                     } else {
2707                         for (i = min; i <= max; i++)
2708                             if (isUPPER(i))
2709                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2710                     }
2711                 }
2712                 else
2713 #endif
2714                     for (i = min; i <= max; i++)
2715 #ifdef EBCDIC
2716                         if (has_utf8) {
2717                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2718                             if (UNI_IS_INVARIANT(ch))
2719                                 *d++ = (U8)i;
2720                             else {
2721                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2722                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2723                             }
2724                         }
2725                         else
2726 #endif
2727                             *d++ = (char)i;
2728
2729 #ifdef EBCDIC
2730                 if (uvmax) {
2731                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2732                     if (uvmax > 0x101)
2733                         *d++ = (char)UTF_TO_NATIVE(0xff);
2734                     if (uvmax > 0x100)
2735                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2736                 }
2737 #endif
2738
2739                 /* mark the range as done, and continue */
2740                 dorange = FALSE;
2741                 didrange = TRUE;
2742 #ifdef EBCDIC
2743                 literal_endpoint = 0;
2744 #endif
2745                 continue;
2746             }
2747
2748             /* range begins (ignore - as first or last char) */
2749             else if (*s == '-' && s+1 < send  && s != start) {
2750                 if (didrange) {
2751                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2752                 }
2753                 if (has_utf8
2754 #ifdef EBCDIC
2755                     && !native_range
2756 #endif
2757                     ) {
2758                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2759                     s++;
2760                     continue;
2761                 }
2762                 dorange = TRUE;
2763                 s++;
2764             }
2765             else {
2766                 didrange = FALSE;
2767 #ifdef EBCDIC
2768                 literal_endpoint = 0;
2769                 native_range = TRUE;
2770 #endif
2771             }
2772         }
2773
2774         /* if we get here, we're not doing a transliteration */
2775
2776         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2777            except for the last char, which will be done separately. */
2778         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2779             if (s[2] == '#') {
2780                 while (s+1 < send && *s != ')')
2781                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2782             }
2783             else if (s[2] == '{' /* This should match regcomp.c */
2784                     || (s[2] == '?' && s[3] == '{'))
2785             {
2786                 I32 count = 1;
2787                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2788                 char c;
2789
2790                 while (count && (c = *regparse)) {
2791                     if (c == '\\' && regparse[1])
2792                         regparse++;
2793                     else if (c == '{')
2794                         count++;
2795                     else if (c == '}')
2796                         count--;
2797                     regparse++;
2798                 }
2799                 if (*regparse != ')')
2800                     regparse--;         /* Leave one char for continuation. */
2801                 while (s < regparse)
2802                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2803             }
2804         }
2805
2806         /* likewise skip #-initiated comments in //x patterns */
2807         else if (*s == '#' && PL_lex_inpat &&
2808           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2809             while (s+1 < send && *s != '\n')
2810                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2811         }
2812
2813         /* check for embedded arrays
2814            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2815            */
2816         else if (*s == '@' && s[1]) {
2817             if (isALNUM_lazy_if(s+1,UTF))
2818                 break;
2819             if (strchr(":'{$", s[1]))
2820                 break;
2821             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2822                 break; /* in regexp, neither @+ nor @- are interpolated */
2823         }
2824
2825         /* check for embedded scalars.  only stop if we're sure it's a
2826            variable.
2827         */
2828         else if (*s == '$') {
2829             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2830                 break;
2831             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2832                 if (s[1] == '\\') {
2833                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2834                                    "Possible unintended interpolation of $\\ in regex");
2835                 }
2836                 break;          /* in regexp, $ might be tail anchor */
2837             }
2838         }
2839
2840         /* End of else if chain - OP_TRANS rejoin rest */
2841
2842         /* backslashes */
2843         if (*s == '\\' && s+1 < send) {
2844             char* e;    /* Can be used for ending '}', etc. */
2845
2846             s++;
2847
2848             /* warn on \1 - \9 in substitution replacements, but note that \11
2849              * is an octal; and \19 is \1 followed by '9' */
2850             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2851                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2852             {
2853                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2854                 *--s = '$';
2855                 break;
2856             }
2857
2858             /* string-change backslash escapes */
2859             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2860                 --s;
2861                 break;
2862             }
2863             /* In a pattern, process \N, but skip any other backslash escapes.
2864              * This is because we don't want to translate an escape sequence
2865              * into a meta symbol and have the regex compiler use the meta
2866              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2867              * in spite of this, we do have to process \N here while the proper
2868              * charnames handler is in scope.  See bugs #56444 and #62056.
2869              * There is a complication because \N in a pattern may also stand
2870              * for 'match a non-nl', and not mean a charname, in which case its
2871              * processing should be deferred to the regex compiler.  To be a
2872              * charname it must be followed immediately by a '{', and not look
2873              * like \N followed by a curly quantifier, i.e., not something like
2874              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2875              * quantifier */
2876             else if (PL_lex_inpat
2877                     && (*s != 'N'
2878                         || s[1] != '{'
2879                         || regcurly(s + 1)))
2880             {
2881                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2882                 goto default_action;
2883             }
2884
2885             switch (*s) {
2886
2887             /* quoted - in transliterations */
2888             case '-':
2889                 if (PL_lex_inwhat == OP_TRANS) {
2890                     *d++ = *s++;
2891                     continue;
2892                 }
2893                 /* FALL THROUGH */
2894             default:
2895                 {
2896                     if ((isALPHA(*s) || isDIGIT(*s)))
2897                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2898                                        "Unrecognized escape \\%c passed through",
2899                                        *s);
2900                     /* default action is to copy the quoted character */
2901                     goto default_action;
2902                 }
2903
2904             /* eg. \132 indicates the octal constant 0132 */
2905             case '0': case '1': case '2': case '3':
2906             case '4': case '5': case '6': case '7':
2907                 {
2908                     I32 flags = 0;
2909                     STRLEN len = 3;
2910                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2911                     s += len;
2912                 }
2913                 goto NUM_ESCAPE_INSERT;
2914
2915             /* eg. \o{24} indicates the octal constant \024 */
2916             case 'o':
2917                 {
2918                     STRLEN len;
2919                     const char* error;
2920
2921                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2922                     s += len;
2923                     if (! valid) {
2924                         yyerror(error);
2925                         continue;
2926                     }
2927                     goto NUM_ESCAPE_INSERT;
2928                 }
2929
2930             /* eg. \x24 indicates the hex constant 0x24 */
2931             case 'x':
2932                 ++s;
2933                 if (*s == '{') {
2934                     char* const e = strchr(s, '}');
2935                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2936                       PERL_SCAN_DISALLOW_PREFIX;
2937                     STRLEN len;
2938
2939                     ++s;
2940                     if (!e) {
2941                         yyerror("Missing right brace on \\x{}");
2942                         continue;
2943                     }
2944                     len = e - s;
2945                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2946                     s = e + 1;
2947                 }
2948                 else {
2949                     {
2950                         STRLEN len = 2;
2951                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2952                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2953                         s += len;
2954                     }
2955                 }
2956
2957               NUM_ESCAPE_INSERT:
2958                 /* Insert oct or hex escaped character.  There will always be
2959                  * enough room in sv since such escapes will be longer than any
2960                  * UTF-8 sequence they can end up as, except if they force us
2961                  * to recode the rest of the string into utf8 */
2962
2963                 /* Here uv is the ordinal of the next character being added in
2964                  * unicode (converted from native). */
2965                 if (!UNI_IS_INVARIANT(uv)) {
2966                     if (!has_utf8 && uv > 255) {
2967                         /* Might need to recode whatever we have accumulated so
2968                          * far if it contains any chars variant in utf8 or
2969                          * utf-ebcdic. */
2970
2971                         SvCUR_set(sv, d - SvPVX_const(sv));
2972                         SvPOK_on(sv);
2973                         *d = '\0';
2974                         /* See Note on sizing above.  */
2975                         sv_utf8_upgrade_flags_grow(sv,
2976                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2977                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2978                         d = SvPVX(sv) + SvCUR(sv);
2979                         has_utf8 = TRUE;
2980                     }
2981
2982                     if (has_utf8) {
2983                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2984                         if (PL_lex_inwhat == OP_TRANS &&
2985                             PL_sublex_info.sub_op) {
2986                             PL_sublex_info.sub_op->op_private |=
2987                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2988                                              : OPpTRANS_TO_UTF);
2989                         }
2990 #ifdef EBCDIC
2991                         if (uv > 255 && !dorange)
2992                             native_range = FALSE;
2993 #endif
2994                     }
2995                     else {
2996                         *d++ = (char)uv;
2997                     }
2998                 }
2999                 else {
3000                     *d++ = (char) uv;
3001                 }
3002                 continue;
3003
3004             case 'N':
3005                 /* In a non-pattern \N must be a named character, like \N{LATIN
3006                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3007                  * mean to match a non-newline.  For non-patterns, named
3008                  * characters are converted to their string equivalents. In
3009                  * patterns, named characters are not converted to their
3010                  * ultimate forms for the same reasons that other escapes
3011                  * aren't.  Instead, they are converted to the \N{U+...} form
3012                  * to get the value from the charnames that is in effect right
3013                  * now, while preserving the fact that it was a named character
3014                  * so that the regex compiler knows this */
3015
3016                 /* This section of code doesn't generally use the
3017                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3018                  * a close examination of this macro and determined it is a
3019                  * no-op except on utfebcdic variant characters.  Every
3020                  * character generated by this that would normally need to be
3021                  * enclosed by this macro is invariant, so the macro is not
3022                  * needed, and would complicate use of copy(). There are other
3023                  * parts of this file where the macro is used inconsistently,
3024                  * but are saved by it being a no-op */
3025
3026                 /* The structure of this section of code (besides checking for
3027                  * errors and upgrading to utf8) is:
3028                  *  Further disambiguate between the two meanings of \N, and if
3029                  *      not a charname, go process it elsewhere
3030                  *  If of form \N{U+...}, pass it through if a pattern;
3031                  *      otherwise convert to utf8
3032                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3033                  *  pattern; otherwise convert to utf8 */
3034
3035                 /* Here, s points to the 'N'; the test below is guaranteed to
3036                  * succeed if we are being called on a pattern as we already
3037                  * know from a test above that the next character is a '{'.
3038                  * On a non-pattern \N must mean 'named sequence, which
3039                  * requires braces */
3040                 s++;
3041                 if (*s != '{') {
3042                     yyerror("Missing braces on \\N{}");
3043                     continue;
3044                 }
3045                 s++;
3046
3047                 /* If there is no matching '}', it is an error. */
3048                 if (! (e = strchr(s, '}'))) {
3049                     if (! PL_lex_inpat) {
3050                         yyerror("Missing right brace on \\N{}");
3051                     } else {
3052                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3053                     }
3054                     continue;
3055                 }
3056
3057                 /* Here it looks like a named character */
3058
3059                 if (PL_lex_inpat) {
3060
3061                     /* XXX This block is temporary code.  \N{} implies that the
3062                      * pattern is to have Unicode semantics, and therefore
3063                      * currently has to be encoded in utf8.  By putting it in
3064                      * utf8 now, we save a whole pass in the regular expression
3065                      * compiler.  Once that code is changed so Unicode
3066                      * semantics doesn't necessarily have to be in utf8, this
3067                      * block should be removed */
3068                     if (!has_utf8) {
3069                         SvCUR_set(sv, d - SvPVX_const(sv));
3070                         SvPOK_on(sv);
3071                         *d = '\0';
3072                         /* See Note on sizing above.  */
3073                         sv_utf8_upgrade_flags_grow(sv,
3074                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3075                                         /* 5 = '\N{' + cur char + NUL */
3076                                         (STRLEN)(send - s) + 5);
3077                         d = SvPVX(sv) + SvCUR(sv);
3078                         has_utf8 = TRUE;
3079                     }
3080                 }
3081
3082                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3083                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3084                                 | PERL_SCAN_DISALLOW_PREFIX;
3085                     STRLEN len;
3086
3087                     /* For \N{U+...}, the '...' is a unicode value even on
3088                      * EBCDIC machines */
3089                     s += 2;         /* Skip to next char after the 'U+' */
3090                     len = e - s;
3091                     uv = grok_hex(s, &len, &flags, NULL);
3092                     if (len == 0 || len != (STRLEN)(e - s)) {
3093                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3094                         s = e + 1;
3095                         continue;
3096                     }
3097
3098                     if (PL_lex_inpat) {
3099
3100                         /* Pass through to the regex compiler unchanged.  The
3101                          * reason we evaluated the number above is to make sure
3102                          * there wasn't a syntax error. */
3103                         s -= 5;     /* Include the '\N{U+' */
3104                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3105                         d += e - s + 1;
3106                     }
3107                     else {  /* Not a pattern: convert the hex to string */
3108
3109                          /* If destination is not in utf8, unconditionally
3110                           * recode it to be so.  This is because \N{} implies
3111                           * Unicode semantics, and scalars have to be in utf8
3112                           * to guarantee those semantics */
3113                         if (! has_utf8) {
3114                             SvCUR_set(sv, d - SvPVX_const(sv));
3115                             SvPOK_on(sv);
3116                             *d = '\0';
3117                             /* See Note on sizing above.  */
3118                             sv_utf8_upgrade_flags_grow(
3119                                         sv,
3120                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3121                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3122                             d = SvPVX(sv) + SvCUR(sv);
3123                             has_utf8 = TRUE;
3124                         }
3125
3126                         /* Add the string to the output */
3127                         if (UNI_IS_INVARIANT(uv)) {
3128                             *d++ = (char) uv;
3129                         }
3130                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3131                     }
3132                 }
3133                 else { /* Here is \N{NAME} but not \N{U+...}. */
3134
3135                     SV *res;            /* result from charnames */
3136                     const char *str;    /* the string in 'res' */
3137                     STRLEN len;         /* its length */
3138
3139                     /* Get the value for NAME */
3140                     res = newSVpvn(s, e - s);
3141                     res = new_constant( NULL, 0, "charnames",
3142                                         /* includes all of: \N{...} */
3143                                         res, NULL, s - 3, e - s + 4 );
3144
3145                     /* Most likely res will be in utf8 already since the
3146                      * standard charnames uses pack U, but a custom translator
3147                      * can leave it otherwise, so make sure.  XXX This can be
3148                      * revisited to not have charnames use utf8 for characters
3149                      * that don't need it when regexes don't have to be in utf8
3150                      * for Unicode semantics.  If doing so, remember EBCDIC */
3151                     sv_utf8_upgrade(res);
3152                     str = SvPV_const(res, len);
3153
3154                     /* Don't accept malformed input */
3155                     if (! is_utf8_string((U8 *) str, len)) {
3156                         yyerror("Malformed UTF-8 returned by \\N");
3157                     }
3158                     else if (PL_lex_inpat) {
3159
3160                         if (! len) { /* The name resolved to an empty string */
3161                             Copy("\\N{}", d, 4, char);
3162                             d += 4;
3163                         }
3164                         else {
3165                             /* In order to not lose information for the regex
3166                             * compiler, pass the result in the specially made
3167                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3168                             * the code points in hex of each character
3169                             * returned by charnames */
3170
3171                             const char *str_end = str + len;
3172                             STRLEN char_length;     /* cur char's byte length */
3173                             STRLEN output_length;   /* and the number of bytes
3174                                                        after this is translated
3175                                                        into hex digits */
3176                             const STRLEN off = d - SvPVX_const(sv);
3177
3178                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3179                              * max('U+', '.'); and 1 for NUL */
3180                             char hex_string[2 * UTF8_MAXBYTES + 5];
3181
3182                             /* Get the first character of the result. */
3183                             U32 uv = utf8n_to_uvuni((U8 *) str,
3184                                                     len,
3185                                                     &char_length,
3186                                                     UTF8_ALLOW_ANYUV);
3187
3188                             /* The call to is_utf8_string() above hopefully
3189                              * guarantees that there won't be an error.  But
3190                              * it's easy here to make sure.  The function just
3191                              * above warns and returns 0 if invalid utf8, but
3192                              * it can also return 0 if the input is validly a
3193                              * NUL. Disambiguate */
3194                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3195                                 uv = UNICODE_REPLACEMENT;
3196                             }
3197
3198                             /* Convert first code point to hex, including the
3199                              * boiler plate before it */
3200                             sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3201                             output_length = strlen(hex_string);
3202
3203                             /* Make sure there is enough space to hold it */
3204                             d = off + SvGROW(sv, off
3205                                                  + output_length
3206                                                  + (STRLEN)(send - e)
3207                                                  + 2);  /* '}' + NUL */
3208                             /* And output it */
3209                             Copy(hex_string, d, output_length, char);
3210                             d += output_length;
3211
3212                             /* For each subsequent character, append dot and
3213                              * its ordinal in hex */
3214                             while ((str += char_length) < str_end) {
3215                                 const STRLEN off = d - SvPVX_const(sv);
3216                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3217                                                         str_end - str,
3218                                                         &char_length,
3219                                                         UTF8_ALLOW_ANYUV);
3220                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3221                                     uv = UNICODE_REPLACEMENT;
3222                                 }
3223
3224                                 sprintf(hex_string, ".%X", (unsigned int) uv);
3225                                 output_length = strlen(hex_string);
3226
3227                                 d = off + SvGROW(sv, off
3228                                                      + output_length
3229                                                      + (STRLEN)(send - e)
3230                                                      + 2);      /* '}' +  NUL */
3231                                 Copy(hex_string, d, output_length, char);
3232                                 d += output_length;
3233                             }
3234
3235                             *d++ = '}'; /* Done.  Add the trailing brace */
3236                         }
3237                     }
3238                     else { /* Here, not in a pattern.  Convert the name to a
3239                             * string. */
3240
3241                          /* If destination is not in utf8, unconditionally
3242                           * recode it to be so.  This is because \N{} implies
3243                           * Unicode semantics, and scalars have to be in utf8
3244                           * to guarantee those semantics */
3245                         if (! has_utf8) {
3246                             SvCUR_set(sv, d - SvPVX_const(sv));
3247                             SvPOK_on(sv);
3248                             *d = '\0';
3249                             /* See Note on sizing above.  */
3250                             sv_utf8_upgrade_flags_grow(sv,
3251                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3252                                                 len + (STRLEN)(send - s) + 1);
3253                             d = SvPVX(sv) + SvCUR(sv);
3254                             has_utf8 = TRUE;
3255                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3256
3257                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3258                              * set correctly here). */
3259                             const STRLEN off = d - SvPVX_const(sv);
3260                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3261                         }
3262                         Copy(str, d, len, char);
3263                         d += len;
3264                     }
3265                     SvREFCNT_dec(res);
3266
3267                     /* Deprecate non-approved name syntax */
3268                     if (ckWARN_d(WARN_DEPRECATED)) {
3269                         bool problematic = FALSE;
3270                         char* i = s;
3271
3272                         /* For non-ut8 input, look to see that the first
3273                          * character is an alpha, then loop through the rest
3274                          * checking that each is a continuation */
3275                         if (! this_utf8) {
3276                             if (! isALPHAU(*i)) problematic = TRUE;
3277                             else for (i = s + 1; i < e; i++) {
3278                                 if (isCHARNAME_CONT(*i)) continue;
3279                                 problematic = TRUE;
3280                                 break;
3281                             }
3282                         }
3283                         else {
3284                             /* Similarly for utf8.  For invariants can check
3285                              * directly.  We accept anything above the latin1
3286                              * range because it is immaterial to Perl if it is
3287                              * correct or not, and is expensive to check.  But
3288                              * it is fairly easy in the latin1 range to convert
3289                              * the variants into a single character and check
3290                              * those */
3291                             if (UTF8_IS_INVARIANT(*i)) {
3292                                 if (! isALPHAU(*i)) problematic = TRUE;
3293                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3294                                 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3295                                                                             *(i+1)))))
3296                                 {
3297                                     problematic = TRUE;
3298                                 }
3299                             }
3300                             if (! problematic) for (i = s + UTF8SKIP(s);
3301                                                     i < e;
3302                                                     i+= UTF8SKIP(i))
3303                             {
3304                                 if (UTF8_IS_INVARIANT(*i)) {
3305                                     if (isCHARNAME_CONT(*i)) continue;
3306                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3307                                     continue;
3308                                 } else if (isCHARNAME_CONT(
3309                                             UNI_TO_NATIVE(
3310                                             UTF8_ACCUMULATE(*i, *(i+1)))))
3311                                 {
3312                                     continue;
3313                                 }
3314                                 problematic = TRUE;
3315                                 break;
3316                             }
3317                         }
3318                         if (problematic) {
3319                             /* The e-i passed to the final %.*s makes sure that
3320                              * should the trailing NUL be missing that this
3321                              * print won't run off the end of the string */
3322                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3323                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3324                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3325                         }
3326                     }
3327                 } /* End \N{NAME} */
3328 #ifdef EBCDIC
3329                 if (!dorange)
3330                     native_range = FALSE; /* \N{} is defined to be Unicode */
3331 #endif
3332                 s = e + 1;  /* Point to just after the '}' */
3333                 continue;
3334
3335             /* \c is a control character */
3336             case 'c':
3337                 s++;
3338                 if (s < send) {
3339                     *d++ = grok_bslash_c(*s++, 1);
3340                 }
3341                 else {
3342                     yyerror("Missing control char name in \\c");
3343                 }
3344                 continue;
3345
3346             /* printf-style backslashes, formfeeds, newlines, etc */
3347             case 'b':
3348                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3349                 break;
3350             case 'n':
3351                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3352                 break;
3353             case 'r':
3354                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3355                 break;
3356             case 'f':
3357                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3358                 break;
3359             case 't':
3360                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3361                 break;
3362             case 'e':
3363                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3364                 break;
3365             case 'a':
3366                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3367                 break;
3368             } /* end switch */
3369
3370             s++;
3371             continue;
3372         } /* end if (backslash) */
3373 #ifdef EBCDIC
3374         else
3375             literal_endpoint++;
3376 #endif
3377
3378     default_action:
3379         /* If we started with encoded form, or already know we want it,
3380            then encode the next character */
3381         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3382             STRLEN len  = 1;
3383
3384
3385             /* One might think that it is wasted effort in the case of the
3386              * source being utf8 (this_utf8 == TRUE) to take the next character
3387              * in the source, convert it to an unsigned value, and then convert
3388              * it back again.  But the source has not been validated here.  The
3389              * routine that does the conversion checks for errors like
3390              * malformed utf8 */
3391
3392             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3393             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3394             if (!has_utf8) {
3395                 SvCUR_set(sv, d - SvPVX_const(sv));
3396                 SvPOK_on(sv);
3397                 *d = '\0';
3398                 /* See Note on sizing above.  */
3399                 sv_utf8_upgrade_flags_grow(sv,
3400                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3401                                         need + (STRLEN)(send - s) + 1);
3402                 d = SvPVX(sv) + SvCUR(sv);
3403                 has_utf8 = TRUE;
3404             } else if (need > len) {
3405                 /* encoded value larger than old, may need extra space (NOTE:
3406                  * SvCUR() is not set correctly here).   See Note on sizing
3407                  * above.  */
3408                 const STRLEN off = d - SvPVX_const(sv);
3409                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3410             }
3411             s += len;
3412
3413             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3414 #ifdef EBCDIC
3415             if (uv > 255 && !dorange)
3416                 native_range = FALSE;
3417 #endif
3418         }
3419         else {
3420             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3421         }
3422     } /* while loop to process each character */
3423
3424     /* terminate the string and set up the sv */
3425     *d = '\0';
3426     SvCUR_set(sv, d - SvPVX_const(sv));
3427     if (SvCUR(sv) >= SvLEN(sv))
3428         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3429
3430     SvPOK_on(sv);
3431     if (PL_encoding && !has_utf8) {
3432         sv_recode_to_utf8(sv, PL_encoding);
3433         if (SvUTF8(sv))
3434             has_utf8 = TRUE;
3435     }
3436     if (has_utf8) {
3437         SvUTF8_on(sv);
3438         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3439             PL_sublex_info.sub_op->op_private |=
3440                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3441         }
3442     }
3443
3444     /* shrink the sv if we allocated more than we used */
3445     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3446         SvPV_shrink_to_cur(sv);
3447     }
3448
3449     /* return the substring (via pl_yylval) only if we parsed anything */
3450     if (s > PL_bufptr) {
3451         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3452             const char *const key = PL_lex_inpat ? "qr" : "q";
3453             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3454             const char *type;
3455             STRLEN typelen;
3456
3457             if (PL_lex_inwhat == OP_TRANS) {
3458                 type = "tr";
3459                 typelen = 2;
3460             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3461                 type = "s";
3462                 typelen = 1;
3463             } else  {
3464                 type = "qq";
3465                 typelen = 2;
3466             }
3467
3468             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3469                                 type, typelen);
3470         }
3471         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3472     } else
3473         SvREFCNT_dec(sv);
3474     return s;
3475 }
3476
3477 /* S_intuit_more
3478  * Returns TRUE if there's more to the expression (e.g., a subscript),
3479  * FALSE otherwise.
3480  *
3481  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3482  *
3483  * ->[ and ->{ return TRUE
3484  * { and [ outside a pattern are always subscripts, so return TRUE
3485  * if we're outside a pattern and it's not { or [, then return FALSE
3486  * if we're in a pattern and the first char is a {
3487  *   {4,5} (any digits around the comma) returns FALSE
3488  * if we're in a pattern and the first char is a [
3489  *   [] returns FALSE
3490  *   [SOMETHING] has a funky algorithm to decide whether it's a
3491  *      character class or not.  It has to deal with things like
3492  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3493  * anything else returns TRUE
3494  */
3495
3496 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3497
3498 STATIC int
3499 S_intuit_more(pTHX_ register char *s)
3500 {
3501     dVAR;
3502
3503     PERL_ARGS_ASSERT_INTUIT_MORE;
3504
3505     if (PL_lex_brackets)
3506         return TRUE;
3507     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3508         return TRUE;
3509     if (*s != '{' && *s != '[')
3510         return FALSE;
3511     if (!PL_lex_inpat)
3512         return TRUE;
3513
3514     /* In a pattern, so maybe we have {n,m}. */
3515     if (*s == '{') {
3516         if (regcurly(s)) {
3517             return FALSE;
3518         }
3519         return TRUE;
3520     }
3521
3522     /* On the other hand, maybe we have a character class */
3523
3524     s++;
3525     if (*s == ']' || *s == '^')
3526         return FALSE;
3527     else {
3528         /* this is terrifying, and it works */
3529         int weight = 2;         /* let's weigh the evidence */
3530         char seen[256];
3531         unsigned char un_char = 255, last_un_char;
3532         const char * const send = strchr(s,']');
3533         char tmpbuf[sizeof PL_tokenbuf * 4];
3534
3535         if (!send)              /* has to be an expression */
3536             return TRUE;
3537
3538         Zero(seen,256,char);
3539         if (*s == '$')
3540             weight -= 3;
3541         else if (isDIGIT(*s)) {
3542             if (s[1] != ']') {
3543                 if (isDIGIT(s[1]) && s[2] == ']')
3544                     weight -= 10;
3545             }
3546             else
3547                 weight -= 100;
3548         }
3549         for (; s < send; s++) {
3550             last_un_char = un_char;
3551             un_char = (unsigned char)*s;
3552             switch (*s) {
3553             case '@':
3554             case '&':
3555             case '$':
3556                 weight -= seen[un_char] * 10;
3557                 if (isALNUM_lazy_if(s+1,UTF)) {
3558                     int len;
3559                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3560                     len = (int)strlen(tmpbuf);
3561                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3562                         weight -= 100;
3563                     else
3564                         weight -= 10;
3565                 }
3566                 else if (*s == '$' && s[1] &&
3567                   strchr("[#!%*<>()-=",s[1])) {
3568                     if (/*{*/ strchr("])} =",s[2]))
3569                         weight -= 10;
3570                     else
3571                         weight -= 1;
3572                 }
3573                 break;
3574             case '\\':
3575                 un_char = 254;
3576                 if (s[1]) {
3577                     if (strchr("wds]",s[1]))
3578                         weight += 100;
3579                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3580                         weight += 1;
3581                     else if (strchr("rnftbxcav",s[1]))
3582                         weight += 40;
3583                     else if (isDIGIT(s[1])) {
3584                         weight += 40;
3585                         while (s[1] && isDIGIT(s[1]))
3586                             s++;
3587                     }
3588                 }
3589                 else
3590                     weight += 100;
3591                 break;
3592             case '-':
3593                 if (s[1] == '\\')
3594                     weight += 50;
3595                 if (strchr("aA01! ",last_un_char))
3596                     weight += 30;
3597                 if (strchr("zZ79~",s[1]))
3598                     weight += 30;
3599                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3600                     weight -= 5;        /* cope with negative subscript */
3601                 break;
3602             default:
3603                 if (!isALNUM(last_un_char)
3604                     && !(last_un_char == '$' || last_un_char == '@'
3605                          || last_un_char == '&')
3606                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3607                     char *d = tmpbuf;
3608                     while (isALPHA(*s))
3609                         *d++ = *s++;
3610                     *d = '\0';
3611                     if (keyword(tmpbuf, d - tmpbuf, 0))
3612                         weight -= 150;
3613                 }
3614                 if (un_char == last_un_char + 1)
3615                     weight += 5;
3616                 weight -= seen[un_char];
3617                 break;
3618             }
3619             seen[un_char]++;
3620         }
3621         if (weight >= 0)        /* probably a character class */
3622             return FALSE;
3623     }
3624
3625     return TRUE;
3626 }
3627
3628 /*
3629  * S_intuit_method
3630  *
3631  * Does all the checking to disambiguate
3632  *   foo bar
3633  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3634  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3635  *
3636  * First argument is the stuff after the first token, e.g. "bar".
3637  *
3638  * Not a method if bar is a filehandle.
3639  * Not a method if foo is a subroutine prototyped to take a filehandle.
3640  * Not a method if it's really "Foo $bar"
3641  * Method if it's "foo $bar"
3642  * Not a method if it's really "print foo $bar"
3643  * Method if it's really "foo package::" (interpreted as package->foo)
3644  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3645  * Not a method if bar is a filehandle or package, but is quoted with
3646  *   =>
3647  */
3648
3649 STATIC int
3650 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3651 {
3652     dVAR;
3653     char *s = start + (*start == '$');
3654     char tmpbuf[sizeof PL_tokenbuf];
3655     STRLEN len;
3656     GV* indirgv;
3657 #ifdef PERL_MAD
3658     int soff;
3659 #endif
3660
3661     PERL_ARGS_ASSERT_INTUIT_METHOD;
3662
3663     if (gv) {
3664         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3665             return 0;
3666         if (cv) {
3667             if (SvPOK(cv)) {
3668                 const char *proto = SvPVX_const(cv);
3669                 if (proto) {
3670                     if (*proto == ';')
3671                         proto++;
3672                     if (*proto == '*')
3673                         return 0;
3674                 }
3675             }
3676         } else
3677             gv = NULL;
3678     }
3679     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3680     /* start is the beginning of the possible filehandle/object,
3681      * and s is the end of it
3682      * tmpbuf is a copy of it
3683      */
3684
3685     if (*start == '$') {
3686         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3687                 isUPPER(*PL_tokenbuf))
3688             return 0;
3689 #ifdef PERL_MAD
3690         len = start - SvPVX(PL_linestr);
3691 #endif
3692         s = PEEKSPACE(s);
3693 #ifdef PERL_MAD
3694         start = SvPVX(PL_linestr) + len;
3695 #endif
3696         PL_bufptr = start;
3697         PL_expect = XREF;
3698         return *s == '(' ? FUNCMETH : METHOD;
3699     }
3700     if (!keyword(tmpbuf, len, 0)) {
3701         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3702             len -= 2;
3703             tmpbuf[len] = '\0';
3704 #ifdef PERL_MAD
3705             soff = s - SvPVX(PL_linestr);
3706 #endif
3707             goto bare_package;
3708         }
3709         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3710         if (indirgv && GvCVu(indirgv))
3711             return 0;
3712         /* filehandle or package name makes it a method */
3713         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3714 #ifdef PERL_MAD
3715             soff = s - SvPVX(PL_linestr);
3716 #endif
3717             s = PEEKSPACE(s);
3718             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3719                 return 0;       /* no assumptions -- "=>" quotes bearword */
3720       bare_package:
3721             start_force(PL_curforce);
3722             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3723                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3724             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3725             if (PL_madskills)
3726                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3727             PL_expect = XTERM;
3728             force_next(WORD);
3729             PL_bufptr = s;
3730 #ifdef PERL_MAD
3731             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3732 #endif
3733             return *s == '(' ? FUNCMETH : METHOD;
3734         }
3735     }
3736     return 0;
3737 }
3738
3739 /* Encoded script support. filter_add() effectively inserts a
3740  * 'pre-processing' function into the current source input stream.
3741  * Note that the filter function only applies to the current source file
3742  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3743  *
3744  * The datasv parameter (which may be NULL) can be used to pass
3745  * private data to this instance of the filter. The filter function
3746  * can recover the SV using the FILTER_DATA macro and use it to
3747  * store private buffers and state information.
3748  *
3749  * The supplied datasv parameter is upgraded to a PVIO type
3750  * and the IoDIRP/IoANY field is used to store the function pointer,
3751  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3752  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3753  * private use must be set using malloc'd pointers.
3754  */
3755
3756 SV *
3757 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3758 {
3759     dVAR;
3760     if (!funcp)
3761         return NULL;
3762
3763     if (!PL_parser)
3764         return NULL;
3765
3766     if (!PL_rsfp_filters)
3767         PL_rsfp_filters = newAV();
3768     if (!datasv)
3769         datasv = newSV(0);
3770     SvUPGRADE(datasv, SVt_PVIO);
3771     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3772     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3773     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3774                           FPTR2DPTR(void *, IoANY(datasv)),
3775                           SvPV_nolen(datasv)));
3776     av_unshift(PL_rsfp_filters, 1);
3777     av_store(PL_rsfp_filters, 0, datasv) ;
3778     return(datasv);
3779 }
3780
3781
3782 /* Delete most recently added instance of this filter function. */
3783 void
3784 Perl_filter_del(pTHX_ filter_t funcp)
3785 {
3786     dVAR;
3787     SV *datasv;
3788
3789     PERL_ARGS_ASSERT_FILTER_DEL;
3790
3791 #ifdef DEBUGGING
3792     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3793                           FPTR2DPTR(void*, funcp)));
3794 #endif
3795     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3796         return;
3797     /* if filter is on top of stack (usual case) just pop it off */
3798     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3799     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3800         sv_free(av_pop(PL_rsfp_filters));
3801
3802         return;
3803     }
3804     /* we need to search for the correct entry and clear it     */
3805     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3806 }
3807
3808
3809 /* Invoke the idxth filter function for the current rsfp.        */
3810 /* maxlen 0 = read one text line */
3811 I32
3812 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3813 {
3814     dVAR;
3815     filter_t funcp;
3816     SV *datasv = NULL;
3817     /* This API is bad. It should have been using unsigned int for maxlen.
3818        Not sure if we want to change the API, but if not we should sanity
3819        check the value here.  */
3820     const unsigned int correct_length
3821         = maxlen < 0 ?
3822 #ifdef PERL_MICRO
3823         0x7FFFFFFF
3824 #else
3825         INT_MAX
3826 #endif
3827         : maxlen;
3828
3829     PERL_ARGS_ASSERT_FILTER_READ;
3830
3831     if (!PL_parser || !PL_rsfp_filters)
3832         return -1;
3833     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3834         /* Provide a default input filter to make life easy.    */
3835         /* Note that we append to the line. This is handy.      */
3836         DEBUG_P(PerlIO_printf(Perl_debug_log,
3837                               "filter_read %d: from rsfp\n", idx));
3838         if (correct_length) {
3839             /* Want a block */
3840             int len ;
3841             const int old_len = SvCUR(buf_sv);
3842
3843             /* ensure buf_sv is large enough */
3844             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3845             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3846                                    correct_length)) <= 0) {
3847                 if (PerlIO_error(PL_rsfp))
3848                     return -1;          /* error */
3849                 else
3850                     return 0 ;          /* end of file */
3851             }
3852             SvCUR_set(buf_sv, old_len + len) ;
3853             SvPVX(buf_sv)[old_len + len] = '\0';
3854         } else {
3855             /* Want a line */
3856             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3857                 if (PerlIO_error(PL_rsfp))
3858                     return -1;          /* error */
3859                 else
3860                     return 0 ;          /* end of file */
3861             }
3862         }
3863         return SvCUR(buf_sv);
3864     }
3865     /* Skip this filter slot if filter has been deleted */
3866     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3867         DEBUG_P(PerlIO_printf(Perl_debug_log,
3868                               "filter_read %d: skipped (filter deleted)\n",
3869                               idx));
3870         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3871     }
3872     /* Get function pointer hidden within datasv        */
3873     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3874     DEBUG_P(PerlIO_printf(Perl_debug_log,
3875                           "filter_read %d: via function %p (%s)\n",
3876                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3877     /* Call function. The function is expected to       */
3878     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3879     /* Return: <0:error, =0:eof, >0:not eof             */
3880     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3881 }
3882
3883 STATIC char *
3884 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3885 {
3886     dVAR;
3887
3888     PERL_ARGS_ASSERT_FILTER_GETS;
3889
3890 #ifdef PERL_CR_FILTER
3891     if (!PL_rsfp_filters) {
3892         filter_add(S_cr_textfilter,NULL);
3893     }
3894 #endif
3895     if (PL_rsfp_filters) {
3896         if (!append)
3897             SvCUR_set(sv, 0);   /* start with empty line        */
3898         if (FILTER_READ(0, sv, 0) > 0)
3899             return ( SvPVX(sv) ) ;
3900         else
3901             return NULL ;
3902     }
3903     else
3904         return (sv_gets(sv, PL_rsfp, append));
3905 }
3906
3907 STATIC HV *
3908 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3909 {
3910     dVAR;
3911     GV *gv;
3912
3913     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3914
3915     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3916         return PL_curstash;
3917
3918     if (len > 2 &&
3919         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3920         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3921     {
3922         return GvHV(gv);                        /* Foo:: */
3923     }
3924
3925     /* use constant CLASS => 'MyClass' */
3926     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3927     if (gv && GvCV(gv)) {
3928         SV * const sv = cv_const_sv(GvCV(gv));
3929         if (sv)
3930             pkgname = SvPV_const(sv, len);
3931     }
3932
3933     return gv_stashpvn(pkgname, len, 0);
3934 }
3935
3936 /*
3937  * S_readpipe_override
3938  * Check whether readpipe() is overriden, and generates the appropriate
3939  * optree, provided sublex_start() is called afterwards.
3940  */
3941 STATIC void
3942 S_readpipe_override(pTHX)
3943 {
3944     GV **gvp;
3945     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3946     pl_yylval.ival = OP_BACKTICK;
3947     if ((gv_readpipe
3948                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3949             ||
3950             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3951              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3952              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3953     {
3954         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3955             append_elem(OP_LIST,
3956                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3957                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3958     }
3959 }
3960
3961 #ifdef PERL_MAD
3962  /*
3963  * Perl_madlex
3964  * The intent of this yylex wrapper is to minimize the changes to the
3965  * tokener when we aren't interested in collecting madprops.  It remains
3966  * to be seen how successful this strategy will be...
3967  */
3968
3969 int
3970 Perl_madlex(pTHX)
3971 {
3972     int optype;
3973     char *s = PL_bufptr;
3974
3975     /* make sure PL_thiswhite is initialized */
3976     PL_thiswhite = 0;
3977     PL_thismad = 0;
3978
3979     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3980     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
3981         return S_pending_ident(aTHX);
3982
3983     /* previous token ate up our whitespace? */
3984     if (!PL_lasttoke && PL_nextwhite) {
3985         PL_thiswhite = PL_nextwhite;
3986         PL_nextwhite = 0;
3987     }
3988
3989     /* isolate the token, and figure out where it is without whitespace */
3990     PL_realtokenstart = -1;
3991     PL_thistoken = 0;
3992     optype = yylex();
3993     s = PL_bufptr;
3994     assert(PL_curforce < 0);
3995
3996     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3997         if (!PL_thistoken) {
3998             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3999                 PL_thistoken = newSVpvs("");
4000             else {
4001                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4002                 PL_thistoken = newSVpvn(tstart, s - tstart);
4003             }
4004         }
4005         if (PL_thismad) /* install head */
4006             CURMAD('X', PL_thistoken);
4007     }
4008
4009     /* last whitespace of a sublex? */
4010     if (optype == ')' && PL_endwhite) {
4011         CURMAD('X', PL_endwhite);
4012     }
4013
4014     if (!PL_thismad) {
4015
4016         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4017         if (!PL_thiswhite && !PL_endwhite && !optype) {
4018             sv_free(PL_thistoken);
4019             PL_thistoken = 0;
4020             return 0;
4021         }
4022
4023         /* put off final whitespace till peg */
4024         if (optype == ';' && !PL_rsfp) {
4025             PL_nextwhite = PL_thiswhite;
4026             PL_thiswhite = 0;
4027         }
4028         else if (PL_thisopen) {
4029             CURMAD('q', PL_thisopen);
4030             if (PL_thistoken)
4031                 sv_free(PL_thistoken);
4032             PL_thistoken = 0;
4033         }
4034         else {
4035             /* Store actual token text as madprop X */
4036             CURMAD('X', PL_thistoken);
4037         }
4038
4039         if (PL_thiswhite) {
4040             /* add preceding whitespace as madprop _ */
4041             CURMAD('_', PL_thiswhite);
4042         }
4043
4044         if (PL_thisstuff) {
4045             /* add quoted material as madprop = */
4046             CURMAD('=', PL_thisstuff);
4047         }
4048
4049         if (PL_thisclose) {
4050             /* add terminating quote as madprop Q */
4051             CURMAD('Q', PL_thisclose);
4052         }
4053     }
4054
4055     /* special processing based on optype */
4056
4057     switch (optype) {
4058
4059     /* opval doesn't need a TOKEN since it can already store mp */
4060     case WORD:
4061     case METHOD:
4062     case FUNCMETH:
4063     case THING:
4064     case PMFUNC:
4065     case PRIVATEREF:
4066     case FUNC0SUB:
4067     case UNIOPSUB:
4068     case LSTOPSUB:
4069         if (pl_yylval.opval)
4070             append_madprops(PL_thismad, pl_yylval.opval, 0);
4071         PL_thismad = 0;
4072         return optype;
4073
4074     /* fake EOF */
4075     case 0:
4076         optype = PEG;
4077         if (PL_endwhite) {
4078             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4079             PL_endwhite = 0;
4080         }
4081         break;
4082
4083     case ']':
4084     case '}':
4085         if (PL_faketokens)
4086             break;
4087         /* remember any fake bracket that lexer is about to discard */
4088         if (PL_lex_brackets == 1 &&
4089             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4090         {
4091             s = PL_bufptr;
4092             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4093                 s++;
4094             if (*s == '}') {
4095                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4096                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4097                 PL_thiswhite = 0;
4098                 PL_bufptr = s - 1;
4099                 break;  /* don't bother looking for trailing comment */
4100             }
4101             else
4102                 s = PL_bufptr;
4103         }
4104         if (optype == ']')
4105             break;
4106         /* FALLTHROUGH */
4107
4108     /* attach a trailing comment to its statement instead of next token */
4109     case ';':
4110         if (PL_faketokens)
4111             break;
4112         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4113             s = PL_bufptr;
4114             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4115                 s++;
4116             if (*s == '\n' || *s == '#') {
4117                 while (s < PL_bufend && *s != '\n')
4118                     s++;
4119                 if (s < PL_bufend)
4120                     s++;
4121                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4122                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4123                 PL_thiswhite = 0;
4124                 PL_bufptr = s;
4125             }
4126         }
4127         break;
4128
4129     /* pval */
4130     case LABEL:
4131         break;
4132
4133     /* ival */
4134     default:
4135         break;
4136
4137     }
4138
4139     /* Create new token struct.  Note: opvals return early above. */
4140     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4141     PL_thismad = 0;
4142     return optype;
4143 }
4144 #endif
4145
4146 STATIC char *
4147 S_tokenize_use(pTHX_ int is_use, char *s) {
4148     dVAR;
4149
4150     PERL_ARGS_ASSERT_TOKENIZE_USE;
4151
4152     if (PL_expect != XSTATE)
4153         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4154                     is_use ? "use" : "no"));
4155     s = SKIPSPACE1(s);
4156     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4157         s = force_version(s, TRUE);
4158         if (*s == ';' || *s == '}'
4159                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4160             start_force(PL_curforce);
4161             NEXTVAL_NEXTTOKE.opval = NULL;
4162             force_next(WORD);
4163         }
4164         else if (*s == 'v') {
4165             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4166             s = force_version(s, FALSE);
4167         }
4168     }
4169     else {
4170         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4171         s = force_version(s, FALSE);
4172     }
4173     pl_yylval.ival = is_use;
4174     return s;
4175 }
4176 #ifdef DEBUGGING
4177     static const char* const exp_name[] =
4178         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4179           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4180         };
4181 #endif
4182
4183 /*
4184   yylex
4185
4186   Works out what to call the token just pulled out of the input
4187   stream.  The yacc parser takes care of taking the ops we return and
4188   stitching them into a tree.
4189
4190   Returns:
4191     PRIVATEREF
4192
4193   Structure:
4194       if read an identifier
4195           if we're in a my declaration
4196               croak if they tried to say my($foo::bar)
4197               build the ops for a my() declaration
4198           if it's an access to a my() variable
4199               are we in a sort block?
4200                   croak if my($a); $a <=> $b
4201               build ops for access to a my() variable
4202           if in a dq string, and they've said @foo and we can't find @foo
4203               croak
4204           build ops for a bareword
4205       if we already built the token before, use it.
4206 */
4207
4208
4209 #ifdef __SC__
4210 #pragma segment Perl_yylex
4211 #endif
4212 int
4213 Perl_yylex(pTHX)
4214 {
4215     dVAR;
4216     register char *s = PL_bufptr;
4217     register char *d;
4218     STRLEN len;
4219     bool bof = FALSE;
4220     U32 fake_eof = 0;
4221
4222     /* orig_keyword, gvp, and gv are initialized here because
4223      * jump to the label just_a_word_zero can bypass their
4224      * initialization later. */
4225     I32 orig_keyword = 0;
4226     GV *gv = NULL;
4227     GV **gvp = NULL;
4228
4229     DEBUG_T( {
4230         SV* tmp = newSVpvs("");
4231         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4232             (IV)CopLINE(PL_curcop),
4233             lex_state_names[PL_lex_state],
4234             exp_name[PL_expect],
4235             pv_display(tmp, s, strlen(s), 0, 60));
4236         SvREFCNT_dec(tmp);
4237     } );
4238     /* check if there's an identifier for us to look at */
4239     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4240         return REPORT(S_pending_ident(aTHX));
4241
4242     /* no identifier pending identification */
4243
4244     switch (PL_lex_state) {
4245 #ifdef COMMENTARY
4246     case LEX_NORMAL:            /* Some compilers will produce faster */
4247     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4248         break;
4249 #endif
4250
4251     /* when we've already built the next token, just pull it out of the queue */
4252     case LEX_KNOWNEXT:
4253 #ifdef PERL_MAD
4254         PL_lasttoke--;
4255         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4256         if (PL_madskills) {
4257             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4258             PL_nexttoke[PL_lasttoke].next_mad = 0;
4259             if (PL_thismad && PL_thismad->mad_key == '_') {
4260                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4261                 PL_thismad->mad_val = 0;
4262                 mad_free(PL_thismad);
4263                 PL_thismad = 0;
4264             }
4265         }
4266         if (!PL_lasttoke) {
4267             PL_lex_state = PL_lex_defer;
4268             PL_expect = PL_lex_expect;
4269             PL_lex_defer = LEX_NORMAL;
4270             if (!PL_nexttoke[PL_lasttoke].next_type)
4271                 return yylex();
4272         }
4273 #else
4274         PL_nexttoke--;
4275         pl_yylval = PL_nextval[PL_nexttoke];
4276         if (!PL_nexttoke) {
4277             PL_lex_state = PL_lex_defer;
4278             PL_expect = PL_lex_expect;
4279             PL_lex_defer = LEX_NORMAL;
4280         }
4281 #endif
4282 #ifdef PERL_MAD
4283         /* FIXME - can these be merged?  */
4284         return(PL_nexttoke[PL_lasttoke].next_type);
4285 #else
4286         return REPORT(PL_nexttype[PL_nexttoke]);
4287 #endif
4288
4289     /* interpolated case modifiers like \L \U, including \Q and \E.
4290        when we get here, PL_bufptr is at the \
4291     */
4292     case LEX_INTERPCASEMOD:
4293 #ifdef DEBUGGING
4294         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4295             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4296 #endif
4297         /* handle \E or end of string */
4298         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4299             /* if at a \E */
4300             if (PL_lex_casemods) {
4301                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4302                 PL_lex_casestack[PL_lex_casemods] = '\0';
4303
4304                 if (PL_bufptr != PL_bufend
4305                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4306                     PL_bufptr += 2;
4307                     PL_lex_state = LEX_INTERPCONCAT;
4308 #ifdef PERL_MAD
4309                     if (PL_madskills)
4310                         PL_thistoken = newSVpvs("\\E");
4311 #endif
4312                 }
4313                 return REPORT(')');
4314             }
4315 #ifdef PERL_MAD
4316             while (PL_bufptr != PL_bufend &&
4317               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4318                 if (!PL_thiswhite)
4319                     PL_thiswhite = newSVpvs("");
4320                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4321                 PL_bufptr += 2;
4322             }
4323 #else
4324             if (PL_bufptr != PL_bufend)
4325                 PL_bufptr += 2;
4326 #endif
4327             PL_lex_state = LEX_INTERPCONCAT;
4328             return yylex();
4329         }
4330         else {
4331             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4332               "### Saw case modifier\n"); });
4333             s = PL_bufptr + 1;
4334             if (s[1] == '\\' && s[2] == 'E') {
4335 #ifdef PERL_MAD
4336                 if (!PL_thiswhite)
4337                     PL_thiswhite = newSVpvs("");
4338                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4339 #endif
4340                 PL_bufptr = s + 3;
4341                 PL_lex_state = LEX_INTERPCONCAT;
4342                 return yylex();
4343             }
4344             else {
4345                 I32 tmp;
4346                 if (!PL_madskills) /* when just compiling don't need correct */
4347                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4348                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4349                 if ((*s == 'L' || *s == 'U') &&
4350                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4351                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4352                     return REPORT(')');
4353                 }
4354                 if (PL_lex_casemods > 10)
4355                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4356                 PL_lex_casestack[PL_lex_casemods++] = *s;
4357                 PL_lex_casestack[PL_lex_casemods] = '\0';
4358                 PL_lex_state = LEX_INTERPCONCAT;
4359                 start_force(PL_curforce);
4360                 NEXTVAL_NEXTTOKE.ival = 0;
4361                 force_next('(');
4362                 start_force(PL_curforce);
4363                 if (*s == 'l')
4364                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4365                 else if (*s == 'u')
4366                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4367                 else if (*s == 'L')
4368                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4369                 else if (*s == 'U')
4370                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4371                 else if (*s == 'Q')
4372                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4373                 else
4374                     Perl_croak(aTHX_ "panic: yylex");
4375                 if (PL_madskills) {
4376                     SV* const tmpsv = newSVpvs("\\ ");
4377                     /* replace the space with the character we want to escape
4378                      */
4379                     SvPVX(tmpsv)[1] = *s;
4380                     curmad('_', tmpsv);
4381                 }
4382                 PL_bufptr = s + 1;
4383             }
4384             force_next(FUNC);
4385             if (PL_lex_starts) {
4386                 s = PL_bufptr;
4387                 PL_lex_starts = 0;
4388 #ifdef PERL_MAD
4389                 if (PL_madskills) {
4390                     if (PL_thistoken)
4391                         sv_free(PL_thistoken);
4392                     PL_thistoken = newSVpvs("");
4393                 }
4394 #endif
4395                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4396                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4397                     OPERATOR(',');
4398                 else
4399                     Aop(OP_CONCAT);
4400             }
4401             else
4402                 return yylex();
4403         }
4404
4405     case LEX_INTERPPUSH:
4406         return REPORT(sublex_push());
4407
4408     case LEX_INTERPSTART:
4409         if (PL_bufptr == PL_bufend)
4410             return REPORT(sublex_done());
4411         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4412               "### Interpolated variable\n"); });
4413         PL_expect = XTERM;
4414         PL_lex_dojoin = (*PL_bufptr == '@');
4415         PL_lex_state = LEX_INTERPNORMAL;
4416         if (PL_lex_dojoin) {
4417             start_force(PL_curforce);
4418             NEXTVAL_NEXTTOKE.ival = 0;
4419             force_next(',');
4420             start_force(PL_curforce);
4421             force_ident("\"", '$');
4422             start_force(PL_curforce);
4423             NEXTVAL_NEXTTOKE.ival = 0;
4424             force_next('$');
4425             start_force(PL_curforce);
4426             NEXTVAL_NEXTTOKE.ival = 0;
4427             force_next('(');
4428             start_force(PL_curforce);
4429             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4430             force_next(FUNC);
4431         }
4432         if (PL_lex_starts++) {
4433             s = PL_bufptr;
4434 #ifdef PERL_MAD
4435             if (PL_madskills) {
4436                 if (PL_thistoken)
4437                     sv_free(PL_thistoken);
4438                 PL_thistoken = newSVpvs("");
4439             }
4440 #endif
4441             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4442             if (!PL_lex_casemods && PL_lex_inpat)
4443                 OPERATOR(',');
4444             else
4445                 Aop(OP_CONCAT);
4446         }
4447         return yylex();
4448
4449     case LEX_INTERPENDMAYBE:
4450         if (intuit_more(PL_bufptr)) {
4451             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4452             break;
4453         }
4454         /* FALL THROUGH */
4455
4456     case LEX_INTERPEND:
4457         if (PL_lex_dojoin) {
4458             PL_lex_dojoin = FALSE;
4459             PL_lex_state = LEX_INTERPCONCAT;
4460 #ifdef PERL_MAD
4461             if (PL_madskills) {
4462                 if (PL_thistoken)
4463                     sv_free(PL_thistoken);
4464                 PL_thistoken = newSVpvs("");
4465             }
4466 #endif
4467             return REPORT(')');
4468         }
4469         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4470             && SvEVALED(PL_lex_repl))
4471         {
4472             if (PL_bufptr != PL_bufend)
4473                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4474             PL_lex_repl = NULL;
4475         }
4476         /* FALLTHROUGH */
4477     case LEX_INTERPCONCAT:
4478 #ifdef DEBUGGING
4479         if (PL_lex_brackets)
4480             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4481 #endif
4482         if (PL_bufptr == PL_bufend)
4483             return REPORT(sublex_done());
4484
4485         if (SvIVX(PL_linestr) == '\'') {
4486             SV *sv = newSVsv(PL_linestr);
4487             if (!PL_lex_inpat)
4488                 sv = tokeq(sv);
4489             else if ( PL_hints & HINT_NEW_RE )
4490                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4491             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4492             s = PL_bufend;
4493         }
4494         else {
4495             s = scan_const(PL_bufptr);
4496             if (*s == '\\')
4497                 PL_lex_state = LEX_INTERPCASEMOD;
4498             else
4499                 PL_lex_state = LEX_INTERPSTART;
4500         }
4501
4502         if (s != PL_bufptr) {
4503             start_force(PL_curforce);
4504             if (PL_madskills) {
4505                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4506             }
4507             NEXTVAL_NEXTTOKE = pl_yylval;
4508             PL_expect = XTERM;
4509             force_next(THING);
4510             if (PL_lex_starts++) {
4511 #ifdef PERL_MAD
4512                 if (PL_madskills) {
4513                     if (PL_thistoken)
4514                         sv_free(PL_thistoken);
4515                     PL_thistoken = newSVpvs("");
4516                 }
4517 #endif
4518                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4519                 if (!PL_lex_casemods && PL_lex_inpat)
4520                     OPERATOR(',');
4521                 else
4522                     Aop(OP_CONCAT);
4523             }
4524             else {
4525                 PL_bufptr = s;
4526                 return yylex();
4527             }
4528         }
4529
4530         return yylex();
4531     case LEX_FORMLINE:
4532         PL_lex_state = LEX_NORMAL;
4533         s = scan_formline(PL_bufptr);
4534         if (!PL_lex_formbrack)
4535             goto rightbracket;
4536         OPERATOR(';');
4537     }
4538
4539     s = PL_bufptr;
4540     PL_oldoldbufptr = PL_oldbufptr;
4541     PL_oldbufptr = s;
4542
4543   retry:
4544 #ifdef PERL_MAD
4545     if (PL_thistoken) {
4546         sv_free(PL_thistoken);
4547         PL_thistoken = 0;
4548     }
4549     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4550 #endif
4551     switch (*s) {
4552     default:
4553         if (isIDFIRST_lazy_if(s,UTF))
4554             goto keylookup;
4555         {
4556         unsigned char c = *s;
4557         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4558         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4559             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4560         } else {
4561             d = PL_linestart;
4562         }
4563         *s = '\0';
4564         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4565     }
4566     case 4:
4567     case 26:
4568         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4569     case 0:
4570 #ifdef PERL_MAD
4571         if (PL_madskills)
4572             PL_faketokens = 0;
4573 #endif
4574         if (!PL_rsfp) {
4575             PL_last_uni = 0;
4576             PL_last_lop = 0;
4577             if (PL_lex_brackets) {
4578                 yyerror((const char *)
4579                         (PL_lex_formbrack
4580                          ? "Format not terminated"
4581                          : "Missing right curly or square bracket"));
4582             }
4583             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4584                         "### Tokener got EOF\n");
4585             } );
4586             TOKEN(0);
4587         }
4588         if (s++ < PL_bufend)
4589             goto retry;                 /* ignore stray nulls */
4590         PL_last_uni = 0;
4591         PL_last_lop = 0;
4592         if (!PL_in_eval && !PL_preambled) {
4593             PL_preambled = TRUE;
4594 #ifdef PERL_MAD
4595             if (PL_madskills)
4596                 PL_faketokens = 1;
4597 #endif
4598             if (PL_perldb) {
4599                 /* Generate a string of Perl code to load the debugger.
4600                  * If PERL5DB is set, it will return the contents of that,
4601                  * otherwise a compile-time require of perl5db.pl.  */
4602
4603                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4604
4605                 if (pdb) {
4606                     sv_setpv(PL_linestr, pdb);
4607                     sv_catpvs(PL_linestr,";");
4608                 } else {
4609                     SETERRNO(0,SS_NORMAL);
4610                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4611                 }
4612             } else
4613                 sv_setpvs(PL_linestr,"");
4614             if (PL_preambleav) {
4615                 SV **svp = AvARRAY(PL_preambleav);
4616                 SV **const end = svp + AvFILLp(PL_preambleav);
4617                 while(svp <= end) {
4618                     sv_catsv(PL_linestr, *svp);
4619                     ++svp;
4620                     sv_catpvs(PL_linestr, ";");
4621                 }
4622                 sv_free(MUTABLE_SV(PL_preambleav));
4623                 PL_preambleav = NULL;
4624             }
4625             if (PL_minus_E)
4626                 sv_catpvs(PL_linestr,
4627                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4628             if (PL_minus_n || PL_minus_p) {
4629                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4630                 if (PL_minus_l)
4631                     sv_catpvs(PL_linestr,"chomp;");
4632                 if (PL_minus_a) {
4633                     if (PL_minus_F) {
4634                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4635                              || *PL_splitstr == '"')
4636                               && strchr(PL_splitstr + 1, *PL_splitstr))
4637                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4638                         else {
4639                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4640                                bytes can be used as quoting characters.  :-) */
4641                             const char *splits = PL_splitstr;
4642                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4643                             do {
4644                                 /* Need to \ \s  */
4645                                 if (*splits == '\\')
4646                                     sv_catpvn(PL_linestr, splits, 1);
4647                                 sv_catpvn(PL_linestr, splits, 1);
4648                             } while (*splits++);
4649                             /* This loop will embed the trailing NUL of
4650                                PL_linestr as the last thing it does before
4651                                terminating.  */
4652                             sv_catpvs(PL_linestr, ");");
4653                         }
4654                     }
4655                     else
4656                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4657                 }
4658             }
4659             sv_catpvs(PL_linestr, "\n");
4660             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4661             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4662             PL_last_lop = PL_last_uni = NULL;
4663             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4664                 update_debugger_info(PL_linestr, NULL, 0);
4665             goto retry;
4666         }
4667         do {
4668             fake_eof = 0;
4669             bof = PL_rsfp ? TRUE : FALSE;
4670             if (0) {
4671               fake_eof:
4672                 fake_eof = LEX_FAKE_EOF;
4673             }
4674             PL_bufptr = PL_bufend;
4675             CopLINE_inc(PL_curcop);
4676             if (!lex_next_chunk(fake_eof)) {
4677                 CopLINE_dec(PL_curcop);
4678                 s = PL_bufptr;
4679                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4680             }
4681             CopLINE_dec(PL_curcop);
4682 #ifdef PERL_MAD
4683             if (!PL_rsfp)
4684                 PL_realtokenstart = -1;
4685 #endif
4686             s = PL_bufptr;
4687             /* If it looks like the start of a BOM or raw UTF-16,
4688              * check if it in fact is. */
4689             if (bof && PL_rsfp &&
4690                      (*s == 0 ||
4691                       *(U8*)s == 0xEF ||
4692                       *(U8*)s >= 0xFE ||
4693                       s[1] == 0)) {
4694                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4695                 if (bof) {
4696                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4697                     s = swallow_bom((U8*)s);
4698                 }
4699             }
4700             if (PL_doextract) {
4701                 /* Incest with pod. */
4702 #ifdef PERL_MAD
4703                 if (PL_madskills)
4704                     sv_catsv(PL_thiswhite, PL_linestr);
4705 #endif
4706                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4707                     sv_setpvs(PL_linestr, "");
4708                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4709                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4710                     PL_last_lop = PL_last_uni = NULL;
4711                     PL_doextract = FALSE;
4712                 }
4713             }
4714             if (PL_rsfp)
4715                 incline(s);
4716         } while (PL_doextract);
4717         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4718         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4719         PL_last_lop = PL_last_uni = NULL;
4720         if (CopLINE(PL_curcop) == 1) {
4721             while (s < PL_bufend && isSPACE(*s))
4722                 s++;
4723             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4724                 s++;
4725 #ifdef PERL_MAD
4726             if (PL_madskills)
4727                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4728 #endif
4729             d = NULL;
4730             if (!PL_in_eval) {
4731                 if (*s == '#' && *(s+1) == '!')
4732                     d = s + 2;
4733 #ifdef ALTERNATE_SHEBANG
4734                 else {
4735                     static char const as[] = ALTERNATE_SHEBANG;
4736                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4737                         d = s + (sizeof(as) - 1);
4738                 }
4739 #endif /* ALTERNATE_SHEBANG */
4740             }
4741             if (d) {
4742                 char *ipath;
4743                 char *ipathend;
4744
4745                 while (isSPACE(*d))
4746                     d++;
4747                 ipath = d;
4748                 while (*d && !isSPACE(*d))
4749                     d++;
4750                 ipathend = d;
4751
4752 #ifdef ARG_ZERO_IS_SCRIPT
4753                 if (ipathend > ipath) {
4754                     /*
4755                      * HP-UX (at least) sets argv[0] to the script name,
4756                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4757                      * at least, set argv[0] to the basename of the Perl
4758                      * interpreter. So, having found "#!", we'll set it right.
4759                      */
4760                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4761                                                     SVt_PV)); /* $^X */
4762                     assert(SvPOK(x) || SvGMAGICAL(x));
4763                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4764                         sv_setpvn(x, ipath, ipathend - ipath);
4765                         SvSETMAGIC(x);
4766                     }
4767                     else {
4768                         STRLEN blen;
4769                         STRLEN llen;
4770                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4771                         const char * const lstart = SvPV_const(x,llen);
4772                         if (llen < blen) {
4773                             bstart += blen - llen;
4774                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4775                                 sv_setpvn(x, ipath, ipathend - ipath);
4776                                 SvSETMAGIC(x);
4777                             }
4778                         }
4779                     }
4780                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4781                 }
4782 #endif /* ARG_ZERO_IS_SCRIPT */
4783
4784                 /*
4785                  * Look for options.
4786                  */
4787                 d = instr(s,"perl -");
4788                 if (!d) {
4789                     d = instr(s,"perl");
4790 #if defined(DOSISH)
4791                     /* avoid getting into infinite loops when shebang
4792                      * line contains "Perl" rather than "perl" */
4793                     if (!d) {
4794                         for (d = ipathend-4; d >= ipath; --d) {
4795                             if ((*d == 'p' || *d == 'P')
4796                                 && !ibcmp(d, "perl", 4))
4797                             {
4798                                 break;
4799                             }
4800                         }
4801                         if (d < ipath)
4802                             d = NULL;
4803                     }
4804 #endif
4805                 }
4806 #ifdef ALTERNATE_SHEBANG
4807                 /*
4808                  * If the ALTERNATE_SHEBANG on this system starts with a
4809                  * character that can be part of a Perl expression, then if
4810                  * we see it but not "perl", we're probably looking at the
4811                  * start of Perl code, not a request to hand off to some
4812                  * other interpreter.  Similarly, if "perl" is there, but
4813                  * not in the first 'word' of the line, we assume the line
4814                  * contains the start of the Perl program.
4815                  */
4816                 if (d && *s != '#') {
4817                     const char *c = ipath;
4818                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4819                         c++;
4820                     if (c < d)
4821                         d = NULL;       /* "perl" not in first word; ignore */
4822                     else
4823                         *s = '#';       /* Don't try to parse shebang line */
4824                 }
4825 #endif /* ALTERNATE_SHEBANG */
4826                 if (!d &&
4827                     *s == '#' &&
4828                     ipathend > ipath &&
4829                     !PL_minus_c &&
4830                     !instr(s,"indir") &&
4831                     instr(PL_origargv[0],"perl"))
4832                 {
4833                     dVAR;
4834                     char **newargv;
4835
4836                     *ipathend = '\0';
4837                     s = ipathend + 1;
4838                     while (s < PL_bufend && isSPACE(*s))
4839                         s++;
4840                     if (s < PL_bufend) {
4841                         Newx(newargv,PL_origargc+3,char*);
4842                         newargv[1] = s;
4843                         while (s < PL_bufend && !isSPACE(*s))
4844                             s++;
4845                         *s = '\0';
4846                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4847                     }
4848                     else
4849                         newargv = PL_origargv;
4850                     newargv[0] = ipath;
4851                     PERL_FPU_PRE_EXEC
4852                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4853                     PERL_FPU_POST_EXEC
4854                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4855                 }
4856                 if (d) {
4857                     while (*d && !isSPACE(*d))
4858                         d++;
4859                     while (SPACE_OR_TAB(*d))
4860                         d++;
4861
4862                     if (*d++ == '-') {
4863                         const bool switches_done = PL_doswitches;
4864                         const U32 oldpdb = PL_perldb;
4865                         const bool oldn = PL_minus_n;
4866                         const bool oldp = PL_minus_p;
4867                         const char *d1 = d;
4868
4869                         do {
4870                             bool baduni = FALSE;
4871                             if (*d1 == 'C') {
4872                                 const char *d2 = d1 + 1;
4873                                 if (parse_unicode_opts((const char **)&d2)
4874                                     != PL_unicode)
4875                                     baduni = TRUE;
4876                             }
4877                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4878                                 const char * const m = d1;
4879                                 while (*d1 && !isSPACE(*d1))
4880                                     d1++;
4881                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4882                                       (int)(d1 - m), m);
4883                             }
4884                             d1 = moreswitches(d1);
4885                         } while (d1);
4886                         if (PL_doswitches && !switches_done) {
4887                             int argc = PL_origargc;
4888                             char **argv = PL_origargv;
4889                             do {
4890                                 argc--,argv++;
4891                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4892                             init_argv_symbols(argc,argv);
4893                         }
4894                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4895                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4896                               /* if we have already added "LINE: while (<>) {",
4897                                  we must not do it again */
4898                         {
4899                             sv_setpvs(PL_linestr, "");
4900                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4901                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4902                             PL_last_lop = PL_last_uni = NULL;
4903                             PL_preambled = FALSE;
4904                             if (PERLDB_LINE || PERLDB_SAVESRC)
4905                                 (void)gv_fetchfile(PL_origfilename);
4906                             goto retry;
4907                         }
4908                     }
4909                 }
4910             }
4911         }
4912         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4913             PL_bufptr = s;
4914             PL_lex_state = LEX_FORMLINE;
4915             return yylex();
4916         }
4917         goto retry;
4918     case '\r':
4919 #ifdef PERL_STRICT_CR
4920         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4921         Perl_croak(aTHX_
4922       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4923 #endif
4924     case ' ': case '\t': case '\f': case 013:
4925 #ifdef PERL_MAD
4926         PL_realtokenstart = -1;
4927         if (!PL_thiswhite)
4928             PL_thiswhite = newSVpvs("");
4929         sv_catpvn(PL_thiswhite, s, 1);
4930 #endif
4931         s++;
4932         goto retry;
4933     case '#':
4934     case '\n':
4935 #ifdef PERL_MAD
4936         PL_realtokenstart = -1;
4937         if (PL_madskills)
4938             PL_faketokens = 0;
4939 #endif
4940         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4941             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4942                 /* handle eval qq[#line 1 "foo"\n ...] */
4943                 CopLINE_dec(PL_curcop);
4944                 incline(s);
4945             }
4946             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4947                 s = SKIPSPACE0(s);
4948                 if (!PL_in_eval || PL_rsfp)
4949                     incline(s);
4950             }
4951             else {
4952                 d = s;
4953                 while (d < PL_bufend && *d != '\n')
4954                     d++;
4955                 if (d < PL_bufend)
4956                     d++;
4957                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4958                   Perl_croak(aTHX_ "panic: input overflow");
4959 #ifdef PERL_MAD
4960                 if (PL_madskills)
4961                     PL_thiswhite = newSVpvn(s, d - s);
4962 #endif
4963                 s = d;
4964                 incline(s);
4965             }
4966             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4967                 PL_bufptr = s;
4968                 PL_lex_state = LEX_FORMLINE;
4969                 return yylex();
4970             }
4971         }
4972         else {
4973 #ifdef PERL_MAD
4974             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4975                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4976                     PL_faketokens = 0;
4977                     s = SKIPSPACE0(s);
4978                     TOKEN(PEG); /* make sure any #! line is accessible */
4979                 }
4980                 s = SKIPSPACE0(s);
4981             }
4982             else {
4983 /*              if (PL_madskills && PL_lex_formbrack) { */
4984                     d = s;
4985                     while (d < PL_bufend && *d != '\n')
4986                         d++;
4987                     if (d < PL_bufend)
4988                         d++;
4989                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4990                       Perl_croak(aTHX_ "panic: input overflow");
4991                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4992                         if (!PL_thiswhite)
4993                             PL_thiswhite = newSVpvs("");
4994                         if (CopLINE(PL_curcop) == 1) {
4995                             sv_setpvs(PL_thiswhite, "");
4996                             PL_faketokens = 0;
4997                         }
4998                         sv_catpvn(PL_thiswhite, s, d - s);
4999                     }
5000                     s = d;
5001 /*              }
5002                 *s = '\0';
5003                 PL_bufend = s; */
5004             }
5005 #else
5006             *s = '\0';
5007             PL_bufend = s;
5008 #endif
5009         }
5010         goto retry;
5011     case '-':
5012         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5013             I32 ftst = 0;
5014             char tmp;
5015
5016             s++;
5017             PL_bufptr = s;
5018             tmp = *s++;
5019
5020             while (s < PL_bufend && SPACE_OR_TAB(*s))
5021                 s++;
5022
5023             if (strnEQ(s,"=>",2)) {
5024                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5025                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5026                 OPERATOR('-');          /* unary minus */
5027             }
5028             PL_last_uni = PL_oldbufptr;
5029             switch (tmp) {
5030             case 'r': ftst = OP_FTEREAD;        break;
5031             case 'w': ftst = OP_FTEWRITE;       break;
5032             case 'x': ftst = OP_FTEEXEC;        break;
5033             case 'o': ftst = OP_FTEOWNED;       break;
5034             case 'R': ftst = OP_FTRREAD;        break;
5035             case 'W': ftst = OP_FTRWRITE;       break;
5036             case 'X': ftst = OP_FTREXEC;        break;
5037             case 'O': ftst = OP_FTROWNED;       break;
5038             case 'e': ftst = OP_FTIS;           break;
5039             case 'z': ftst = OP_FTZERO;         break;
5040             case 's': ftst = OP_FTSIZE;         break;
5041             case 'f': ftst = OP_FTFILE;         break;
5042             case 'd': ftst = OP_FTDIR;          break;
5043             case 'l': ftst = OP_FTLINK;         break;
5044             case 'p': ftst = OP_FTPIPE;         break;
5045             case 'S': ftst = OP_FTSOCK;         break;
5046             case 'u': ftst = OP_FTSUID;         break;
5047             case 'g': ftst = OP_FTSGID;         break;
5048             case 'k': ftst = OP_FTSVTX;         break;
5049             case 'b': ftst = OP_FTBLK;          break;
5050             case 'c': ftst = OP_FTCHR;          break;
5051             case 't': ftst = OP_FTTTY;          break;
5052             case 'T': ftst = OP_FTTEXT;         break;
5053             case 'B': ftst = OP_FTBINARY;       break;
5054             case 'M': case 'A': case 'C':
5055                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5056                 switch (tmp) {
5057                 case 'M': ftst = OP_FTMTIME;    break;
5058                 case 'A': ftst = OP_FTATIME;    break;
5059                 case 'C': ftst = OP_FTCTIME;    break;
5060                 default:                        break;
5061                 }
5062                 break;
5063             default:
5064                 break;
5065             }
5066             if (ftst) {
5067                 PL_last_lop_op = (OPCODE)ftst;
5068                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5069                         "### Saw file test %c\n", (int)tmp);
5070                 } );
5071                 FTST(ftst);
5072             }
5073             else {
5074                 /* Assume it was a minus followed by a one-letter named
5075                  * subroutine call (or a -bareword), then. */
5076                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5077                         "### '-%c' looked like a file test but was not\n",
5078                         (int) tmp);
5079                 } );
5080                 s = --PL_bufptr;
5081             }
5082         }
5083         {
5084             const char tmp = *s++;
5085             if (*s == tmp) {
5086                 s++;
5087                 if (PL_expect == XOPERATOR)
5088                     TERM(POSTDEC);
5089                 else
5090                     OPERATOR(PREDEC);
5091             }
5092             else if (*s == '>') {
5093                 s++;
5094                 s = SKIPSPACE1(s);
5095                 if (isIDFIRST_lazy_if(s,UTF)) {
5096                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5097                     TOKEN(ARROW);
5098                 }
5099                 else if (*s == '$')
5100                     OPERATOR(ARROW);
5101                 else
5102                     TERM(ARROW);
5103             }
5104             if (PL_expect == XOPERATOR)
5105                 Aop(OP_SUBTRACT);
5106             else {
5107                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5108                     check_uni();
5109                 OPERATOR('-');          /* unary minus */
5110             }
5111         }
5112
5113     case '+':
5114         {
5115             const char tmp = *s++;
5116             if (*s == tmp) {
5117                 s++;
5118                 if (PL_expect == XOPERATOR)
5119                     TERM(POSTINC);
5120                 else
5121                     OPERATOR(PREINC);
5122             }
5123             if (PL_expect == XOPERATOR)
5124                 Aop(OP_ADD);
5125             else {
5126                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5127                     check_uni();
5128                 OPERATOR('+');
5129             }
5130         }
5131
5132     case '*':
5133         if (PL_expect != XOPERATOR) {
5134             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5135             PL_expect = XOPERATOR;
5136             force_ident(PL_tokenbuf, '*');
5137             if (!*PL_tokenbuf)
5138                 PREREF('*');
5139             TERM('*');
5140         }
5141         s++;
5142         if (*s == '*') {
5143             s++;
5144             PWop(OP_POW);
5145         }
5146         Mop(OP_MULTIPLY);
5147
5148     case '%':
5149         if (PL_expect == XOPERATOR) {
5150             ++s;
5151             Mop(OP_MODULO);
5152         }
5153         PL_tokenbuf[0] = '%';
5154         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5155                 sizeof PL_tokenbuf - 1, FALSE);
5156         if (!PL_tokenbuf[1]) {
5157             PREREF('%');
5158         }
5159         PL_pending_ident = '%';
5160         TERM('%');
5161
5162     case '^':
5163         s++;
5164         BOop(OP_BIT_XOR);
5165     case '[':
5166         PL_lex_brackets++;
5167         {
5168             const char tmp = *s++;
5169             OPERATOR(tmp);
5170         }
5171     case '~':
5172         if (s[1] == '~'
5173             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5174         {
5175             s += 2;
5176             Eop(OP_SMARTMATCH);
5177         }
5178     case ',':
5179         {
5180             const char tmp = *s++;
5181             OPERATOR(tmp);
5182         }
5183     case ':':
5184         if (s[1] == ':') {
5185             len = 0;
5186             goto just_a_word_zero_gv;
5187         }
5188         s++;
5189         switch (PL_expect) {
5190             OP *attrs;
5191 #ifdef PERL_MAD
5192             I32 stuffstart;
5193 #endif
5194         case XOPERATOR:
5195             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5196                 break;
5197             PL_bufptr = s;      /* update in case we back off */
5198             if (*s == '=') {
5199                 deprecate(":= for an empty attribute list");
5200             }
5201             goto grabattrs;
5202         case XATTRBLOCK:
5203             PL_expect = XBLOCK;
5204             goto grabattrs;
5205         case XATTRTERM:
5206             PL_expect = XTERMBLOCK;
5207          grabattrs:
5208 #ifdef PERL_MAD
5209             stuffstart = s - SvPVX(PL_linestr) - 1;
5210 #endif
5211             s = PEEKSPACE(s);
5212             attrs = NULL;
5213             while (isIDFIRST_lazy_if(s,UTF)) {
5214                 I32 tmp;
5215                 SV *sv;
5216                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5217                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5218                     if (tmp < 0) tmp = -tmp;
5219                     switch (tmp) {
5220                     case KEY_or:
5221                     case KEY_and:
5222                     case KEY_for:
5223                     case KEY_foreach:
5224                     case KEY_unless:
5225                     case KEY_if:
5226                     case KEY_while:
5227                     case KEY_until:
5228                         goto got_attrs;
5229                     default:
5230                         break;
5231                     }
5232                 }
5233                 sv = newSVpvn(s, len);
5234                 if (*d == '(') {
5235                     d = scan_str(d,TRUE,TRUE);
5236                     if (!d) {
5237                         /* MUST advance bufptr here to avoid bogus
5238                            "at end of line" context messages from yyerror().
5239                          */
5240                         PL_bufptr = s + len;
5241                         yyerror("Unterminated attribute parameter in attribute list");
5242                         if (attrs)
5243                             op_free(attrs);
5244                         sv_free(sv);
5245                         return REPORT(0);       /* EOF indicator */
5246                     }
5247                 }
5248                 if (PL_lex_stuff) {
5249                     sv_catsv(sv, PL_lex_stuff);
5250                     attrs = append_elem(OP_LIST, attrs,
5251                                         newSVOP(OP_CONST, 0, sv));
5252                     SvREFCNT_dec(PL_lex_stuff);
5253                     PL_lex_stuff = NULL;
5254                 }
5255                 else {
5256                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5257                         sv_free(sv);
5258                         if (PL_in_my == KEY_our) {
5259                             deprecate(":unique");
5260                         }
5261                         else
5262                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5263                     }
5264
5265                     /* NOTE: any CV attrs applied here need to be part of
5266                        the CVf_BUILTIN_ATTRS define in cv.h! */
5267                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5268                         sv_free(sv);
5269                         CvLVALUE_on(PL_compcv);
5270                     }
5271                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5272                         sv_free(sv);
5273                         deprecate(":locked");
5274                     }
5275                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5276                         sv_free(sv);
5277                         CvMETHOD_on(PL_compcv);
5278                     }
5279                     /* After we've set the flags, it could be argued that
5280                        we don't need to do the attributes.pm-based setting
5281                        process, and shouldn't bother appending recognized
5282                        flags.  To experiment with that, uncomment the
5283                        following "else".  (Note that's already been
5284                        uncommented.  That keeps the above-applied built-in
5285                        attributes from being intercepted (and possibly
5286                        rejected) by a package's attribute routines, but is
5287                        justified by the performance win for the common case
5288                        of applying only built-in attributes.) */
5289                     else
5290                         attrs = append_elem(OP_LIST, attrs,
5291                                             newSVOP(OP_CONST, 0,
5292                                                     sv));
5293                 }
5294                 s = PEEKSPACE(d);
5295                 if (*s == ':' && s[1] != ':')
5296                     s = PEEKSPACE(s+1);
5297                 else if (s == d)
5298                     break;      /* require real whitespace or :'s */
5299                 /* XXX losing whitespace on sequential attributes here */
5300             }
5301             {
5302                 const char tmp
5303                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5304                 if (*s != ';' && *s != '}' && *s != tmp
5305                     && (tmp != '=' || *s != ')')) {
5306                     const char q = ((*s == '\'') ? '"' : '\'');
5307                     /* If here for an expression, and parsed no attrs, back
5308                        off. */
5309                     if (tmp == '=' && !attrs) {
5310                         s = PL_bufptr;
5311                         break;
5312                     }
5313                     /* MUST advance bufptr here to avoid bogus "at end of line"
5314                        context messages from yyerror().
5315                     */
5316                     PL_bufptr = s;
5317                     yyerror( (const char *)
5318                              (*s
5319                               ? Perl_form(aTHX_ "Invalid separator character "
5320                                           "%c%c%c in attribute list", q, *s, q)
5321                               : "Unterminated attribute list" ) );
5322                     if (attrs)
5323                         op_free(attrs);
5324                     OPERATOR(':');
5325                 }
5326             }
5327         got_attrs:
5328             if (attrs) {
5329                 start_force(PL_curforce);
5330                 NEXTVAL_NEXTTOKE.opval = attrs;
5331                 CURMAD('_', PL_nextwhite);
5332                 force_next(THING);
5333             }
5334 #ifdef PERL_MAD
5335             if (PL_madskills) {
5336                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5337                                      (s - SvPVX(PL_linestr)) - stuffstart);
5338             }
5339 #endif
5340             TOKEN(COLONATTR);
5341         }
5342         OPERATOR(':');
5343     case '(':
5344         s++;
5345         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5346             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5347         else
5348             PL_expect = XTERM;
5349         s = SKIPSPACE1(s);
5350         TOKEN('(');
5351     case ';':
5352         CLINE;
5353         {
5354             const char tmp = *s++;
5355             OPERATOR(tmp);
5356         }
5357     case ')':
5358         {
5359             const char tmp = *s++;
5360             s = SKIPSPACE1(s);
5361             if (*s == '{')
5362                 PREBLOCK(tmp);
5363             TERM(tmp);
5364         }
5365     case ']':
5366         s++;
5367         if (PL_lex_brackets <= 0)
5368             yyerror("Unmatched right square bracket");
5369         else
5370             --PL_lex_brackets;
5371         if (PL_lex_state == LEX_INTERPNORMAL) {
5372             if (PL_lex_brackets == 0) {
5373                 if (*s == '-' && s[1] == '>')
5374                     PL_lex_state = LEX_INTERPENDMAYBE;
5375                 else if (*s != '[' && *s != '{')
5376                     PL_lex_state = LEX_INTERPEND;
5377             }
5378         }
5379         TERM(']');
5380     case '{':
5381       leftbracket:
5382         s++;
5383         if (PL_lex_brackets > 100) {
5384             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5385         }
5386         switch (PL_expect) {
5387         case XTERM:
5388             if (PL_lex_formbrack) {
5389                 s--;
5390                 PRETERMBLOCK(DO);
5391             }
5392             if (PL_oldoldbufptr == PL_last_lop)
5393                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5394             else
5395                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5396             OPERATOR(HASHBRACK);
5397         case XOPERATOR:
5398             while (s < PL_bufend && SPACE_OR_TAB(*s))
5399                 s++;
5400             d = s;
5401             PL_tokenbuf[0] = '\0';
5402             if (d < PL_bufend && *d == '-') {
5403                 PL_tokenbuf[0] = '-';
5404                 d++;
5405                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5406                     d++;
5407             }
5408             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5409                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5410                               FALSE, &len);
5411                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5412                     d++;
5413                 if (*d == '}') {
5414                     const char minus = (PL_tokenbuf[0] == '-');
5415                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5416                     if (minus)
5417                         force_next('-');
5418                 }
5419             }
5420             /* FALL THROUGH */
5421         case XATTRBLOCK:
5422         case XBLOCK:
5423             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5424             PL_expect = XSTATE;
5425             break;
5426         case XATTRTERM:
5427         case XTERMBLOCK:
5428             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5429             PL_expect = XSTATE;
5430             break;
5431         default: {
5432                 const char *t;
5433                 if (PL_oldoldbufptr == PL_last_lop)
5434                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5435                 else
5436                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5437                 s = SKIPSPACE1(s);
5438                 if (*s == '}') {
5439                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5440                         PL_expect = XTERM;
5441                         /* This hack is to get the ${} in the message. */
5442                         PL_bufptr = s+1;
5443                         yyerror("syntax error");
5444                         break;
5445                     }
5446                     OPERATOR(HASHBRACK);
5447                 }
5448                 /* This hack serves to disambiguate a pair of curlies
5449                  * as being a block or an anon hash.  Normally, expectation
5450                  * determines that, but in cases where we're not in a
5451                  * position to expect anything in particular (like inside
5452                  * eval"") we have to resolve the ambiguity.  This code
5453                  * covers the case where the first term in the curlies is a
5454                  * quoted string.  Most other cases need to be explicitly
5455                  * disambiguated by prepending a "+" before the opening
5456                  * curly in order to force resolution as an anon hash.
5457                  *
5458                  * XXX should probably propagate the outer expectation
5459                  * into eval"" to rely less on this hack, but that could
5460                  * potentially break current behavior of eval"".
5461                  * GSAR 97-07-21
5462                  */
5463                 t = s;
5464                 if (*s == '\'' || *s == '"' || *s == '`') {
5465                     /* common case: get past first string, handling escapes */
5466                     for (t++; t < PL_bufend && *t != *s;)
5467                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5468                             t++;
5469                     t++;
5470                 }
5471                 else if (*s == 'q') {
5472                     if (++t < PL_bufend
5473                         && (!isALNUM(*t)
5474                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5475                                 && !isALNUM(*t))))
5476                     {
5477                         /* skip q//-like construct */
5478                         const char *tmps;
5479                         char open, close, term;
5480                         I32 brackets = 1;
5481
5482                         while (t < PL_bufend && isSPACE(*t))
5483                             t++;
5484                         /* check for q => */
5485                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5486                             OPERATOR(HASHBRACK);
5487                         }
5488                         term = *t;
5489                         open = term;
5490                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5491                             term = tmps[5];
5492                         close = term;
5493                         if (open == close)
5494                             for (t++; t < PL_bufend; t++) {
5495                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5496                                     t++;
5497                                 else if (*t == open)
5498                                     break;
5499                             }
5500                         else {
5501                             for (t++; t < PL_bufend; t++) {
5502                                 if (*t == '\\' && t+1 < PL_bufend)
5503                                     t++;
5504                                 else if (*t == close && --brackets <= 0)
5505                                     break;
5506                                 else if (*t == open)
5507                                     brackets++;
5508                             }
5509                         }
5510                         t++;
5511                     }
5512                     else
5513                         /* skip plain q word */
5514                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5515                              t += UTF8SKIP(t);
5516                 }
5517                 else if (isALNUM_lazy_if(t,UTF)) {
5518                     t += UTF8SKIP(t);
5519                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5520                          t += UTF8SKIP(t);
5521                 }
5522                 while (t < PL_bufend && isSPACE(*t))
5523                     t++;
5524                 /* if comma follows first term, call it an anon hash */
5525                 /* XXX it could be a comma expression with loop modifiers */
5526                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5527                                    || (*t == '=' && t[1] == '>')))
5528                     OPERATOR(HASHBRACK);
5529                 if (PL_expect == XREF)
5530                     PL_expect = XTERM;
5531                 else {
5532                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5533                     PL_expect = XSTATE;
5534                 }
5535             }
5536             break;
5537         }
5538         pl_yylval.ival = CopLINE(PL_curcop);
5539         if (isSPACE(*s) || *s == '#')
5540             PL_copline = NOLINE;   /* invalidate current command line number */
5541         TOKEN('{');
5542     case '}':
5543       rightbracket:
5544         s++;
5545         if (PL_lex_brackets <= 0)
5546             yyerror("Unmatched right curly bracket");
5547         else
5548             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5549         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5550             PL_lex_formbrack = 0;
5551         if (PL_lex_state == LEX_INTERPNORMAL) {
5552             if (PL_lex_brackets == 0) {
5553                 if (PL_expect & XFAKEBRACK) {
5554                     PL_expect &= XENUMMASK;
5555                     PL_lex_state = LEX_INTERPEND;
5556                     PL_bufptr = s;
5557 #if 0
5558                     if (PL_madskills) {
5559                         if (!PL_thiswhite)
5560                             PL_thiswhite = newSVpvs("");
5561                         sv_catpvs(PL_thiswhite,"}");
5562                     }
5563 #endif
5564                     return yylex();     /* ignore fake brackets */
5565                 }
5566                 if (*s == '-' && s[1] == '>')
5567                     PL_lex_state = LEX_INTERPENDMAYBE;
5568                 else if (*s != '[' && *s != '{')
5569                     PL_lex_state = LEX_INTERPEND;
5570             }
5571         }
5572         if (PL_expect & XFAKEBRACK) {
5573             PL_expect &= XENUMMASK;
5574             PL_bufptr = s;
5575             return yylex();             /* ignore fake brackets */
5576         }
5577         start_force(PL_curforce);
5578         if (PL_madskills) {
5579             curmad('X', newSVpvn(s-1,1));
5580             CURMAD('_', PL_thiswhite);
5581         }
5582         force_next('}');
5583 #ifdef PERL_MAD
5584         if (!PL_thistoken)
5585             PL_thistoken = newSVpvs("");
5586 #endif
5587         TOKEN(';');
5588     case '&':
5589         s++;
5590         if (*s++ == '&')
5591             AOPERATOR(ANDAND);
5592         s--;
5593         if (PL_expect == XOPERATOR) {
5594             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5595                 && isIDFIRST_lazy_if(s,UTF))
5596             {
5597                 CopLINE_dec(PL_curcop);
5598                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5599                 CopLINE_inc(PL_curcop);
5600             }
5601             BAop(OP_BIT_AND);
5602         }
5603
5604         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5605         if (*PL_tokenbuf) {
5606             PL_expect = XOPERATOR;
5607             force_ident(PL_tokenbuf, '&');
5608         }
5609         else
5610             PREREF('&');
5611         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5612         TERM('&');
5613
5614     case '|':
5615         s++;
5616         if (*s++ == '|')
5617             AOPERATOR(OROR);
5618         s--;
5619         BOop(OP_BIT_OR);
5620     case '=':
5621         s++;
5622         {
5623             const char tmp = *s++;
5624             if (tmp == '=')
5625                 Eop(OP_EQ);
5626             if (tmp == '>')
5627                 OPERATOR(',');
5628             if (tmp == '~')
5629                 PMop(OP_MATCH);
5630             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5631                 && strchr("+-*/%.^&|<",tmp))
5632                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5633                             "Reversed %c= operator",(int)tmp);
5634             s--;
5635             if (PL_expect == XSTATE && isALPHA(tmp) &&
5636                 (s == PL_linestart+1 || s[-2] == '\n') )
5637                 {
5638                     if (PL_in_eval && !PL_rsfp) {
5639                         d = PL_bufend;
5640                         while (s < d) {
5641                             if (*s++ == '\n') {
5642                                 incline(s);
5643                                 if (strnEQ(s,"=cut",4)) {
5644                                     s = strchr(s,'\n');
5645                                     if (s)
5646                                         s++;
5647                                     else
5648                                         s = d;
5649                                     incline(s);
5650                                     goto retry;
5651                                 }
5652                             }
5653                         }
5654                         goto retry;
5655                     }
5656 #ifdef PERL_MAD
5657                     if (PL_madskills) {
5658                         if (!PL_thiswhite)
5659                             PL_thiswhite = newSVpvs("");
5660                         sv_catpvn(PL_thiswhite, PL_linestart,
5661                                   PL_bufend - PL_linestart);
5662                     }
5663 #endif
5664                     s = PL_bufend;
5665                     PL_doextract = TRUE;
5666                     goto retry;
5667                 }
5668         }
5669         if (PL_lex_brackets < PL_lex_formbrack) {
5670             const char *t = s;
5671 #ifdef PERL_STRICT_CR
5672             while (SPACE_OR_TAB(*t))
5673 #else
5674             while (SPACE_OR_TAB(*t) || *t == '\r')
5675 #endif
5676                 t++;
5677             if (*t == '\n' || *t == '#') {
5678                 s--;
5679                 PL_expect = XBLOCK;
5680                 goto leftbracket;
5681             }
5682         }
5683         pl_yylval.ival = 0;
5684         OPERATOR(ASSIGNOP);
5685     case '!':
5686         s++;
5687         {
5688             const char tmp = *s++;
5689             if (tmp == '=') {
5690                 /* was this !=~ where !~ was meant?
5691                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5692
5693                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5694                     const char *t = s+1;
5695
5696                     while (t < PL_bufend && isSPACE(*t))
5697                         ++t;
5698
5699                     if (*t == '/' || *t == '?' ||
5700                         ((*t == 'm' || *t == 's' || *t == 'y')
5701                          && !isALNUM(t[1])) ||
5702                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5703                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5704                                     "!=~ should be !~");
5705                 }
5706                 Eop(OP_NE);
5707             }
5708             if (tmp == '~')
5709                 PMop(OP_NOT);
5710         }
5711         s--;
5712         OPERATOR('!');
5713     case '<':
5714         if (PL_expect != XOPERATOR) {
5715             if (s[1] != '<' && !strchr(s,'>'))
5716                 check_uni();
5717             if (s[1] == '<')
5718                 s = scan_heredoc(s);
5719             else
5720                 s = scan_inputsymbol(s);
5721             TERM(sublex_start());
5722         }
5723         s++;
5724         {
5725             char tmp = *s++;
5726             if (tmp == '<')
5727                 SHop(OP_LEFT_SHIFT);
5728             if (tmp == '=') {
5729                 tmp = *s++;
5730                 if (tmp == '>')
5731                     Eop(OP_NCMP);
5732                 s--;
5733                 Rop(OP_LE);
5734             }
5735         }
5736         s--;
5737         Rop(OP_LT);
5738     case '>':
5739         s++;
5740         {
5741             const char tmp = *s++;
5742             if (tmp == '>')
5743                 SHop(OP_RIGHT_SHIFT);
5744             else if (tmp == '=')
5745                 Rop(OP_GE);
5746         }
5747         s--;
5748         Rop(OP_GT);
5749
5750     case '$':
5751         CLINE;
5752
5753         if (PL_expect == XOPERATOR) {
5754             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5755                 return deprecate_commaless_var_list();
5756             }
5757         }
5758
5759         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5760             PL_tokenbuf[0] = '@';
5761             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5762                            sizeof PL_tokenbuf - 1, FALSE);
5763             if (PL_expect == XOPERATOR)
5764                 no_op("Array length", s);
5765             if (!PL_tokenbuf[1])
5766                 PREREF(DOLSHARP);
5767             PL_expect = XOPERATOR;
5768             PL_pending_ident = '#';
5769             TOKEN(DOLSHARP);
5770         }
5771
5772         PL_tokenbuf[0] = '$';
5773         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5774                        sizeof PL_tokenbuf - 1, FALSE);
5775         if (PL_expect == XOPERATOR)
5776             no_op("Scalar", s);
5777         if (!PL_tokenbuf[1]) {
5778             if (s == PL_bufend)
5779                 yyerror("Final $ should be \\$ or $name");
5780             PREREF('$');
5781         }
5782
5783         /* This kludge not intended to be bulletproof. */
5784         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5785             pl_yylval.opval = newSVOP(OP_CONST, 0,
5786                                    newSViv(CopARYBASE_get(&PL_compiling)));
5787             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5788             TERM(THING);
5789         }
5790
5791         d = s;
5792         {
5793             const char tmp = *s;
5794             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5795                 s = SKIPSPACE1(s);
5796
5797             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5798                 && intuit_more(s)) {
5799                 if (*s == '[') {
5800                     PL_tokenbuf[0] = '@';
5801                     if (ckWARN(WARN_SYNTAX)) {
5802                         char *t = s+1;
5803
5804                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5805                             t++;
5806                         if (*t++ == ',') {
5807                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5808                             while (t < PL_bufend && *t != ']')
5809                                 t++;
5810                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5811                                         "Multidimensional syntax %.*s not supported",
5812                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5813                         }
5814                     }
5815                 }
5816                 else if (*s == '{') {
5817                     char *t;
5818                     PL_tokenbuf[0] = '%';
5819                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5820                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5821                         {
5822                             char tmpbuf[sizeof PL_tokenbuf];
5823                             do {
5824                                 t++;
5825                             } while (isSPACE(*t));
5826                             if (isIDFIRST_lazy_if(t,UTF)) {
5827                                 STRLEN len;
5828                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5829                                               &len);
5830                                 while (isSPACE(*t))
5831                                     t++;
5832                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5833                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5834                                                 "You need to quote \"%s\"",
5835                                                 tmpbuf);
5836                             }
5837                         }
5838                 }
5839             }
5840
5841             PL_expect = XOPERATOR;
5842             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5843                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5844                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5845                     PL_expect = XOPERATOR;
5846                 else if (strchr("$@\"'`q", *s))
5847                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5848                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5849                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5850                 else if (isIDFIRST_lazy_if(s,UTF)) {
5851                     char tmpbuf[sizeof PL_tokenbuf];
5852                     int t2;
5853                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5854                     if ((t2 = keyword(tmpbuf, len, 0))) {
5855                         /* binary operators exclude handle interpretations */
5856                         switch (t2) {
5857                         case -KEY_x:
5858                         case -KEY_eq:
5859                         case -KEY_ne:
5860                         case -KEY_gt:
5861                         case -KEY_lt:
5862                         case -KEY_ge:
5863                         case -KEY_le:
5864                         case -KEY_cmp:
5865                             break;
5866                         default:
5867                             PL_expect = XTERM;  /* e.g. print $fh length() */
5868                             break;
5869                         }
5870                     }
5871                     else {
5872                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5873                     }
5874                 }
5875                 else if (isDIGIT(*s))
5876                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5877                 else if (*s == '.' && isDIGIT(s[1]))
5878                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5879                 else if ((*s == '?' || *s == '-' || *s == '+')
5880                          && !isSPACE(s[1]) && s[1] != '=')
5881                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5882                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5883                          && s[1] != '/')
5884                     PL_expect = XTERM;          /* e.g. print $fh /.../
5885                                                    XXX except DORDOR operator
5886                                                 */
5887                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5888                          && s[2] != '=')
5889                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5890             }
5891         }
5892         PL_pending_ident = '$';
5893         TOKEN('$');
5894
5895     case '@':
5896         if (PL_expect == XOPERATOR)
5897             no_op("Array", s);
5898         PL_tokenbuf[0] = '@';
5899         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5900         if (!PL_tokenbuf[1]) {
5901             PREREF('@');
5902         }
5903         if (PL_lex_state == LEX_NORMAL)
5904             s = SKIPSPACE1(s);
5905         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5906             if (*s == '{')
5907                 PL_tokenbuf[0] = '%';
5908
5909             /* Warn about @ where they meant $. */
5910             if (*s == '[' || *s == '{') {
5911                 if (ckWARN(WARN_SYNTAX)) {
5912                     const char *t = s + 1;
5913                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5914                         t++;
5915                     if (*t == '}' || *t == ']') {
5916                         t++;
5917                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5918                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5919                             "Scalar value %.*s better written as $%.*s",
5920                             (int)(t-PL_bufptr), PL_bufptr,
5921                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5922                     }
5923                 }
5924             }
5925         }
5926         PL_pending_ident = '@';
5927         TERM('@');
5928
5929      case '/':                  /* may be division, defined-or, or pattern */
5930         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5931             s += 2;
5932             AOPERATOR(DORDOR);
5933         }
5934      case '?':                  /* may either be conditional or pattern */
5935         if (PL_expect == XOPERATOR) {
5936              char tmp = *s++;
5937              if(tmp == '?') {
5938                 OPERATOR('?');
5939              }
5940              else {
5941                  tmp = *s++;
5942                  if(tmp == '/') {
5943                      /* A // operator. */
5944                     AOPERATOR(DORDOR);
5945                  }
5946                  else {
5947                      s--;
5948                      Mop(OP_DIVIDE);
5949                  }
5950              }
5951          }
5952          else {
5953              /* Disable warning on "study /blah/" */
5954              if (PL_oldoldbufptr == PL_last_uni
5955               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5956                   || memNE(PL_last_uni, "study", 5)
5957                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5958               ))
5959                  check_uni();
5960              s = scan_pat(s,OP_MATCH);
5961              TERM(sublex_start());
5962          }
5963
5964     case '.':
5965         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5966 #ifdef PERL_STRICT_CR
5967             && s[1] == '\n'
5968 #else
5969             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5970 #endif
5971             && (s == PL_linestart || s[-1] == '\n') )
5972         {
5973             PL_lex_formbrack = 0;
5974             PL_expect = XSTATE;
5975             goto rightbracket;
5976         }
5977         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5978             s += 3;
5979             OPERATOR(YADAYADA);
5980         }
5981         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5982             char tmp = *s++;
5983             if (*s == tmp) {
5984                 s++;
5985                 if (*s == tmp) {
5986                     s++;
5987                     pl_yylval.ival = OPf_SPECIAL;
5988                 }
5989                 else
5990                     pl_yylval.ival = 0;
5991                 OPERATOR(DOTDOT);
5992             }
5993             Aop(OP_CONCAT);
5994         }
5995         /* FALL THROUGH */
5996     case '0': case '1': case '2': case '3': case '4':
5997     case '5': case '6': case '7': case '8': case '9':
5998         s = scan_num(s, &pl_yylval);
5999         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6000         if (PL_expect == XOPERATOR)
6001             no_op("Number",s);
6002         TERM(THING);
6003
6004     case '\'':
6005         s = scan_str(s,!!PL_madskills,FALSE);
6006         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6007         if (PL_expect == XOPERATOR) {
6008             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6009                 return deprecate_commaless_var_list();
6010             }
6011             else
6012                 no_op("String",s);
6013         }
6014         if (!s)
6015             missingterm(NULL);
6016         pl_yylval.ival = OP_CONST;
6017         TERM(sublex_start());
6018
6019     case '"':
6020         s = scan_str(s,!!PL_madskills,FALSE);
6021         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6022         if (PL_expect == XOPERATOR) {
6023             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6024                 return deprecate_commaless_var_list();
6025             }
6026             else
6027                 no_op("String",s);
6028         }
6029         if (!s)
6030             missingterm(NULL);
6031         pl_yylval.ival = OP_CONST;
6032         /* FIXME. I think that this can be const if char *d is replaced by
6033            more localised variables.  */
6034         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6035             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6036                 pl_yylval.ival = OP_STRINGIFY;
6037                 break;
6038             }
6039         }
6040         TERM(sublex_start());
6041
6042     case '`':
6043         s = scan_str(s,!!PL_madskills,FALSE);
6044         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6045         if (PL_expect == XOPERATOR)
6046             no_op("Backticks",s);
6047         if (!s)
6048             missingterm(NULL);
6049         readpipe_override();
6050         TERM(sublex_start());
6051
6052     case '\\':
6053         s++;
6054         if (PL_lex_inwhat && isDIGIT(*s))
6055             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6056                            *s, *s);
6057         if (PL_expect == XOPERATOR)
6058             no_op("Backslash",s);
6059         OPERATOR(REFGEN);
6060
6061     case 'v':
6062         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6063             char *start = s + 2;
6064             while (isDIGIT(*start) || *start == '_')
6065                 start++;
6066             if (*start == '.' && isDIGIT(start[1])) {
6067                 s = scan_num(s, &pl_yylval);
6068                 TERM(THING);
6069             }
6070             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6071             else if (!isALPHA(*start) && (PL_expect == XTERM
6072                         || PL_expect == XREF || PL_expect == XSTATE
6073                         || PL_expect == XTERMORDORDOR)) {
6074                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6075                 if (!gv) {
6076                     s = scan_num(s, &pl_yylval);
6077                     TERM(THING);
6078                 }
6079             }
6080         }
6081         goto keylookup;
6082     case 'x':
6083         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6084             s++;
6085             Mop(OP_REPEAT);
6086         }
6087         goto keylookup;
6088
6089     case '_':
6090     case 'a': case 'A':
6091     case 'b': case 'B':
6092     case 'c': case 'C':
6093     case 'd': case 'D':
6094     case 'e': case 'E':
6095     case 'f': case 'F':
6096     case 'g': case 'G':
6097     case 'h': case 'H':
6098     case 'i': case 'I':
6099     case 'j': case 'J':
6100     case 'k': case 'K':
6101     case 'l': case 'L':
6102     case 'm': case 'M':
6103     case 'n': case 'N':
6104     case 'o': case 'O':
6105     case 'p': case 'P':
6106     case 'q': case 'Q':
6107     case 'r': case 'R':
6108     case 's': case 'S':
6109     case 't': case 'T':
6110     case 'u': case 'U':
6111               case 'V':
6112     case 'w': case 'W':
6113               case 'X':
6114     case 'y': case 'Y':
6115     case 'z': case 'Z':
6116
6117       keylookup: {
6118         bool anydelim;
6119         I32 tmp;
6120
6121         orig_keyword = 0;
6122         gv = NULL;
6123         gvp = NULL;
6124
6125         PL_bufptr = s;
6126         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6127
6128         /* Some keywords can be followed by any delimiter, including ':' */
6129         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6130                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6131                              (PL_tokenbuf[0] == 'q' &&
6132                               strchr("qwxr", PL_tokenbuf[1])))));
6133
6134         /* x::* is just a word, unless x is "CORE" */
6135         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6136             goto just_a_word;
6137
6138         d = s;
6139         while (d < PL_bufend && isSPACE(*d))
6140                 d++;    /* no comments skipped here, or s### is misparsed */
6141
6142         /* Is this a word before a => operator? */
6143         if (*d == '=' && d[1] == '>') {
6144             CLINE;
6145             pl_yylval.opval
6146                 = (OP*)newSVOP(OP_CONST, 0,
6147                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6148             pl_yylval.opval->op_private = OPpCONST_BARE;
6149             TERM(WORD);
6150         }
6151
6152         /* Check for plugged-in keyword */
6153         {
6154             OP *o;
6155             int result;
6156             char *saved_bufptr = PL_bufptr;
6157             PL_bufptr = s;
6158             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6159             s = PL_bufptr;
6160             if (result == KEYWORD_PLUGIN_DECLINE) {
6161                 /* not a plugged-in keyword */
6162                 PL_bufptr = saved_bufptr;
6163             } else if (result == KEYWORD_PLUGIN_STMT) {
6164                 pl_yylval.opval = o;
6165                 CLINE;
6166                 PL_expect = XSTATE;
6167                 return REPORT(PLUGSTMT);
6168             } else if (result == KEYWORD_PLUGIN_EXPR) {
6169                 pl_yylval.opval = o;
6170                 CLINE;
6171                 PL_expect = XOPERATOR;
6172                 return REPORT(PLUGEXPR);
6173             } else {
6174                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6175                                         PL_tokenbuf);
6176             }
6177         }
6178
6179         /* Check for built-in keyword */
6180         tmp = keyword(PL_tokenbuf, len, 0);
6181
6182         /* Is this a label? */
6183         if (!anydelim && PL_expect == XSTATE
6184               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6185             s = d + 1;
6186             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6187             CLINE;
6188             TOKEN(LABEL);
6189         }
6190
6191         if (tmp < 0) {                  /* second-class keyword? */
6192             GV *ogv = NULL;     /* override (winner) */
6193             GV *hgv = NULL;     /* hidden (loser) */
6194             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6195                 CV *cv;
6196                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6197                     (cv = GvCVu(gv)))
6198                 {
6199                     if (GvIMPORTED_CV(gv))
6200                         ogv = gv;
6201                     else if (! CvMETHOD(cv))
6202                         hgv = gv;
6203                 }
6204                 if (!ogv &&
6205                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6206                     (gv = *gvp) && isGV_with_GP(gv) &&
6207                     GvCVu(gv) && GvIMPORTED_CV(gv))
6208                 {
6209                     ogv = gv;
6210                 }
6211             }
6212             if (ogv) {
6213                 orig_keyword = tmp;
6214                 tmp = 0;                /* overridden by import or by GLOBAL */
6215             }
6216             else if (gv && !gvp
6217                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6218                      && GvCVu(gv))
6219             {
6220                 tmp = 0;                /* any sub overrides "weak" keyword */
6221             }
6222             else {                      /* no override */
6223                 tmp = -tmp;
6224                 if (tmp == KEY_dump) {
6225                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6226                                    "dump() better written as CORE::dump()");
6227                 }
6228                 gv = NULL;
6229                 gvp = 0;
6230                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6231                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6232                                    "Ambiguous call resolved as CORE::%s(), "
6233                                    "qualify as such or use &",
6234                                    GvENAME(hgv));
6235             }
6236         }
6237
6238       reserved_word:
6239         switch (tmp) {
6240
6241         default:                        /* not a keyword */
6242             /* Trade off - by using this evil construction we can pull the
6243                variable gv into the block labelled keylookup. If not, then
6244                we have to give it function scope so that the goto from the
6245                earlier ':' case doesn't bypass the initialisation.  */
6246             if (0) {
6247             just_a_word_zero_gv:
6248                 gv = NULL;
6249                 gvp = NULL;
6250                 orig_keyword = 0;
6251             }
6252           just_a_word: {
6253                 SV *sv;
6254                 int pkgname = 0;
6255                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6256                 OP *rv2cv_op;
6257                 CV *cv;
6258 #ifdef PERL_MAD
6259                 SV *nextPL_nextwhite = 0;
6260 #endif
6261
6262
6263                 /* Get the rest if it looks like a package qualifier */
6264
6265                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6266                     STRLEN morelen;
6267                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6268                                   TRUE, &morelen);
6269                     if (!morelen)
6270                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6271                                 *s == '\'' ? "'" : "::");
6272                     len += morelen;
6273                     pkgname = 1;
6274                 }
6275
6276                 if (PL_expect == XOPERATOR) {
6277                     if (PL_bufptr == PL_linestart) {
6278                         CopLINE_dec(PL_curcop);
6279                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6280                         CopLINE_inc(PL_curcop);
6281                     }
6282                     else
6283                         no_op("Bareword",s);
6284                 }
6285
6286                 /* Look for a subroutine with this name in current package,
6287                    unless name is "Foo::", in which case Foo is a bearword
6288                    (and a package name). */
6289
6290                 if (len > 2 && !PL_madskills &&
6291                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6292                 {
6293                     if (ckWARN(WARN_BAREWORD)
6294                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6295                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6296                             "Bareword \"%s\" refers to nonexistent package",
6297                              PL_tokenbuf);
6298                     len -= 2;
6299                     PL_tokenbuf[len] = '\0';
6300                     gv = NULL;
6301                     gvp = 0;
6302                 }
6303                 else {
6304                     if (!gv) {
6305                         /* Mustn't actually add anything to a symbol table.
6306                            But also don't want to "initialise" any placeholder
6307                            constants that might already be there into full
6308                            blown PVGVs with attached PVCV.  */
6309                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6310                                                GV_NOADD_NOINIT, SVt_PVCV);
6311                     }
6312                     len = 0;
6313                 }
6314
6315                 /* if we saw a global override before, get the right name */
6316
6317                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6318                     len ? len : strlen(PL_tokenbuf));
6319                 if (gvp) {
6320                     SV * const tmp_sv = sv;
6321                     sv = newSVpvs("CORE::GLOBAL::");
6322                     sv_catsv(sv, tmp_sv);
6323                     SvREFCNT_dec(tmp_sv);
6324                 }
6325
6326 #ifdef PERL_MAD
6327                 if (PL_madskills && !PL_thistoken) {
6328                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6329                     PL_thistoken = newSVpvn(start,s - start);
6330                     PL_realtokenstart = s - SvPVX(PL_linestr);
6331                 }
6332 #endif
6333
6334                 /* Presume this is going to be a bareword of some sort. */
6335                 CLINE;
6336                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6337                 pl_yylval.opval->op_private = OPpCONST_BARE;
6338
6339                 /* And if "Foo::", then that's what it certainly is. */
6340                 if (len)
6341                     goto safe_bareword;
6342
6343                 cv = NULL;
6344                 {
6345                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6346                     const_op->op_private = OPpCONST_BARE;
6347                     rv2cv_op = newCVREF(0, const_op);
6348                 }
6349                 if (rv2cv_op->op_type == OP_RV2CV &&
6350                         (rv2cv_op->op_flags & OPf_KIDS)) {
6351                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6352                     switch (rv_op->op_type) {
6353                         case OP_CONST: {
6354                             SV *sv = cSVOPx_sv(rv_op);
6355                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6356                                 cv = (CV*)SvRV(sv);
6357                         } break;
6358                         case OP_GV: {
6359                             GV *gv = cGVOPx_gv(rv_op);
6360                             CV *maybe_cv = GvCVu(gv);
6361                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6362                                 cv = maybe_cv;
6363                         } break;
6364                     }
6365                 }
6366
6367                 /* See if it's the indirect object for a list operator. */
6368
6369                 if (PL_oldoldbufptr &&
6370                     PL_oldoldbufptr < PL_bufptr &&
6371                     (PL_oldoldbufptr == PL_last_lop
6372                      || PL_oldoldbufptr == PL_last_uni) &&
6373                     /* NO SKIPSPACE BEFORE HERE! */
6374                     (PL_expect == XREF ||
6375                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6376                 {
6377                     bool immediate_paren = *s == '(';
6378
6379                     /* (Now we can afford to cross potential line boundary.) */
6380                     s = SKIPSPACE2(s,nextPL_nextwhite);
6381 #ifdef PERL_MAD
6382                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6383 #endif
6384
6385                     /* Two barewords in a row may indicate method call. */
6386
6387                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6388                         (tmp = intuit_method(s, gv, cv))) {
6389                         op_free(rv2cv_op);
6390                         return REPORT(tmp);
6391                     }
6392
6393                     /* If not a declared subroutine, it's an indirect object. */
6394                     /* (But it's an indir obj regardless for sort.) */
6395                     /* Also, if "_" follows a filetest operator, it's a bareword */
6396
6397                     if (
6398                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6399                          (!cv &&
6400                         (PL_last_lop_op != OP_MAPSTART &&
6401                          PL_last_lop_op != OP_GREPSTART))))
6402                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6403                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6404                        )
6405                     {
6406                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6407                         goto bareword;
6408                     }
6409                 }
6410
6411                 PL_expect = XOPERATOR;
6412 #ifdef PERL_MAD
6413                 if (isSPACE(*s))
6414                     s = SKIPSPACE2(s,nextPL_nextwhite);
6415                 PL_nextwhite = nextPL_nextwhite;
6416 #else
6417                 s = skipspace(s);
6418 #endif
6419
6420                 /* Is this a word before a => operator? */
6421                 if (*s == '=' && s[1] == '>' && !pkgname) {
6422                     op_free(rv2cv_op);
6423                     CLINE;
6424                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6425                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6426                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6427                     TERM(WORD);
6428                 }
6429
6430                 /* If followed by a paren, it's certainly a subroutine. */
6431                 if (*s == '(') {
6432                     CLINE;
6433                     if (cv) {
6434                         d = s + 1;
6435                         while (SPACE_OR_TAB(*d))
6436                             d++;
6437                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6438                             s = d + 1;
6439                             goto its_constant;
6440                         }
6441                     }
6442 #ifdef PERL_MAD
6443                     if (PL_madskills) {
6444                         PL_nextwhite = PL_thiswhite;
6445                         PL_thiswhite = 0;
6446                     }
6447                     start_force(PL_curforce);
6448 #endif
6449                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6450                     PL_expect = XOPERATOR;
6451 #ifdef PERL_MAD
6452                     if (PL_madskills) {
6453                         PL_nextwhite = nextPL_nextwhite;
6454                         curmad('X', PL_thistoken);
6455                         PL_thistoken = newSVpvs("");
6456                     }
6457 #endif
6458                     op_free(rv2cv_op);
6459                     force_next(WORD);
6460                     pl_yylval.ival = 0;
6461                     TOKEN('&');
6462                 }
6463
6464                 /* If followed by var or block, call it a method (unless sub) */
6465
6466                 if ((*s == '$' || *s == '{') && !cv) {
6467                     op_free(rv2cv_op);
6468                     PL_last_lop = PL_oldbufptr;
6469                     PL_last_lop_op = OP_METHOD;
6470                     PREBLOCK(METHOD);
6471                 }
6472
6473                 /* If followed by a bareword, see if it looks like indir obj. */
6474
6475                 if (!orig_keyword
6476                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6477                         && (tmp = intuit_method(s, gv, cv))) {
6478                     op_free(rv2cv_op);
6479                     return REPORT(tmp);
6480                 }
6481
6482                 /* Not a method, so call it a subroutine (if defined) */
6483
6484                 if (cv) {
6485                     if (lastchar == '-')
6486                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6487                                          "Ambiguous use of -%s resolved as -&%s()",
6488                                          PL_tokenbuf, PL_tokenbuf);
6489                     /* Check for a constant sub */
6490                     if ((sv = cv_const_sv(cv))) {
6491                   its_constant:
6492                         op_free(rv2cv_op);
6493                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6494                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6495                         pl_yylval.opval->op_private = 0;
6496                         TOKEN(WORD);
6497                     }
6498
6499                     op_free(pl_yylval.opval);
6500                     pl_yylval.opval = rv2cv_op;
6501                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6502                     PL_last_lop = PL_oldbufptr;
6503                     PL_last_lop_op = OP_ENTERSUB;
6504                     /* Is there a prototype? */
6505                     if (
6506 #ifdef PERL_MAD
6507                         cv &&
6508 #endif
6509                         SvPOK(cv))
6510                     {
6511                         STRLEN protolen;
6512                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6513                         if (!protolen)
6514                             TERM(FUNC0SUB);
6515                         while (*proto == ';')
6516                             proto++;
6517                         if (
6518                             (
6519                                 (
6520                                     *proto == '$' || *proto == '_'
6521                                  || *proto == '*'
6522                                 )
6523                              && proto[1] == '\0'
6524                             )
6525                          || (
6526                              *proto == '\\' && proto[1] && proto[2] == '\0'
6527                             )
6528                         )
6529                             OPERATOR(UNIOPSUB);
6530                         if (*proto == '\\' && proto[1] == '[') {
6531                             const char *p = proto + 2;
6532                             while(*p && *p != ']')
6533                                 ++p;
6534                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6535                         }
6536                         if (*proto == '&' && *s == '{') {
6537                             if (PL_curstash)
6538                                 sv_setpvs(PL_subname, "__ANON__");
6539                             else
6540                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6541                             PREBLOCK(LSTOPSUB);
6542                         }
6543                     }
6544 #ifdef PERL_MAD
6545                     {
6546                         if (PL_madskills) {
6547                             PL_nextwhite = PL_thiswhite;
6548                             PL_thiswhite = 0;
6549                         }
6550                         start_force(PL_curforce);
6551                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6552                         PL_expect = XTERM;
6553                         if (PL_madskills) {
6554                             PL_nextwhite = nextPL_nextwhite;
6555                             curmad('X', PL_thistoken);
6556                             PL_thistoken = newSVpvs("");
6557                         }
6558                         force_next(WORD);
6559                         TOKEN(NOAMP);
6560                     }
6561                 }
6562
6563                 /* Guess harder when madskills require "best effort". */
6564                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6565                     int probable_sub = 0;
6566                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6567                         probable_sub = 1;
6568                     else if (isALPHA(*s)) {
6569                         char tmpbuf[1024];
6570                         STRLEN tmplen;
6571                         d = s;
6572                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6573                         if (!keyword(tmpbuf, tmplen, 0))
6574                             probable_sub = 1;
6575                         else {
6576                             while (d < PL_bufend && isSPACE(*d))
6577                                 d++;
6578                             if (*d == '=' && d[1] == '>')
6579                                 probable_sub = 1;
6580                         }
6581                     }
6582                     if (probable_sub) {
6583                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6584                         op_free(pl_yylval.opval);
6585                         pl_yylval.opval = rv2cv_op;
6586                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6587                         PL_last_lop = PL_oldbufptr;
6588                         PL_last_lop_op = OP_ENTERSUB;
6589                         PL_nextwhite = PL_thiswhite;
6590                         PL_thiswhite = 0;
6591                         start_force(PL_curforce);
6592                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6593                         PL_expect = XTERM;
6594                         PL_nextwhite = nextPL_nextwhite;
6595                         curmad('X', PL_thistoken);
6596                         PL_thistoken = newSVpvs("");
6597                         force_next(WORD);
6598                         TOKEN(NOAMP);
6599                     }
6600 #else
6601                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6602                     PL_expect = XTERM;
6603                     force_next(WORD);
6604                     TOKEN(NOAMP);
6605 #endif
6606                 }
6607
6608                 /* Call it a bare word */
6609
6610                 if (PL_hints & HINT_STRICT_SUBS)
6611                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6612                 else {
6613                 bareword:
6614                     /* after "print" and similar functions (corresponding to
6615                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6616                      * a filehandle should be subject to "strict subs".
6617                      * Likewise for the optional indirect-object argument to system
6618                      * or exec, which can't be a bareword */
6619                     if ((PL_last_lop_op == OP_PRINT
6620                             || PL_last_lop_op == OP_PRTF
6621                             || PL_last_lop_op == OP_SAY
6622                             || PL_last_lop_op == OP_SYSTEM
6623                             || PL_last_lop_op == OP_EXEC)
6624                             && (PL_hints & HINT_STRICT_SUBS))
6625                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6626                     if (lastchar != '-') {
6627                         if (ckWARN(WARN_RESERVED)) {
6628                             d = PL_tokenbuf;
6629                             while (isLOWER(*d))
6630                                 d++;
6631                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6632                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6633                                        PL_tokenbuf);
6634                         }
6635                     }
6636                 }
6637                 op_free(rv2cv_op);
6638
6639             safe_bareword:
6640                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6641                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6642                                      "Operator or semicolon missing before %c%s",
6643                                      lastchar, PL_tokenbuf);
6644                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6645                                      "Ambiguous use of %c resolved as operator %c",
6646                                      lastchar, lastchar);
6647                 }
6648                 TOKEN(WORD);
6649             }
6650
6651         case KEY___FILE__:
6652             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6653                                         newSVpv(CopFILE(PL_curcop),0));
6654             TERM(THING);
6655
6656         case KEY___LINE__:
6657             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6658                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6659             TERM(THING);
6660
6661         case KEY___PACKAGE__:
6662             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6663                                         (PL_curstash
6664                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6665                                          : &PL_sv_undef));
6666             TERM(THING);
6667
6668         case KEY___DATA__:
6669         case KEY___END__: {
6670             GV *gv;
6671             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6672                 const char *pname = "main";
6673                 if (PL_tokenbuf[2] == 'D')
6674                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6675                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6676                                 SVt_PVIO);
6677                 GvMULTI_on(gv);
6678                 if (!GvIO(gv))
6679                     GvIOp(gv) = newIO();
6680                 IoIFP(GvIOp(gv)) = PL_rsfp;
6681 #if defined(HAS_FCNTL) && defined(F_SETFD)
6682                 {
6683                     const int fd = PerlIO_fileno(PL_rsfp);
6684                     fcntl(fd,F_SETFD,fd >= 3);
6685                 }
6686 #endif
6687                 /* Mark this internal pseudo-handle as clean */
6688                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6689                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6690                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6691                 else
6692                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6693 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6694                 /* if the script was opened in binmode, we need to revert
6695                  * it to text mode for compatibility; but only iff it has CRs
6696                  * XXX this is a questionable hack at best. */
6697                 if (PL_bufend-PL_bufptr > 2
6698                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6699                 {
6700                     Off_t loc = 0;
6701                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6702                         loc = PerlIO_tell(PL_rsfp);
6703                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6704                     }
6705 #ifdef NETWARE
6706                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6707 #else
6708                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6709 #endif  /* NETWARE */
6710 #ifdef PERLIO_IS_STDIO /* really? */
6711 #  if defined(__BORLANDC__)
6712                         /* XXX see note in do_binmode() */
6713                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6714 #  endif
6715 #endif
6716                         if (loc > 0)
6717                             PerlIO_seek(PL_rsfp, loc, 0);
6718                     }
6719                 }
6720 #endif
6721 #ifdef PERLIO_LAYERS
6722                 if (!IN_BYTES) {
6723                     if (UTF)
6724                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6725                     else if (PL_encoding) {
6726                         SV *name;
6727                         dSP;
6728                         ENTER;
6729                         SAVETMPS;
6730                         PUSHMARK(sp);
6731                         EXTEND(SP, 1);
6732                         XPUSHs(PL_encoding);
6733                         PUTBACK;
6734                         call_method("name", G_SCALAR);
6735                         SPAGAIN;
6736                         name = POPs;
6737                         PUTBACK;
6738                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6739                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6740                                                       SVfARG(name)));
6741                         FREETMPS;
6742                         LEAVE;
6743                     }
6744                 }
6745 #endif
6746 #ifdef PERL_MAD
6747                 if (PL_madskills) {
6748                     if (PL_realtokenstart >= 0) {
6749                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6750                         if (!PL_endwhite)
6751                             PL_endwhite = newSVpvs("");
6752                         sv_catsv(PL_endwhite, PL_thiswhite);
6753                         PL_thiswhite = 0;
6754                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6755                         PL_realtokenstart = -1;
6756                     }
6757                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6758                            != NULL) ;
6759                 }
6760 #endif
6761                 PL_rsfp = NULL;
6762             }
6763             goto fake_eof;
6764         }
6765
6766         case KEY_AUTOLOAD:
6767         case KEY_DESTROY:
6768         case KEY_BEGIN:
6769         case KEY_UNITCHECK:
6770         case KEY_CHECK:
6771         case KEY_INIT:
6772         case KEY_END:
6773             if (PL_expect == XSTATE) {
6774                 s = PL_bufptr;
6775                 goto really_sub;
6776             }
6777             goto just_a_word;
6778
6779         case KEY_CORE:
6780             if (*s == ':' && s[1] == ':') {
6781                 s += 2;
6782                 d = s;
6783                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6784                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6785                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6786                 if (tmp < 0)
6787                     tmp = -tmp;
6788                 else if (tmp == KEY_require || tmp == KEY_do)
6789                     /* that's a way to remember we saw "CORE::" */
6790                     orig_keyword = tmp;
6791                 goto reserved_word;
6792             }
6793             goto just_a_word;
6794
6795         case KEY_abs:
6796             UNI(OP_ABS);
6797
6798         case KEY_alarm:
6799             UNI(OP_ALARM);
6800
6801         case KEY_accept:
6802             LOP(OP_ACCEPT,XTERM);
6803
6804         case KEY_and:
6805             OPERATOR(ANDOP);
6806
6807         case KEY_atan2:
6808             LOP(OP_ATAN2,XTERM);
6809
6810         case KEY_bind:
6811             LOP(OP_BIND,XTERM);
6812
6813         case KEY_binmode:
6814             LOP(OP_BINMODE,XTERM);
6815
6816         case KEY_bless:
6817             LOP(OP_BLESS,XTERM);
6818
6819         case KEY_break:
6820             FUN0(OP_BREAK);
6821
6822         case KEY_chop:
6823             UNI(OP_CHOP);
6824
6825         case KEY_continue:
6826             /* When 'use switch' is in effect, continue has a dual
6827                life as a control operator. */
6828             {
6829                 if (!FEATURE_IS_ENABLED("switch"))
6830                     PREBLOCK(CONTINUE);
6831                 else {
6832                     /* We have to disambiguate the two senses of
6833                       "continue". If the next token is a '{' then
6834                       treat it as the start of a continue block;
6835                       otherwise treat it as a control operator.
6836                      */
6837                     s = skipspace(s);
6838                     if (*s == '{')
6839             PREBLOCK(CONTINUE);
6840                     else
6841                         FUN0(OP_CONTINUE);
6842                 }
6843             }
6844
6845         case KEY_chdir:
6846             /* may use HOME */
6847             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6848             UNI(OP_CHDIR);
6849
6850         case KEY_close:
6851             UNI(OP_CLOSE);
6852
6853         case KEY_closedir:
6854             UNI(OP_CLOSEDIR);
6855
6856         case KEY_cmp:
6857             Eop(OP_SCMP);
6858
6859         case KEY_caller:
6860             UNI(OP_CALLER);
6861
6862         case KEY_crypt:
6863 #ifdef FCRYPT
6864             if (!PL_cryptseen) {
6865                 PL_cryptseen = TRUE;
6866                 init_des();
6867             }
6868 #endif
6869             LOP(OP_CRYPT,XTERM);
6870
6871         case KEY_chmod:
6872             LOP(OP_CHMOD,XTERM);
6873
6874         case KEY_chown:
6875             LOP(OP_CHOWN,XTERM);
6876
6877         case KEY_connect:
6878             LOP(OP_CONNECT,XTERM);
6879
6880         case KEY_chr:
6881             UNI(OP_CHR);
6882
6883         case KEY_cos:
6884             UNI(OP_COS);
6885
6886         case KEY_chroot:
6887             UNI(OP_CHROOT);
6888
6889         case KEY_default:
6890             PREBLOCK(DEFAULT);
6891
6892         case KEY_do:
6893             s = SKIPSPACE1(s);
6894             if (*s == '{')
6895                 PRETERMBLOCK(DO);
6896             if (*s != '\'')
6897                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6898             if (orig_keyword == KEY_do) {
6899                 orig_keyword = 0;
6900                 pl_yylval.ival = 1;
6901             }
6902             else
6903                 pl_yylval.ival = 0;
6904             OPERATOR(DO);
6905
6906         case KEY_die:
6907             PL_hints |= HINT_BLOCK_SCOPE;
6908             LOP(OP_DIE,XTERM);
6909
6910         case KEY_defined:
6911             UNI(OP_DEFINED);
6912
6913         case KEY_delete:
6914             UNI(OP_DELETE);
6915
6916         case KEY_dbmopen:
6917             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
6918                               STR_WITH_LEN("NDBM_File::"),
6919                               STR_WITH_LEN("DB_File::"),
6920                               STR_WITH_LEN("GDBM_File::"),
6921                               STR_WITH_LEN("SDBM_File::"),
6922                               STR_WITH_LEN("ODBM_File::"),
6923                               NULL);
6924             LOP(OP_DBMOPEN,XTERM);
6925
6926         case KEY_dbmclose:
6927             UNI(OP_DBMCLOSE);
6928
6929         case KEY_dump:
6930             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6931             LOOPX(OP_DUMP);
6932
6933         case KEY_else:
6934             PREBLOCK(ELSE);
6935
6936         case KEY_elsif:
6937             pl_yylval.ival = CopLINE(PL_curcop);
6938             OPERATOR(ELSIF);
6939
6940         case KEY_eq:
6941             Eop(OP_SEQ);
6942
6943         case KEY_exists:
6944             UNI(OP_EXISTS);
6945
6946         case KEY_exit:
6947             if (PL_madskills)
6948                 UNI(OP_INT);
6949             UNI(OP_EXIT);
6950
6951         case KEY_eval:
6952             s = SKIPSPACE1(s);
6953             if (*s == '{') { /* block eval */
6954                 PL_expect = XTERMBLOCK;
6955                 UNIBRACK(OP_ENTERTRY);
6956             }
6957             else { /* string eval */
6958                 PL_expect = XTERM;
6959                 UNIBRACK(OP_ENTEREVAL);
6960             }
6961
6962         case KEY_eof:
6963             UNI(OP_EOF);
6964
6965         case KEY_exp:
6966             UNI(OP_EXP);
6967
6968         case KEY_each:
6969             UNI(OP_EACH);
6970
6971         case KEY_exec:
6972             LOP(OP_EXEC,XREF);
6973
6974         case KEY_endhostent:
6975             FUN0(OP_EHOSTENT);
6976
6977         case KEY_endnetent:
6978             FUN0(OP_ENETENT);
6979
6980         case KEY_endservent:
6981             FUN0(OP_ESERVENT);
6982
6983         case KEY_endprotoent:
6984             FUN0(OP_EPROTOENT);
6985
6986         case KEY_endpwent:
6987             FUN0(OP_EPWENT);
6988
6989         case KEY_endgrent:
6990             FUN0(OP_EGRENT);
6991
6992         case KEY_for:
6993         case KEY_foreach:
6994             pl_yylval.ival = CopLINE(PL_curcop);
6995             s = SKIPSPACE1(s);
6996             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6997                 char *p = s;
6998 #ifdef PERL_MAD
6999                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7000 #endif
7001
7002                 if ((PL_bufend - p) >= 3 &&
7003                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7004                     p += 2;
7005                 else if ((PL_bufend - p) >= 4 &&
7006                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7007                     p += 3;
7008                 p = PEEKSPACE(p);
7009                 if (isIDFIRST_lazy_if(p,UTF)) {
7010                     p = scan_ident(p, PL_bufend,
7011                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7012                     p = PEEKSPACE(p);
7013                 }
7014                 if (*p != '$')
7015                     Perl_croak(aTHX_ "Missing $ on loop variable");
7016 #ifdef PERL_MAD
7017                 s = SvPVX(PL_linestr) + soff;
7018 #endif
7019             }
7020             OPERATOR(FOR);
7021
7022         case KEY_formline:
7023             LOP(OP_FORMLINE,XTERM);
7024
7025         case KEY_fork:
7026             FUN0(OP_FORK);
7027
7028         case KEY_fcntl:
7029             LOP(OP_FCNTL,XTERM);
7030
7031         case KEY_fileno:
7032             UNI(OP_FILENO);
7033
7034         case KEY_flock:
7035             LOP(OP_FLOCK,XTERM);
7036
7037         case KEY_gt:
7038             Rop(OP_SGT);
7039
7040         case KEY_ge:
7041             Rop(OP_SGE);
7042
7043         case KEY_grep:
7044             LOP(OP_GREPSTART, XREF);
7045
7046         case KEY_goto:
7047             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7048             LOOPX(OP_GOTO);
7049
7050         case KEY_gmtime:
7051             UNI(OP_GMTIME);
7052
7053         case KEY_getc:
7054             UNIDOR(OP_GETC);
7055
7056         case KEY_getppid:
7057             FUN0(OP_GETPPID);
7058
7059         case KEY_getpgrp:
7060             UNI(OP_GETPGRP);
7061
7062         case KEY_getpriority:
7063             LOP(OP_GETPRIORITY,XTERM);
7064
7065         case KEY_getprotobyname:
7066             UNI(OP_GPBYNAME);
7067
7068         case KEY_getprotobynumber:
7069             LOP(OP_GPBYNUMBER,XTERM);
7070
7071         case KEY_getprotoent:
7072             FUN0(OP_GPROTOENT);
7073
7074         case KEY_getpwent:
7075             FUN0(OP_GPWENT);
7076
7077         case KEY_getpwnam:
7078             UNI(OP_GPWNAM);
7079
7080         case KEY_getpwuid:
7081             UNI(OP_GPWUID);
7082
7083         case KEY_getpeername:
7084             UNI(OP_GETPEERNAME);
7085
7086         case KEY_gethostbyname:
7087             UNI(OP_GHBYNAME);
7088
7089         case KEY_gethostbyaddr:
7090             LOP(OP_GHBYADDR,XTERM);
7091
7092         case KEY_gethostent:
7093             FUN0(OP_GHOSTENT);
7094
7095         case KEY_getnetbyname:
7096             UNI(OP_GNBYNAME);
7097
7098         case KEY_getnetbyaddr:
7099             LOP(OP_GNBYADDR,XTERM);
7100
7101         case KEY_getnetent:
7102             FUN0(OP_GNETENT);
7103
7104         case KEY_getservbyname:
7105             LOP(OP_GSBYNAME,XTERM);
7106
7107         case KEY_getservbyport:
7108             LOP(OP_GSBYPORT,XTERM);
7109
7110         case KEY_getservent:
7111             FUN0(OP_GSERVENT);
7112
7113         case KEY_getsockname:
7114             UNI(OP_GETSOCKNAME);
7115
7116         case KEY_getsockopt:
7117             LOP(OP_GSOCKOPT,XTERM);
7118
7119         case KEY_getgrent:
7120             FUN0(OP_GGRENT);
7121
7122         case KEY_getgrnam:
7123             UNI(OP_GGRNAM);
7124
7125         case KEY_getgrgid:
7126             UNI(OP_GGRGID);
7127
7128         case KEY_getlogin:
7129             FUN0(OP_GETLOGIN);
7130
7131         case KEY_given:
7132             pl_yylval.ival = CopLINE(PL_curcop);
7133             OPERATOR(GIVEN);
7134
7135         case KEY_glob:
7136             LOP(OP_GLOB,XTERM);
7137
7138         case KEY_hex:
7139             UNI(OP_HEX);
7140
7141         case KEY_if:
7142             pl_yylval.ival = CopLINE(PL_curcop);
7143             OPERATOR(IF);
7144
7145         case KEY_index:
7146             LOP(OP_INDEX,XTERM);
7147
7148         case KEY_int:
7149             UNI(OP_INT);
7150
7151         case KEY_ioctl:
7152             LOP(OP_IOCTL,XTERM);
7153
7154         case KEY_join:
7155             LOP(OP_JOIN,XTERM);
7156
7157         case KEY_keys:
7158             UNI(OP_KEYS);
7159
7160         case KEY_kill:
7161             LOP(OP_KILL,XTERM);
7162
7163         case KEY_last:
7164             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7165             LOOPX(OP_LAST);
7166
7167         case KEY_lc:
7168             UNI(OP_LC);
7169
7170         case KEY_lcfirst:
7171             UNI(OP_LCFIRST);
7172
7173         case KEY_local:
7174             pl_yylval.ival = 0;
7175             OPERATOR(LOCAL);
7176
7177         case KEY_length:
7178             UNI(OP_LENGTH);
7179
7180         case KEY_lt:
7181             Rop(OP_SLT);
7182
7183         case KEY_le:
7184             Rop(OP_SLE);
7185
7186         case KEY_localtime:
7187             UNI(OP_LOCALTIME);
7188
7189         case KEY_log:
7190             UNI(OP_LOG);
7191
7192         case KEY_link:
7193             LOP(OP_LINK,XTERM);
7194
7195         case KEY_listen:
7196             LOP(OP_LISTEN,XTERM);
7197
7198         case KEY_lock:
7199             UNI(OP_LOCK);
7200
7201         case KEY_lstat:
7202             UNI(OP_LSTAT);
7203
7204         case KEY_m:
7205             s = scan_pat(s,OP_MATCH);
7206             TERM(sublex_start());
7207
7208         case KEY_map:
7209             LOP(OP_MAPSTART, XREF);
7210
7211         case KEY_mkdir:
7212             LOP(OP_MKDIR,XTERM);
7213
7214         case KEY_msgctl:
7215             LOP(OP_MSGCTL,XTERM);
7216
7217         case KEY_msgget:
7218             LOP(OP_MSGGET,XTERM);
7219
7220         case KEY_msgrcv:
7221             LOP(OP_MSGRCV,XTERM);
7222
7223         case KEY_msgsnd:
7224             LOP(OP_MSGSND,XTERM);
7225
7226         case KEY_our:
7227         case KEY_my:
7228         case KEY_state:
7229             PL_in_my = (U16)tmp;
7230             s = SKIPSPACE1(s);
7231             if (isIDFIRST_lazy_if(s,UTF)) {
7232 #ifdef PERL_MAD
7233                 char* start = s;
7234 #endif
7235                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7236                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7237                     goto really_sub;
7238                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7239                 if (!PL_in_my_stash) {
7240                     char tmpbuf[1024];
7241                     PL_bufptr = s;
7242                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7243                     yyerror(tmpbuf);
7244                 }
7245 #ifdef PERL_MAD
7246                 if (PL_madskills) {     /* just add type to declarator token */
7247                     sv_catsv(PL_thistoken, PL_nextwhite);
7248                     PL_nextwhite = 0;
7249                     sv_catpvn(PL_thistoken, start, s - start);
7250                 }
7251 #endif
7252             }
7253             pl_yylval.ival = 1;
7254             OPERATOR(MY);
7255
7256         case KEY_next:
7257             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7258             LOOPX(OP_NEXT);
7259
7260         case KEY_ne:
7261             Eop(OP_SNE);
7262
7263         case KEY_no:
7264             s = tokenize_use(0, s);
7265             OPERATOR(USE);
7266
7267         case KEY_not:
7268             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7269                 FUN1(OP_NOT);
7270             else
7271                 OPERATOR(NOTOP);
7272
7273         case KEY_open:
7274             s = SKIPSPACE1(s);
7275             if (isIDFIRST_lazy_if(s,UTF)) {
7276                 const char *t;
7277                 for (d = s; isALNUM_lazy_if(d,UTF);)
7278                     d++;
7279                 for (t=d; isSPACE(*t);)
7280                     t++;
7281                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7282                     /* [perl #16184] */
7283                     && !(t[0] == '=' && t[1] == '>')
7284                 ) {
7285                     int parms_len = (int)(d-s);
7286                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7287                            "Precedence problem: open %.*s should be open(%.*s)",
7288                             parms_len, s, parms_len, s);
7289                 }
7290             }
7291             LOP(OP_OPEN,XTERM);
7292
7293         case KEY_or:
7294             pl_yylval.ival = OP_OR;
7295             OPERATOR(OROP);
7296
7297         case KEY_ord:
7298             UNI(OP_ORD);
7299
7300         case KEY_oct:
7301             UNI(OP_OCT);
7302
7303         case KEY_opendir:
7304             LOP(OP_OPEN_DIR,XTERM);
7305
7306         case KEY_print:
7307             checkcomma(s,PL_tokenbuf,"filehandle");
7308             LOP(OP_PRINT,XREF);
7309
7310         case KEY_printf:
7311             checkcomma(s,PL_tokenbuf,"filehandle");
7312             LOP(OP_PRTF,XREF);
7313
7314         case KEY_prototype:
7315             UNI(OP_PROTOTYPE);
7316
7317         case KEY_push:
7318             LOP(OP_PUSH,XTERM);
7319
7320         case KEY_pop:
7321             UNIDOR(OP_POP);
7322
7323         case KEY_pos:
7324             UNIDOR(OP_POS);
7325
7326         case KEY_pack:
7327             LOP(OP_PACK,XTERM);
7328
7329         case KEY_package:
7330             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7331             s = SKIPSPACE1(s);
7332             s = force_strict_version(s);
7333             PL_lex_expect = XBLOCK;
7334             OPERATOR(PACKAGE);
7335
7336         case KEY_pipe:
7337             LOP(OP_PIPE_OP,XTERM);
7338
7339         case KEY_q:
7340             s = scan_str(s,!!PL_madskills,FALSE);
7341             if (!s)
7342                 missingterm(NULL);
7343             pl_yylval.ival = OP_CONST;
7344             TERM(sublex_start());
7345
7346         case KEY_quotemeta:
7347             UNI(OP_QUOTEMETA);
7348
7349         case KEY_qw: {
7350             OP *words = NULL;
7351             s = scan_str(s,!!PL_madskills,FALSE);
7352             if (!s)
7353                 missingterm(NULL);
7354             PL_expect = XOPERATOR;
7355             if (SvCUR(PL_lex_stuff)) {
7356                 int warned = 0;
7357                 d = SvPV_force(PL_lex_stuff, len);
7358                 while (len) {
7359                     for (; isSPACE(*d) && len; --len, ++d)
7360                         /**/;
7361                     if (len) {
7362                         SV *sv;
7363                         const char *b = d;
7364                         if (!warned && ckWARN(WARN_QW)) {
7365                             for (; !isSPACE(*d) && len; --len, ++d) {
7366                                 if (*d == ',') {
7367                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7368                                         "Possible attempt to separate words with commas");
7369                                     ++warned;
7370                                 }
7371                                 else if (*d == '#') {
7372                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7373                                         "Possible attempt to put comments in qw() list");
7374                                     ++warned;
7375                                 }
7376                             }
7377                         }
7378                         else {
7379                             for (; !isSPACE(*d) && len; --len, ++d)
7380                                 /**/;
7381                         }
7382                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7383                         words = append_elem(OP_LIST, words,
7384                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7385                     }
7386                 }
7387             }
7388             if (!words)
7389                 words = newNULLLIST();
7390             if (PL_lex_stuff) {
7391                 SvREFCNT_dec(PL_lex_stuff);
7392                 PL_lex_stuff = NULL;
7393             }
7394             PL_expect = XOPERATOR;
7395             pl_yylval.opval = sawparens(words);
7396             TOKEN(QWLIST);
7397         }
7398
7399         case KEY_qq:
7400             s = scan_str(s,!!PL_madskills,FALSE);
7401             if (!s)
7402                 missingterm(NULL);
7403             pl_yylval.ival = OP_STRINGIFY;
7404             if (SvIVX(PL_lex_stuff) == '\'')
7405                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7406             TERM(sublex_start());
7407
7408         case KEY_qr:
7409             s = scan_pat(s,OP_QR);
7410             TERM(sublex_start());
7411
7412         case KEY_qx:
7413             s = scan_str(s,!!PL_madskills,FALSE);
7414             if (!s)
7415                 missingterm(NULL);
7416             readpipe_override();
7417             TERM(sublex_start());
7418
7419         case KEY_return:
7420             OLDLOP(OP_RETURN);
7421
7422         case KEY_require:
7423             s = SKIPSPACE1(s);
7424             if (isDIGIT(*s)) {
7425                 s = force_version(s, FALSE);
7426             }
7427             else if (*s != 'v' || !isDIGIT(s[1])
7428                     || (s = force_version(s, TRUE), *s == 'v'))
7429             {
7430                 *PL_tokenbuf = '\0';
7431                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7432                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7433                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7434                 else if (*s == '<')
7435                     yyerror("<> should be quotes");
7436             }
7437             if (orig_keyword == KEY_require) {
7438                 orig_keyword = 0;
7439                 pl_yylval.ival = 1;
7440             }
7441             else
7442                 pl_yylval.ival = 0;
7443             PL_expect = XTERM;
7444             PL_bufptr = s;
7445             PL_last_uni = PL_oldbufptr;
7446             PL_last_lop_op = OP_REQUIRE;
7447             s = skipspace(s);
7448             return REPORT( (int)REQUIRE );
7449
7450         case KEY_reset:
7451             UNI(OP_RESET);
7452
7453         case KEY_redo:
7454             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7455             LOOPX(OP_REDO);
7456
7457         case KEY_rename:
7458             LOP(OP_RENAME,XTERM);
7459
7460         case KEY_rand:
7461             UNI(OP_RAND);
7462
7463         case KEY_rmdir:
7464             UNI(OP_RMDIR);
7465
7466         case KEY_rindex:
7467             LOP(OP_RINDEX,XTERM);
7468
7469         case KEY_read:
7470             LOP(OP_READ,XTERM);
7471
7472         case KEY_readdir:
7473             UNI(OP_READDIR);
7474
7475         case KEY_readline:
7476             UNIDOR(OP_READLINE);
7477
7478         case KEY_readpipe:
7479             UNIDOR(OP_BACKTICK);
7480
7481         case KEY_rewinddir:
7482             UNI(OP_REWINDDIR);
7483
7484         case KEY_recv:
7485             LOP(OP_RECV,XTERM);
7486
7487         case KEY_reverse:
7488             LOP(OP_REVERSE,XTERM);
7489
7490         case KEY_readlink:
7491             UNIDOR(OP_READLINK);
7492
7493         case KEY_ref:
7494             UNI(OP_REF);
7495
7496         case KEY_s:
7497             s = scan_subst(s);
7498             if (pl_yylval.opval)
7499                 TERM(sublex_start());
7500             else
7501                 TOKEN(1);       /* force error */
7502
7503         case KEY_say:
7504             checkcomma(s,PL_tokenbuf,"filehandle");
7505             LOP(OP_SAY,XREF);
7506
7507         case KEY_chomp:
7508             UNI(OP_CHOMP);
7509
7510         case KEY_scalar:
7511             UNI(OP_SCALAR);
7512
7513         case KEY_select:
7514             LOP(OP_SELECT,XTERM);
7515
7516         case KEY_seek:
7517             LOP(OP_SEEK,XTERM);
7518
7519         case KEY_semctl:
7520             LOP(OP_SEMCTL,XTERM);
7521
7522         case KEY_semget:
7523             LOP(OP_SEMGET,XTERM);
7524
7525         case KEY_semop:
7526             LOP(OP_SEMOP,XTERM);
7527
7528         case KEY_send:
7529             LOP(OP_SEND,XTERM);
7530
7531         case KEY_setpgrp:
7532             LOP(OP_SETPGRP,XTERM);
7533
7534         case KEY_setpriority:
7535             LOP(OP_SETPRIORITY,XTERM);
7536
7537         case KEY_sethostent:
7538             UNI(OP_SHOSTENT);
7539
7540         case KEY_setnetent:
7541             UNI(OP_SNETENT);
7542
7543         case KEY_setservent:
7544             UNI(OP_SSERVENT);
7545
7546         case KEY_setprotoent:
7547             UNI(OP_SPROTOENT);
7548
7549         case KEY_setpwent:
7550             FUN0(OP_SPWENT);
7551
7552         case KEY_setgrent:
7553             FUN0(OP_SGRENT);
7554
7555         case KEY_seekdir:
7556             LOP(OP_SEEKDIR,XTERM);
7557
7558         case KEY_setsockopt:
7559             LOP(OP_SSOCKOPT,XTERM);
7560
7561         case KEY_shift:
7562             UNIDOR(OP_SHIFT);
7563
7564         case KEY_shmctl:
7565             LOP(OP_SHMCTL,XTERM);
7566
7567         case KEY_shmget:
7568             LOP(OP_SHMGET,XTERM);
7569
7570         case KEY_shmread:
7571             LOP(OP_SHMREAD,XTERM);
7572
7573         case KEY_shmwrite:
7574             LOP(OP_SHMWRITE,XTERM);
7575
7576         case KEY_shutdown:
7577             LOP(OP_SHUTDOWN,XTERM);
7578
7579         case KEY_sin:
7580             UNI(OP_SIN);
7581
7582         case KEY_sleep:
7583             UNI(OP_SLEEP);
7584
7585         case KEY_socket:
7586             LOP(OP_SOCKET,XTERM);
7587
7588         case KEY_socketpair:
7589             LOP(OP_SOCKPAIR,XTERM);
7590
7591         case KEY_sort:
7592             checkcomma(s,PL_tokenbuf,"subroutine name");
7593             s = SKIPSPACE1(s);
7594             if (*s == ';' || *s == ')')         /* probably a close */
7595                 Perl_croak(aTHX_ "sort is now a reserved word");
7596             PL_expect = XTERM;
7597             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7598             LOP(OP_SORT,XREF);
7599
7600         case KEY_split:
7601             LOP(OP_SPLIT,XTERM);
7602
7603         case KEY_sprintf:
7604             LOP(OP_SPRINTF,XTERM);
7605
7606         case KEY_splice:
7607             LOP(OP_SPLICE,XTERM);
7608
7609         case KEY_sqrt:
7610             UNI(OP_SQRT);
7611
7612         case KEY_srand:
7613             UNI(OP_SRAND);
7614
7615         case KEY_stat:
7616             UNI(OP_STAT);
7617
7618         case KEY_study:
7619             UNI(OP_STUDY);
7620
7621         case KEY_substr:
7622             LOP(OP_SUBSTR,XTERM);
7623
7624         case KEY_format:
7625         case KEY_sub:
7626           really_sub:
7627             {
7628                 char tmpbuf[sizeof PL_tokenbuf];
7629                 SSize_t tboffset = 0;
7630                 expectation attrful;
7631                 bool have_name, have_proto;
7632                 const int key = tmp;
7633
7634 #ifdef PERL_MAD
7635                 SV *tmpwhite = 0;
7636
7637                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7638                 SV *subtoken = newSVpvn(tstart, s - tstart);
7639                 PL_thistoken = 0;
7640
7641                 d = s;
7642                 s = SKIPSPACE2(s,tmpwhite);
7643 #else
7644                 s = skipspace(s);
7645 #endif
7646
7647                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7648                     (*s == ':' && s[1] == ':'))
7649                 {
7650 #ifdef PERL_MAD
7651                     SV *nametoke = NULL;
7652 #endif
7653
7654                     PL_expect = XBLOCK;
7655                     attrful = XATTRBLOCK;
7656                     /* remember buffer pos'n for later force_word */
7657                     tboffset = s - PL_oldbufptr;
7658                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7659 #ifdef PERL_MAD
7660                     if (PL_madskills)
7661                         nametoke = newSVpvn(s, d - s);
7662 #endif
7663                     if (memchr(tmpbuf, ':', len))
7664                         sv_setpvn(PL_subname, tmpbuf, len);
7665                     else {
7666                         sv_setsv(PL_subname,PL_curstname);
7667                         sv_catpvs(PL_subname,"::");
7668                         sv_catpvn(PL_subname,tmpbuf,len);
7669                     }
7670                     have_name = TRUE;
7671
7672 #ifdef PERL_MAD
7673
7674                     start_force(0);
7675                     CURMAD('X', nametoke);
7676                     CURMAD('_', tmpwhite);
7677                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7678                                       FALSE, TRUE, TRUE);
7679
7680                     s = SKIPSPACE2(d,tmpwhite);
7681 #else
7682                     s = skipspace(d);
7683 #endif
7684                 }
7685                 else {
7686                     if (key == KEY_my)
7687                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7688                     PL_expect = XTERMBLOCK;
7689                     attrful = XATTRTERM;
7690                     sv_setpvs(PL_subname,"?");
7691                     have_name = FALSE;
7692                 }
7693
7694                 if (key == KEY_format) {
7695                     if (*s == '=')
7696                         PL_lex_formbrack = PL_lex_brackets + 1;
7697 #ifdef PERL_MAD
7698                     PL_thistoken = subtoken;
7699                     s = d;
7700 #else
7701                     if (have_name)
7702                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7703                                           FALSE, TRUE, TRUE);
7704 #endif
7705                     OPERATOR(FORMAT);
7706                 }
7707
7708                 /* Look for a prototype */
7709                 if (*s == '(') {
7710                     char *p;
7711                     bool bad_proto = FALSE;
7712                     bool in_brackets = FALSE;
7713                     char greedy_proto = ' ';
7714                     bool proto_after_greedy_proto = FALSE;
7715                     bool must_be_last = FALSE;
7716                     bool underscore = FALSE;
7717                     bool seen_underscore = FALSE;
7718                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7719
7720                     s = scan_str(s,!!PL_madskills,FALSE);
7721                     if (!s)
7722                         Perl_croak(aTHX_ "Prototype not terminated");
7723                     /* strip spaces and check for bad characters */
7724                     d = SvPVX(PL_lex_stuff);
7725                     tmp = 0;
7726                     for (p = d; *p; ++p) {
7727                         if (!isSPACE(*p)) {
7728                             d[tmp++] = *p;
7729
7730                             if (warnillegalproto) {
7731                                 if (must_be_last)
7732                                     proto_after_greedy_proto = TRUE;
7733                                 if (!strchr("$@%*;[]&\\_", *p)) {
7734                                     bad_proto = TRUE;
7735                                 }
7736                                 else {
7737                                     if ( underscore ) {
7738                                         if ( *p != ';' )
7739                                             bad_proto = TRUE;
7740                                         underscore = FALSE;
7741                                     }
7742                                     if ( *p == '[' ) {
7743                                         in_brackets = TRUE;
7744                                     }
7745                                     else if ( *p == ']' ) {
7746                                         in_brackets = FALSE;
7747                                     }
7748                                     else if ( (*p == '@' || *p == '%') &&
7749                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7750                                          !in_brackets ) {
7751                                         must_be_last = TRUE;
7752                                         greedy_proto = *p;
7753                                     }
7754                                     else if ( *p == '_' ) {
7755                                         underscore = seen_underscore = TRUE;
7756                                     }
7757                                 }
7758                             }
7759                         }
7760                     }
7761                     d[tmp] = '\0';
7762                     if (proto_after_greedy_proto)
7763                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7764                                     "Prototype after '%c' for %"SVf" : %s",
7765                                     greedy_proto, SVfARG(PL_subname), d);
7766                     if (bad_proto)
7767                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7768                                     "Illegal character %sin prototype for %"SVf" : %s",
7769                                     seen_underscore ? "after '_' " : "",
7770                                     SVfARG(PL_subname), d);
7771                     SvCUR_set(PL_lex_stuff, tmp);
7772                     have_proto = TRUE;
7773
7774 #ifdef PERL_MAD
7775                     start_force(0);
7776                     CURMAD('q', PL_thisopen);
7777                     CURMAD('_', tmpwhite);
7778                     CURMAD('=', PL_thisstuff);
7779                     CURMAD('Q', PL_thisclose);
7780                     NEXTVAL_NEXTTOKE.opval =
7781                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7782                     PL_lex_stuff = NULL;
7783                     force_next(THING);
7784
7785                     s = SKIPSPACE2(s,tmpwhite);
7786 #else
7787                     s = skipspace(s);
7788 #endif
7789                 }
7790                 else
7791                     have_proto = FALSE;
7792
7793                 if (*s == ':' && s[1] != ':')
7794                     PL_expect = attrful;
7795                 else if (*s != '{' && key == KEY_sub) {
7796                     if (!have_name)
7797                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7798                     else if (*s != ';' && *s != '}')
7799                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7800                 }
7801
7802 #ifdef PERL_MAD
7803                 start_force(0);
7804                 if (tmpwhite) {
7805                     if (PL_madskills)
7806                         curmad('^', newSVpvs(""));
7807                     CURMAD('_', tmpwhite);
7808                 }
7809                 force_next(0);
7810
7811                 PL_thistoken = subtoken;
7812 #else
7813                 if (have_proto) {
7814                     NEXTVAL_NEXTTOKE.opval =
7815                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7816                     PL_lex_stuff = NULL;
7817                     force_next(THING);
7818                 }
7819 #endif
7820                 if (!have_name) {
7821                     if (PL_curstash)
7822                         sv_setpvs(PL_subname, "__ANON__");
7823                     else
7824                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7825                     TOKEN(ANONSUB);
7826                 }
7827 #ifndef PERL_MAD
7828                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7829                                   FALSE, TRUE, TRUE);
7830 #endif
7831                 if (key == KEY_my)
7832                     TOKEN(MYSUB);
7833                 TOKEN(SUB);
7834             }
7835
7836         case KEY_system:
7837             LOP(OP_SYSTEM,XREF);
7838
7839         case KEY_symlink:
7840             LOP(OP_SYMLINK,XTERM);
7841
7842         case KEY_syscall:
7843             LOP(OP_SYSCALL,XTERM);
7844
7845         case KEY_sysopen:
7846             LOP(OP_SYSOPEN,XTERM);
7847
7848         case KEY_sysseek:
7849             LOP(OP_SYSSEEK,XTERM);
7850
7851         case KEY_sysread:
7852             LOP(OP_SYSREAD,XTERM);
7853
7854         case KEY_syswrite:
7855             LOP(OP_SYSWRITE,XTERM);
7856
7857         case KEY_tr:
7858             s = scan_trans(s);
7859             TERM(sublex_start());
7860
7861         case KEY_tell:
7862             UNI(OP_TELL);
7863
7864         case KEY_telldir:
7865             UNI(OP_TELLDIR);
7866
7867         case KEY_tie:
7868             LOP(OP_TIE,XTERM);
7869
7870         case KEY_tied:
7871             UNI(OP_TIED);
7872
7873         case KEY_time:
7874             FUN0(OP_TIME);
7875
7876         case KEY_times:
7877             FUN0(OP_TMS);
7878
7879         case KEY_truncate:
7880             LOP(OP_TRUNCATE,XTERM);
7881
7882         case KEY_uc:
7883             UNI(OP_UC);
7884
7885         case KEY_ucfirst:
7886             UNI(OP_UCFIRST);
7887
7888         case KEY_untie:
7889             UNI(OP_UNTIE);
7890
7891         case KEY_until:
7892             pl_yylval.ival = CopLINE(PL_curcop);
7893             OPERATOR(UNTIL);
7894
7895         case KEY_unless:
7896             pl_yylval.ival = CopLINE(PL_curcop);
7897             OPERATOR(UNLESS);
7898
7899         case KEY_unlink:
7900             LOP(OP_UNLINK,XTERM);
7901
7902         case KEY_undef:
7903             UNIDOR(OP_UNDEF);
7904
7905         case KEY_unpack:
7906             LOP(OP_UNPACK,XTERM);
7907
7908         case KEY_utime:
7909             LOP(OP_UTIME,XTERM);
7910
7911         case KEY_umask:
7912             UNIDOR(OP_UMASK);
7913
7914         case KEY_unshift:
7915             LOP(OP_UNSHIFT,XTERM);
7916
7917         case KEY_use:
7918             s = tokenize_use(1, s);
7919             OPERATOR(USE);
7920
7921         case KEY_values:
7922             UNI(OP_VALUES);
7923
7924         case KEY_vec:
7925             LOP(OP_VEC,XTERM);
7926
7927         case KEY_when:
7928             pl_yylval.ival = CopLINE(PL_curcop);
7929             OPERATOR(WHEN);
7930
7931         case KEY_while:
7932             pl_yylval.ival = CopLINE(PL_curcop);
7933             OPERATOR(WHILE);
7934
7935         case KEY_warn:
7936             PL_hints |= HINT_BLOCK_SCOPE;
7937             LOP(OP_WARN,XTERM);
7938
7939         case KEY_wait:
7940             FUN0(OP_WAIT);
7941
7942         case KEY_waitpid:
7943             LOP(OP_WAITPID,XTERM);
7944
7945         case KEY_wantarray:
7946             FUN0(OP_WANTARRAY);
7947
7948         case KEY_write:
7949 #ifdef EBCDIC
7950         {
7951             char ctl_l[2];
7952             ctl_l[0] = toCTRL('L');
7953             ctl_l[1] = '\0';
7954             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7955         }
7956 #else
7957             /* Make sure $^L is defined */
7958             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7959 #endif
7960             UNI(OP_ENTERWRITE);
7961
7962         case KEY_x:
7963             if (PL_expect == XOPERATOR)
7964                 Mop(OP_REPEAT);
7965             check_uni();
7966             goto just_a_word;
7967
7968         case KEY_xor:
7969             pl_yylval.ival = OP_XOR;
7970             OPERATOR(OROP);
7971
7972         case KEY_y:
7973             s = scan_trans(s);
7974             TERM(sublex_start());
7975         }
7976     }}
7977 }
7978 #ifdef __SC__
7979 #pragma segment Main
7980 #endif
7981
7982 static int
7983 S_pending_ident(pTHX)
7984 {
7985     dVAR;
7986     register char *d;
7987     PADOFFSET tmp = 0;
7988     /* pit holds the identifier we read and pending_ident is reset */
7989     char pit = PL_pending_ident;
7990     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7991     /* All routes through this function want to know if there is a colon.  */
7992     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7993     PL_pending_ident = 0;
7994
7995     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7996     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7997           "### Pending identifier '%s'\n", PL_tokenbuf); });
7998
7999     /* if we're in a my(), we can't allow dynamics here.
8000        $foo'bar has already been turned into $foo::bar, so
8001        just check for colons.
8002
8003        if it's a legal name, the OP is a PADANY.
8004     */
8005     if (PL_in_my) {
8006         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8007             if (has_colon)
8008                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8009                                   "variable %s in \"our\"",
8010                                   PL_tokenbuf));
8011             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8012         }
8013         else {
8014             if (has_colon)
8015                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8016                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8017
8018             pl_yylval.opval = newOP(OP_PADANY, 0);
8019             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8020             return PRIVATEREF;
8021         }
8022     }
8023
8024     /*
8025        build the ops for accesses to a my() variable.
8026
8027        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8028        then used in a comparison.  This catches most, but not
8029        all cases.  For instance, it catches
8030            sort { my($a); $a <=> $b }
8031        but not
8032            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8033        (although why you'd do that is anyone's guess).
8034     */
8035
8036     if (!has_colon) {
8037         if (!PL_in_my)
8038             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8039         if (tmp != NOT_IN_PAD) {
8040             /* might be an "our" variable" */
8041             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8042                 /* build ops for a bareword */
8043                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8044                 HEK * const stashname = HvNAME_HEK(stash);
8045                 SV *  const sym = newSVhek(stashname);
8046                 sv_catpvs(sym, "::");
8047                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8048                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8049                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8050                 gv_fetchsv(sym,
8051                     (PL_in_eval
8052                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8053                         : GV_ADDMULTI
8054                     ),
8055                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8056                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8057                      : SVt_PVHV));
8058                 return WORD;
8059             }
8060
8061             /* if it's a sort block and they're naming $a or $b */
8062             if (PL_last_lop_op == OP_SORT &&
8063                 PL_tokenbuf[0] == '$' &&
8064                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8065                 && !PL_tokenbuf[2])
8066             {
8067                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8068                      d < PL_bufend && *d != '\n';
8069                      d++)
8070                 {
8071                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8072                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8073                               PL_tokenbuf);
8074                     }
8075                 }
8076             }
8077
8078             pl_yylval.opval = newOP(OP_PADANY, 0);
8079             pl_yylval.opval->op_targ = tmp;
8080             return PRIVATEREF;
8081         }
8082     }
8083
8084     /*
8085        Whine if they've said @foo in a doublequoted string,
8086        and @foo isn't a variable we can find in the symbol
8087        table.
8088     */
8089     if (ckWARN(WARN_AMBIGUOUS) &&
8090         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8091         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8092                                          SVt_PVAV);
8093         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8094                 /* DO NOT warn for @- and @+ */
8095                 && !( PL_tokenbuf[2] == '\0' &&
8096                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8097            )
8098         {
8099             /* Downgraded from fatal to warning 20000522 mjd */
8100             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8101                         "Possible unintended interpolation of %s in string",
8102                         PL_tokenbuf);
8103         }
8104     }
8105
8106     /* build ops for a bareword */
8107     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8108                                                       tokenbuf_len - 1));
8109     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8110     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8111                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8112                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8113                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8114                       : SVt_PVHV));
8115     return WORD;
8116 }
8117
8118 /*
8119  *  The following code was generated by perl_keyword.pl.
8120  */
8121
8122 I32
8123 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8124 {
8125     dVAR;
8126
8127     PERL_ARGS_ASSERT_KEYWORD;
8128
8129   switch (len)
8130   {
8131     case 1: /* 5 tokens of length 1 */
8132       switch (name[0])
8133       {
8134         case 'm':
8135           {                                       /* m          */
8136             return KEY_m;
8137           }
8138
8139         case 'q':
8140           {                                       /* q          */
8141             return KEY_q;
8142           }
8143
8144         case 's':
8145           {                                       /* s          */
8146             return KEY_s;
8147           }
8148
8149         case 'x':
8150           {                                       /* x          */
8151             return -KEY_x;
8152           }
8153
8154         case 'y':
8155           {                                       /* y          */
8156             return KEY_y;
8157           }
8158
8159         default:
8160           goto unknown;
8161       }
8162
8163     case 2: /* 18 tokens of length 2 */
8164       switch (name[0])
8165       {
8166         case 'd':
8167           if (name[1] == 'o')
8168           {                                       /* do         */
8169             return KEY_do;
8170           }
8171
8172           goto unknown;
8173
8174         case 'e':
8175           if (name[1] == 'q')
8176           {                                       /* eq         */
8177             return -KEY_eq;
8178           }
8179
8180           goto unknown;
8181
8182         case 'g':
8183           switch (name[1])
8184           {
8185             case 'e':
8186               {                                   /* ge         */
8187                 return -KEY_ge;
8188               }
8189
8190             case 't':
8191               {                                   /* gt         */
8192                 return -KEY_gt;
8193               }
8194
8195             default:
8196               goto unknown;
8197           }
8198
8199         case 'i':
8200           if (name[1] == 'f')
8201           {                                       /* if         */
8202             return KEY_if;
8203           }
8204
8205           goto unknown;
8206
8207         case 'l':
8208           switch (name[1])
8209           {
8210             case 'c':
8211               {                                   /* lc         */
8212                 return -KEY_lc;
8213               }
8214
8215             case 'e':
8216               {                                   /* le         */
8217                 return -KEY_le;
8218               }
8219
8220             case 't':
8221               {                                   /* lt         */
8222                 return -KEY_lt;
8223               }
8224
8225             default:
8226               goto unknown;
8227           }
8228
8229         case 'm':
8230           if (name[1] == 'y')
8231           {                                       /* my         */
8232             return KEY_my;
8233           }
8234
8235           goto unknown;
8236
8237         case 'n':
8238           switch (name[1])
8239           {
8240             case 'e':
8241               {                                   /* ne         */
8242                 return -KEY_ne;
8243               }
8244
8245             case 'o':
8246               {                                   /* no         */
8247                 return KEY_no;
8248               }
8249
8250             default:
8251               goto unknown;
8252           }
8253
8254         case 'o':
8255           if (name[1] == 'r')
8256           {                                       /* or         */
8257             return -KEY_or;
8258           }
8259
8260           goto unknown;
8261
8262         case 'q':
8263           switch (name[1])
8264           {
8265             case 'q':
8266               {                                   /* qq         */
8267                 return KEY_qq;
8268               }
8269
8270             case 'r':
8271               {                                   /* qr         */
8272                 return KEY_qr;
8273               }
8274
8275             case 'w':
8276               {                                   /* qw         */
8277                 return KEY_qw;
8278               }
8279
8280             case 'x':
8281               {                                   /* qx         */
8282                 return KEY_qx;
8283               }
8284
8285             default:
8286               goto unknown;
8287           }
8288
8289         case 't':
8290           if (name[1] == 'r')
8291           {                                       /* tr         */
8292             return KEY_tr;
8293           }
8294
8295           goto unknown;
8296
8297         case 'u':
8298           if (name[1] == 'c')
8299           {                                       /* uc         */
8300             return -KEY_uc;
8301           }
8302
8303           goto unknown;
8304
8305         default:
8306           goto unknown;
8307       }
8308
8309     case 3: /* 29 tokens of length 3 */
8310       switch (name[0])
8311       {
8312         case 'E':
8313           if (name[1] == 'N' &&
8314               name[2] == 'D')
8315           {                                       /* END        */
8316             return KEY_END;
8317           }
8318
8319           goto unknown;
8320
8321         case 'a':
8322           switch (name[1])
8323           {
8324             case 'b':
8325               if (name[2] == 's')
8326               {                                   /* abs        */
8327                 return -KEY_abs;
8328               }
8329
8330               goto unknown;
8331
8332             case 'n':
8333               if (name[2] == 'd')
8334               {                                   /* and        */
8335                 return -KEY_and;
8336               }
8337
8338               goto unknown;
8339
8340             default:
8341               goto unknown;
8342           }
8343
8344         case 'c':
8345           switch (name[1])
8346           {
8347             case 'h':
8348               if (name[2] == 'r')
8349               {                                   /* chr        */
8350                 return -KEY_chr;
8351               }
8352
8353               goto unknown;
8354
8355             case 'm':
8356               if (name[2] == 'p')
8357               {                                   /* cmp        */
8358                 return -KEY_cmp;
8359               }
8360
8361               goto unknown;
8362
8363             case 'o':
8364               if (name[2] == 's')
8365               {                                   /* cos        */
8366                 return -KEY_cos;
8367               }
8368
8369               goto unknown;
8370
8371             default:
8372               goto unknown;
8373           }
8374
8375         case 'd':
8376           if (name[1] == 'i' &&
8377               name[2] == 'e')
8378           {                                       /* die        */
8379             return -KEY_die;
8380           }
8381
8382           goto unknown;
8383
8384         case 'e':
8385           switch (name[1])
8386           {
8387             case 'o':
8388               if (name[2] == 'f')
8389               {                                   /* eof        */
8390                 return -KEY_eof;
8391               }
8392
8393               goto unknown;
8394
8395             case 'x':
8396               if (name[2] == 'p')
8397               {                                   /* exp        */
8398                 return -KEY_exp;
8399               }
8400
8401               goto unknown;
8402
8403             default:
8404               goto unknown;
8405           }
8406
8407         case 'f':
8408           if (name[1] == 'o' &&
8409               name[2] == 'r')
8410           {                                       /* for        */
8411             return KEY_for;
8412           }
8413
8414           goto unknown;
8415
8416         case 'h':
8417           if (name[1] == 'e' &&
8418               name[2] == 'x')
8419           {                                       /* hex        */
8420             return -KEY_hex;
8421           }
8422
8423           goto unknown;
8424
8425         case 'i':
8426           if (name[1] == 'n' &&
8427               name[2] == 't')
8428           {                                       /* int        */
8429             return -KEY_int;
8430           }
8431
8432           goto unknown;
8433
8434         case 'l':
8435           if (name[1] == 'o' &&
8436               name[2] == 'g')
8437           {                                       /* log        */
8438             return -KEY_log;
8439           }
8440
8441           goto unknown;
8442
8443         case 'm':
8444           if (name[1] == 'a' &&
8445               name[2] == 'p')
8446           {                                       /* map        */
8447             return KEY_map;
8448           }
8449
8450           goto unknown;
8451
8452         case 'n':
8453           if (name[1] == 'o' &&
8454               name[2] == 't')
8455           {                                       /* not        */
8456             return -KEY_not;
8457           }
8458
8459           goto unknown;
8460
8461         case 'o':
8462           switch (name[1])
8463           {
8464             case 'c':
8465               if (name[2] == 't')
8466               {                                   /* oct        */
8467                 return -KEY_oct;
8468               }
8469
8470               goto unknown;
8471
8472             case 'r':
8473               if (name[2] == 'd')
8474               {                                   /* ord        */
8475                 return -KEY_ord;
8476               }
8477
8478               goto unknown;
8479
8480             case 'u':
8481               if (name[2] == 'r')
8482               {                                   /* our        */
8483                 return KEY_our;
8484               }
8485
8486               goto unknown;
8487
8488             default:
8489               goto unknown;
8490           }
8491
8492         case 'p':
8493           if (name[1] == 'o')
8494           {
8495             switch (name[2])
8496             {
8497               case 'p':
8498                 {                                 /* pop        */
8499                   return -KEY_pop;
8500                 }
8501
8502               case 's':
8503                 {                                 /* pos        */
8504                   return KEY_pos;
8505                 }
8506
8507               default:
8508                 goto unknown;
8509             }
8510           }
8511
8512           goto unknown;
8513
8514         case 'r':
8515           if (name[1] == 'e' &&
8516               name[2] == 'f')
8517           {                                       /* ref        */
8518             return -KEY_ref;
8519           }
8520
8521           goto unknown;
8522
8523         case 's':
8524           switch (name[1])
8525           {
8526             case 'a':
8527               if (name[2] == 'y')
8528               {                                   /* say        */
8529                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8530               }
8531
8532               goto unknown;
8533
8534             case 'i':
8535               if (name[2] == 'n')
8536               {                                   /* sin        */
8537                 return -KEY_sin;
8538               }
8539
8540               goto unknown;
8541
8542             case 'u':
8543               if (name[2] == 'b')
8544               {                                   /* sub        */
8545                 return KEY_sub;
8546               }
8547
8548               goto unknown;
8549
8550             default:
8551               goto unknown;
8552           }
8553
8554         case 't':
8555           if (name[1] == 'i' &&
8556               name[2] == 'e')
8557           {                                       /* tie        */
8558             return -KEY_tie;
8559           }
8560
8561           goto unknown;
8562
8563         case 'u':
8564           if (name[1] == 's' &&
8565               name[2] == 'e')
8566           {                                       /* use        */
8567             return KEY_use;
8568           }
8569
8570           goto unknown;
8571
8572         case 'v':
8573           if (name[1] == 'e' &&
8574               name[2] == 'c')
8575           {                                       /* vec        */
8576             return -KEY_vec;
8577           }
8578
8579           goto unknown;
8580
8581         case 'x':
8582           if (name[1] == 'o' &&
8583               name[2] == 'r')
8584           {                                       /* xor        */
8585             return -KEY_xor;
8586           }
8587
8588           goto unknown;
8589
8590         default:
8591           goto unknown;
8592       }
8593
8594     case 4: /* 41 tokens of length 4 */
8595       switch (name[0])
8596       {
8597         case 'C':
8598           if (name[1] == 'O' &&
8599               name[2] == 'R' &&
8600               name[3] == 'E')
8601           {                                       /* CORE       */
8602             return -KEY_CORE;
8603           }
8604
8605           goto unknown;
8606
8607         case 'I':
8608           if (name[1] == 'N' &&
8609               name[2] == 'I' &&
8610               name[3] == 'T')
8611           {                                       /* INIT       */
8612             return KEY_INIT;
8613           }
8614
8615           goto unknown;
8616
8617         case 'b':
8618           if (name[1] == 'i' &&
8619               name[2] == 'n' &&
8620               name[3] == 'd')
8621           {                                       /* bind       */
8622             return -KEY_bind;
8623           }
8624
8625           goto unknown;
8626
8627         case 'c':
8628           if (name[1] == 'h' &&
8629               name[2] == 'o' &&
8630               name[3] == 'p')
8631           {                                       /* chop       */
8632             return -KEY_chop;
8633           }
8634
8635           goto unknown;
8636
8637         case 'd':
8638           if (name[1] == 'u' &&
8639               name[2] == 'm' &&
8640               name[3] == 'p')
8641           {                                       /* dump       */
8642             return -KEY_dump;
8643           }
8644
8645           goto unknown;
8646
8647         case 'e':
8648           switch (name[1])
8649           {
8650             case 'a':
8651               if (name[2] == 'c' &&
8652                   name[3] == 'h')
8653               {                                   /* each       */
8654                 return -KEY_each;
8655               }
8656
8657               goto unknown;
8658
8659             case 'l':
8660               if (name[2] == 's' &&
8661                   name[3] == 'e')
8662               {                                   /* else       */
8663                 return KEY_else;
8664               }
8665
8666               goto unknown;
8667
8668             case 'v':
8669               if (name[2] == 'a' &&
8670                   name[3] == 'l')
8671               {                                   /* eval       */
8672                 return KEY_eval;
8673               }
8674
8675               goto unknown;
8676
8677             case 'x':
8678               switch (name[2])
8679               {
8680                 case 'e':
8681                   if (name[3] == 'c')
8682                   {                               /* exec       */
8683                     return -KEY_exec;
8684                   }
8685
8686                   goto unknown;
8687
8688                 case 'i':
8689                   if (name[3] == 't')
8690                   {                               /* exit       */
8691                     return -KEY_exit;
8692                   }
8693
8694                   goto unknown;
8695
8696                 default:
8697                   goto unknown;
8698               }
8699
8700             default:
8701               goto unknown;
8702           }
8703
8704         case 'f':
8705           if (name[1] == 'o' &&
8706               name[2] == 'r' &&
8707               name[3] == 'k')
8708           {                                       /* fork       */
8709             return -KEY_fork;
8710           }
8711
8712           goto unknown;
8713
8714         case 'g':
8715           switch (name[1])
8716           {
8717             case 'e':
8718               if (name[2] == 't' &&
8719                   name[3] == 'c')
8720               {                                   /* getc       */
8721                 return -KEY_getc;
8722               }
8723
8724               goto unknown;
8725
8726             case 'l':
8727               if (name[2] == 'o' &&
8728                   name[3] == 'b')
8729               {                                   /* glob       */
8730                 return KEY_glob;
8731               }
8732
8733               goto unknown;
8734
8735             case 'o':
8736               if (name[2] == 't' &&
8737                   name[3] == 'o')
8738               {                                   /* goto       */
8739                 return KEY_goto;
8740               }
8741
8742               goto unknown;
8743
8744             case 'r':
8745               if (name[2] == 'e' &&
8746                   name[3] == 'p')
8747               {                                   /* grep       */
8748                 return KEY_grep;
8749               }
8750
8751               goto unknown;
8752
8753             default:
8754               goto unknown;
8755           }
8756
8757         case 'j':
8758           if (name[1] == 'o' &&
8759               name[2] == 'i' &&
8760               name[3] == 'n')
8761           {                                       /* join       */
8762             return -KEY_join;
8763           }
8764
8765           goto unknown;
8766
8767         case 'k':
8768           switch (name[1])
8769           {
8770             case 'e':
8771               if (name[2] == 'y' &&
8772                   name[3] == 's')
8773               {                                   /* keys       */
8774                 return -KEY_keys;
8775               }
8776
8777               goto unknown;
8778
8779             case 'i':
8780               if (name[2] == 'l' &&
8781                   name[3] == 'l')
8782               {                                   /* kill       */
8783                 return -KEY_kill;
8784               }
8785
8786               goto unknown;
8787
8788             default:
8789               goto unknown;
8790           }
8791
8792         case 'l':
8793           switch (name[1])
8794           {
8795             case 'a':
8796               if (name[2] == 's' &&
8797                   name[3] == 't')
8798               {                                   /* last       */
8799                 return KEY_last;
8800               }
8801
8802               goto unknown;
8803
8804             case 'i':
8805               if (name[2] == 'n' &&
8806                   name[3] == 'k')
8807               {                                   /* link       */
8808                 return -KEY_link;
8809               }
8810
8811               goto unknown;
8812
8813             case 'o':
8814               if (name[2] == 'c' &&
8815                   name[3] == 'k')
8816               {                                   /* lock       */
8817                 return -KEY_lock;
8818               }
8819
8820               goto unknown;
8821
8822             default:
8823               goto unknown;
8824           }
8825
8826         case 'n':
8827           if (name[1] == 'e' &&
8828               name[2] == 'x' &&
8829               name[3] == 't')
8830           {                                       /* next       */
8831             return KEY_next;
8832           }
8833
8834           goto unknown;
8835
8836         case 'o':
8837           if (name[1] == 'p' &&
8838               name[2] == 'e' &&
8839               name[3] == 'n')
8840           {                                       /* open       */
8841             return -KEY_open;
8842           }
8843
8844           goto unknown;
8845
8846         case 'p':
8847           switch (name[1])
8848           {
8849             case 'a':
8850               if (name[2] == 'c' &&
8851                   name[3] == 'k')
8852               {                                   /* pack       */
8853                 return -KEY_pack;
8854               }
8855
8856               goto unknown;
8857
8858             case 'i':
8859               if (name[2] == 'p' &&
8860                   name[3] == 'e')
8861               {                                   /* pipe       */
8862                 return -KEY_pipe;
8863               }
8864
8865               goto unknown;
8866
8867             case 'u':
8868               if (name[2] == 's' &&
8869                   name[3] == 'h')
8870               {                                   /* push       */
8871                 return -KEY_push;
8872               }
8873
8874               goto unknown;
8875
8876             default:
8877               goto unknown;
8878           }
8879
8880         case 'r':
8881           switch (name[1])
8882           {
8883             case 'a':
8884               if (name[2] == 'n' &&
8885                   name[3] == 'd')
8886               {                                   /* rand       */
8887                 return -KEY_rand;
8888               }
8889
8890               goto unknown;
8891
8892             case 'e':
8893               switch (name[2])
8894               {
8895                 case 'a':
8896                   if (name[3] == 'd')
8897                   {                               /* read       */
8898                     return -KEY_read;
8899                   }
8900
8901                   goto unknown;
8902
8903                 case 'c':
8904                   if (name[3] == 'v')
8905                   {                               /* recv       */
8906                     return -KEY_recv;
8907                   }
8908
8909                   goto unknown;
8910
8911                 case 'd':
8912                   if (name[3] == 'o')
8913                   {                               /* redo       */
8914                     return KEY_redo;
8915                   }
8916
8917                   goto unknown;
8918
8919                 default:
8920                   goto unknown;
8921               }
8922
8923             default:
8924               goto unknown;
8925           }
8926
8927         case 's':
8928           switch (name[1])
8929           {
8930             case 'e':
8931               switch (name[2])
8932               {
8933                 case 'e':
8934                   if (name[3] == 'k')
8935                   {                               /* seek       */
8936                     return -KEY_seek;
8937                   }
8938
8939                   goto unknown;
8940
8941                 case 'n':
8942                   if (name[3] == 'd')
8943                   {                               /* send       */
8944                     return -KEY_send;
8945                   }
8946
8947                   goto unknown;
8948
8949                 default:
8950                   goto unknown;
8951               }
8952
8953             case 'o':
8954               if (name[2] == 'r' &&
8955                   name[3] == 't')
8956               {                                   /* sort       */
8957                 return KEY_sort;
8958               }
8959
8960               goto unknown;
8961
8962             case 'q':
8963               if (name[2] == 'r' &&
8964                   name[3] == 't')
8965               {                                   /* sqrt       */
8966                 return -KEY_sqrt;
8967               }
8968
8969               goto unknown;
8970
8971             case 't':
8972               if (name[2] == 'a' &&
8973                   name[3] == 't')
8974               {                                   /* stat       */
8975                 return -KEY_stat;
8976               }
8977
8978               goto unknown;
8979
8980             default:
8981               goto unknown;
8982           }
8983
8984         case 't':
8985           switch (name[1])
8986           {
8987             case 'e':
8988               if (name[2] == 'l' &&
8989                   name[3] == 'l')
8990               {                                   /* tell       */
8991                 return -KEY_tell;
8992               }
8993
8994               goto unknown;
8995
8996             case 'i':
8997               switch (name[2])
8998               {
8999                 case 'e':
9000                   if (name[3] == 'd')
9001                   {                               /* tied       */
9002                     return -KEY_tied;
9003                   }
9004
9005                   goto unknown;
9006
9007                 case 'm':
9008                   if (name[3] == 'e')
9009                   {                               /* time       */
9010                     return -KEY_time;
9011                   }
9012
9013                   goto unknown;
9014
9015                 default:
9016                   goto unknown;
9017               }
9018
9019             default:
9020               goto unknown;
9021           }
9022
9023         case 'w':
9024           switch (name[1])
9025           {
9026             case 'a':
9027               switch (name[2])
9028               {
9029                 case 'i':
9030                   if (name[3] == 't')
9031                   {                               /* wait       */
9032                     return -KEY_wait;
9033                   }
9034
9035                   goto unknown;
9036
9037                 case 'r':
9038                   if (name[3] == 'n')
9039                   {                               /* warn       */
9040                     return -KEY_warn;
9041                   }
9042
9043                   goto unknown;
9044
9045                 default:
9046                   goto unknown;
9047               }
9048
9049             case 'h':
9050               if (name[2] == 'e' &&
9051                   name[3] == 'n')
9052               {                                   /* when       */
9053                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9054               }
9055
9056               goto unknown;
9057
9058             default:
9059               goto unknown;
9060           }
9061
9062         default:
9063           goto unknown;
9064       }
9065
9066     case 5: /* 39 tokens of length 5 */
9067       switch (name[0])
9068       {
9069         case 'B':
9070           if (name[1] == 'E' &&
9071               name[2] == 'G' &&
9072               name[3] == 'I' &&
9073               name[4] == 'N')
9074           {                                       /* BEGIN      */
9075             return KEY_BEGIN;
9076           }
9077
9078           goto unknown;
9079
9080         case 'C':
9081           if (name[1] == 'H' &&
9082               name[2] == 'E' &&
9083               name[3] == 'C' &&
9084               name[4] == 'K')
9085           {                                       /* CHECK      */
9086             return KEY_CHECK;
9087           }
9088
9089           goto unknown;
9090
9091         case 'a':
9092           switch (name[1])
9093           {
9094             case 'l':
9095               if (name[2] == 'a' &&
9096                   name[3] == 'r' &&
9097                   name[4] == 'm')
9098               {                                   /* alarm      */
9099                 return -KEY_alarm;
9100               }
9101
9102               goto unknown;
9103
9104             case 't':
9105               if (name[2] == 'a' &&
9106                   name[3] == 'n' &&
9107                   name[4] == '2')
9108               {                                   /* atan2      */
9109                 return -KEY_atan2;
9110               }
9111
9112               goto unknown;
9113
9114             default:
9115               goto unknown;
9116           }
9117
9118         case 'b':
9119           switch (name[1])
9120           {
9121             case 'l':
9122               if (name[2] == 'e' &&
9123                   name[3] == 's' &&
9124                   name[4] == 's')
9125               {                                   /* bless      */
9126                 return -KEY_bless;
9127               }
9128
9129               goto unknown;
9130
9131             case 'r':
9132               if (name[2] == 'e' &&
9133                   name[3] == 'a' &&
9134                   name[4] == 'k')
9135               {                                   /* break      */
9136                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9137               }
9138
9139               goto unknown;
9140
9141             default:
9142               goto unknown;
9143           }
9144
9145         case 'c':
9146           switch (name[1])
9147           {
9148             case 'h':
9149               switch (name[2])
9150               {
9151                 case 'd':
9152                   if (name[3] == 'i' &&
9153                       name[4] == 'r')
9154                   {                               /* chdir      */
9155                     return -KEY_chdir;
9156                   }
9157
9158                   goto unknown;
9159
9160                 case 'm':
9161                   if (name[3] == 'o' &&
9162                       name[4] == 'd')
9163                   {                               /* chmod      */
9164                     return -KEY_chmod;
9165                   }
9166
9167                   goto unknown;
9168
9169                 case 'o':
9170                   switch (name[3])
9171                   {
9172                     case 'm':
9173                       if (name[4] == 'p')
9174                       {                           /* chomp      */
9175                         return -KEY_chomp;
9176                       }
9177
9178                       goto unknown;
9179
9180                     case 'w':
9181                       if (name[4] == 'n')
9182                       {                           /* chown      */
9183                         return -KEY_chown;
9184                       }
9185
9186                       goto unknown;
9187
9188                     default:
9189                       goto unknown;
9190                   }
9191
9192                 default:
9193                   goto unknown;
9194               }
9195
9196             case 'l':
9197               if (name[2] == 'o' &&
9198                   name[3] == 's' &&
9199                   name[4] == 'e')
9200               {                                   /* close      */
9201                 return -KEY_close;
9202               }
9203
9204               goto unknown;
9205
9206             case 'r':
9207               if (name[2] == 'y' &&
9208                   name[3] == 'p' &&
9209                   name[4] == 't')
9210               {                                   /* crypt      */
9211                 return -KEY_crypt;
9212               }
9213
9214               goto unknown;
9215
9216             default:
9217               goto unknown;
9218           }
9219
9220         case 'e':
9221           if (name[1] == 'l' &&
9222               name[2] == 's' &&
9223               name[3] == 'i' &&
9224               name[4] == 'f')
9225           {                                       /* elsif      */
9226             return KEY_elsif;
9227           }
9228
9229           goto unknown;
9230
9231         case 'f':
9232           switch (name[1])
9233           {
9234             case 'c':
9235               if (name[2] == 'n' &&
9236                   name[3] == 't' &&
9237                   name[4] == 'l')
9238               {                                   /* fcntl      */
9239                 return -KEY_fcntl;
9240               }
9241
9242               goto unknown;
9243
9244             case 'l':
9245               if (name[2] == 'o' &&
9246                   name[3] == 'c' &&
9247                   name[4] == 'k')
9248               {                                   /* flock      */
9249                 return -KEY_flock;
9250               }
9251
9252               goto unknown;
9253
9254             default:
9255               goto unknown;
9256           }
9257
9258         case 'g':
9259           if (name[1] == 'i' &&
9260               name[2] == 'v' &&
9261               name[3] == 'e' &&
9262               name[4] == 'n')
9263           {                                       /* given      */
9264             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9265           }
9266
9267           goto unknown;
9268
9269         case 'i':
9270           switch (name[1])
9271           {
9272             case 'n':
9273               if (name[2] == 'd' &&
9274                   name[3] == 'e' &&
9275                   name[4] == 'x')
9276               {                                   /* index      */
9277                 return -KEY_index;
9278               }
9279
9280               goto unknown;
9281
9282             case 'o':
9283               if (name[2] == 'c' &&
9284                   name[3] == 't' &&
9285                   name[4] == 'l')
9286               {                                   /* ioctl      */
9287                 return -KEY_ioctl;
9288               }
9289
9290               goto unknown;
9291
9292             default:
9293               goto unknown;
9294           }
9295
9296         case 'l':
9297           switch (name[1])
9298           {
9299             case 'o':
9300               if (name[2] == 'c' &&
9301                   name[3] == 'a' &&
9302                   name[4] == 'l')
9303               {                                   /* local      */
9304                 return KEY_local;
9305               }
9306
9307               goto unknown;
9308
9309             case 's':
9310               if (name[2] == 't' &&
9311                   name[3] == 'a' &&
9312                   name[4] == 't')
9313               {                                   /* lstat      */
9314                 return -KEY_lstat;
9315               }
9316
9317               goto unknown;
9318
9319             default:
9320               goto unknown;
9321           }
9322
9323         case 'm':
9324           if (name[1] == 'k' &&
9325               name[2] == 'd' &&
9326               name[3] == 'i' &&
9327               name[4] == 'r')
9328           {                                       /* mkdir      */
9329             return -KEY_mkdir;
9330           }
9331
9332           goto unknown;
9333
9334         case 'p':
9335           if (name[1] == 'r' &&
9336               name[2] == 'i' &&
9337               name[3] == 'n' &&
9338               name[4] == 't')
9339           {                                       /* print      */
9340             return KEY_print;
9341           }
9342
9343           goto unknown;
9344
9345         case 'r':
9346           switch (name[1])
9347           {
9348             case 'e':
9349               if (name[2] == 's' &&
9350                   name[3] == 'e' &&
9351                   name[4] == 't')
9352               {                                   /* reset      */
9353                 return -KEY_reset;
9354               }
9355
9356               goto unknown;
9357
9358             case 'm':
9359               if (name[2] == 'd' &&
9360                   name[3] == 'i' &&
9361                   name[4] == 'r')
9362               {                                   /* rmdir      */
9363                 return -KEY_rmdir;
9364               }
9365
9366               goto unknown;
9367
9368             default:
9369               goto unknown;
9370           }
9371
9372         case 's':
9373           switch (name[1])
9374           {
9375             case 'e':
9376               if (name[2] == 'm' &&
9377                   name[3] == 'o' &&
9378                   name[4] == 'p')
9379               {                                   /* semop      */
9380                 return -KEY_semop;
9381               }
9382
9383               goto unknown;
9384
9385             case 'h':
9386               if (name[2] == 'i' &&
9387                   name[3] == 'f' &&
9388                   name[4] == 't')
9389               {                                   /* shift      */
9390                 return -KEY_shift;
9391               }
9392
9393               goto unknown;
9394
9395             case 'l':
9396               if (name[2] == 'e' &&
9397                   name[3] == 'e' &&
9398                   name[4] == 'p')
9399               {                                   /* sleep      */
9400                 return -KEY_sleep;
9401               }
9402
9403               goto unknown;
9404
9405             case 'p':
9406               if (name[2] == 'l' &&
9407                   name[3] == 'i' &&
9408                   name[4] == 't')
9409               {                                   /* split      */
9410                 return KEY_split;
9411               }
9412
9413               goto unknown;
9414
9415             case 'r':
9416               if (name[2] == 'a' &&
9417                   name[3] == 'n' &&
9418                   name[4] == 'd')
9419               {                                   /* srand      */
9420                 return -KEY_srand;
9421               }
9422
9423               goto unknown;
9424
9425             case 't':
9426               switch (name[2])
9427               {
9428                 case 'a':
9429                   if (name[3] == 't' &&
9430                       name[4] == 'e')
9431                   {                               /* state      */
9432                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9433                   }
9434
9435                   goto unknown;
9436
9437                 case 'u':
9438                   if (name[3] == 'd' &&
9439                       name[4] == 'y')
9440                   {                               /* study      */
9441                     return KEY_study;
9442                   }
9443
9444                   goto unknown;
9445
9446                 default:
9447                   goto unknown;
9448               }
9449
9450             default:
9451               goto unknown;
9452           }
9453
9454         case 't':
9455           if (name[1] == 'i' &&
9456               name[2] == 'm' &&
9457               name[3] == 'e' &&
9458               name[4] == 's')
9459           {                                       /* times      */
9460             return -KEY_times;
9461           }
9462
9463           goto unknown;
9464
9465         case 'u':
9466           switch (name[1])
9467           {
9468             case 'm':
9469               if (name[2] == 'a' &&
9470                   name[3] == 's' &&
9471                   name[4] == 'k')
9472               {                                   /* umask      */
9473                 return -KEY_umask;
9474               }
9475
9476               goto unknown;
9477
9478             case 'n':
9479               switch (name[2])
9480               {
9481                 case 'd':
9482                   if (name[3] == 'e' &&
9483                       name[4] == 'f')
9484                   {                               /* undef      */
9485                     return KEY_undef;
9486                   }
9487
9488                   goto unknown;
9489
9490                 case 't':
9491                   if (name[3] == 'i')
9492                   {
9493                     switch (name[4])
9494                     {
9495                       case 'e':
9496                         {                         /* untie      */
9497                           return -KEY_untie;
9498                         }
9499
9500                       case 'l':
9501                         {                         /* until      */
9502                           return KEY_until;
9503                         }
9504
9505                       default:
9506                         goto unknown;
9507                     }
9508                   }
9509
9510                   goto unknown;
9511
9512                 default:
9513                   goto unknown;
9514               }
9515
9516             case 't':
9517               if (name[2] == 'i' &&
9518                   name[3] == 'm' &&
9519                   name[4] == 'e')
9520               {                                   /* utime      */
9521                 return -KEY_utime;
9522               }
9523
9524               goto unknown;
9525
9526             default:
9527               goto unknown;
9528           }
9529
9530         case 'w':
9531           switch (name[1])
9532           {
9533             case 'h':
9534               if (name[2] == 'i' &&
9535                   name[3] == 'l' &&
9536                   name[4] == 'e')
9537               {                                   /* while      */
9538                 return KEY_while;
9539               }
9540
9541               goto unknown;
9542
9543             case 'r':
9544               if (name[2] == 'i' &&
9545                   name[3] == 't' &&
9546                   name[4] == 'e')
9547               {                                   /* write      */
9548                 return -KEY_write;
9549               }
9550
9551               goto unknown;
9552
9553             default:
9554               goto unknown;
9555           }
9556
9557         default:
9558           goto unknown;
9559       }
9560
9561     case 6: /* 33 tokens of length 6 */
9562       switch (name[0])
9563       {
9564         case 'a':
9565           if (name[1] == 'c' &&
9566               name[2] == 'c' &&
9567               name[3] == 'e' &&
9568               name[4] == 'p' &&
9569               name[5] == 't')
9570           {                                       /* accept     */
9571             return -KEY_accept;
9572           }
9573
9574           goto unknown;
9575
9576         case 'c':
9577           switch (name[1])
9578           {
9579             case 'a':
9580               if (name[2] == 'l' &&
9581                   name[3] == 'l' &&
9582                   name[4] == 'e' &&
9583                   name[5] == 'r')
9584               {                                   /* caller     */
9585                 return -KEY_caller;
9586               }
9587
9588               goto unknown;
9589
9590             case 'h':
9591               if (name[2] == 'r' &&
9592                   name[3] == 'o' &&
9593                   name[4] == 'o' &&
9594                   name[5] == 't')
9595               {                                   /* chroot     */
9596                 return -KEY_chroot;
9597               }
9598
9599               goto unknown;
9600
9601             default:
9602               goto unknown;
9603           }
9604
9605         case 'd':
9606           if (name[1] == 'e' &&
9607               name[2] == 'l' &&
9608               name[3] == 'e' &&
9609               name[4] == 't' &&
9610               name[5] == 'e')
9611           {                                       /* delete     */
9612             return KEY_delete;
9613           }
9614
9615           goto unknown;
9616
9617         case 'e':
9618           switch (name[1])
9619           {
9620             case 'l':
9621               if (name[2] == 's' &&
9622                   name[3] == 'e' &&
9623                   name[4] == 'i' &&
9624                   name[5] == 'f')
9625               {                                   /* elseif     */
9626                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9627               }
9628
9629               goto unknown;
9630
9631             case 'x':
9632               if (name[2] == 'i' &&
9633                   name[3] == 's' &&
9634                   name[4] == 't' &&
9635                   name[5] == 's')
9636               {                                   /* exists     */
9637                 return KEY_exists;
9638               }
9639
9640               goto unknown;
9641
9642             default:
9643               goto unknown;
9644           }
9645
9646         case 'f':
9647           switch (name[1])
9648           {
9649             case 'i':
9650               if (name[2] == 'l' &&
9651                   name[3] == 'e' &&
9652                   name[4] == 'n' &&
9653                   name[5] == 'o')
9654               {                                   /* fileno     */
9655                 return -KEY_fileno;
9656               }
9657
9658               goto unknown;
9659
9660             case 'o':
9661               if (name[2] == 'r' &&
9662                   name[3] == 'm' &&
9663                   name[4] == 'a' &&
9664                   name[5] == 't')
9665               {                                   /* format     */
9666                 return KEY_format;
9667               }
9668
9669               goto unknown;
9670
9671             default:
9672               goto unknown;
9673           }
9674
9675         case 'g':
9676           if (name[1] == 'm' &&
9677               name[2] == 't' &&
9678               name[3] == 'i' &&
9679               name[4] == 'm' &&
9680               name[5] == 'e')
9681           {                                       /* gmtime     */
9682             return -KEY_gmtime;
9683           }
9684
9685           goto unknown;
9686
9687         case 'l':
9688           switch (name[1])
9689           {
9690             case 'e':
9691               if (name[2] == 'n' &&
9692                   name[3] == 'g' &&
9693                   name[4] == 't' &&
9694                   name[5] == 'h')
9695               {                                   /* length     */
9696                 return -KEY_length;
9697               }
9698
9699               goto unknown;
9700
9701             case 'i':
9702               if (name[2] == 's' &&
9703                   name[3] == 't' &&
9704                   name[4] == 'e' &&
9705                   name[5] == 'n')
9706               {                                   /* listen     */
9707                 return -KEY_listen;
9708               }
9709
9710               goto unknown;
9711
9712             default:
9713               goto unknown;
9714           }
9715
9716         case 'm':
9717           if (name[1] == 's' &&
9718               name[2] == 'g')
9719           {
9720             switch (name[3])
9721             {
9722               case 'c':
9723                 if (name[4] == 't' &&
9724                     name[5] == 'l')
9725                 {                                 /* msgctl     */
9726                   return -KEY_msgctl;
9727                 }
9728
9729                 goto unknown;
9730
9731               case 'g':
9732                 if (name[4] == 'e' &&
9733                     name[5] == 't')
9734                 {                                 /* msgget     */
9735                   return -KEY_msgget;
9736                 }
9737
9738                 goto unknown;
9739
9740               case 'r':
9741                 if (name[4] == 'c' &&
9742                     name[5] == 'v')
9743                 {                                 /* msgrcv     */
9744                   return -KEY_msgrcv;
9745                 }
9746
9747                 goto unknown;
9748
9749               case 's':
9750                 if (name[4] == 'n' &&
9751                     name[5] == 'd')
9752                 {                                 /* msgsnd     */
9753                   return -KEY_msgsnd;
9754                 }
9755
9756                 goto unknown;
9757
9758               default:
9759                 goto unknown;
9760             }
9761           }
9762
9763           goto unknown;
9764
9765         case 'p':
9766           if (name[1] == 'r' &&
9767               name[2] == 'i' &&
9768               name[3] == 'n' &&
9769               name[4] == 't' &&
9770               name[5] == 'f')
9771           {                                       /* printf     */
9772             return KEY_printf;
9773           }
9774
9775           goto unknown;
9776
9777         case 'r':
9778           switch (name[1])
9779           {
9780             case 'e':
9781               switch (name[2])
9782               {
9783                 case 'n':
9784                   if (name[3] == 'a' &&
9785                       name[4] == 'm' &&
9786                       name[5] == 'e')
9787                   {                               /* rename     */
9788                     return -KEY_rename;
9789                   }
9790
9791                   goto unknown;
9792
9793                 case 't':
9794                   if (name[3] == 'u' &&
9795                       name[4] == 'r' &&
9796                       name[5] == 'n')
9797                   {                               /* return     */
9798                     return KEY_return;
9799                   }
9800
9801                   goto unknown;
9802
9803                 default:
9804                   goto unknown;
9805               }
9806
9807             case 'i':
9808               if (name[2] == 'n' &&
9809                   name[3] == 'd' &&
9810                   name[4] == 'e' &&
9811                   name[5] == 'x')
9812               {                                   /* rindex     */
9813                 return -KEY_rindex;
9814               }
9815
9816               goto unknown;
9817
9818             default:
9819               goto unknown;
9820           }
9821
9822         case 's':
9823           switch (name[1])
9824           {
9825             case 'c':
9826               if (name[2] == 'a' &&
9827                   name[3] == 'l' &&
9828                   name[4] == 'a' &&
9829                   name[5] == 'r')
9830               {                                   /* scalar     */
9831                 return KEY_scalar;
9832               }
9833
9834               goto unknown;
9835
9836             case 'e':
9837               switch (name[2])
9838               {
9839                 case 'l':
9840                   if (name[3] == 'e' &&
9841                       name[4] == 'c' &&
9842                       name[5] == 't')
9843                   {                               /* select     */
9844                     return -KEY_select;
9845                   }
9846
9847                   goto unknown;
9848
9849                 case 'm':
9850                   switch (name[3])
9851                   {
9852                     case 'c':
9853                       if (name[4] == 't' &&
9854                           name[5] == 'l')
9855                       {                           /* semctl     */
9856                         return -KEY_semctl;
9857                       }
9858
9859                       goto unknown;
9860
9861                     case 'g':
9862                       if (name[4] == 'e' &&
9863                           name[5] == 't')
9864                       {                           /* semget     */
9865                         return -KEY_semget;
9866                       }
9867
9868                       goto unknown;
9869
9870                     default:
9871                       goto unknown;
9872                   }
9873
9874                 default:
9875                   goto unknown;
9876               }
9877
9878             case 'h':
9879               if (name[2] == 'm')
9880               {
9881                 switch (name[3])
9882                 {
9883                   case 'c':
9884                     if (name[4] == 't' &&
9885                         name[5] == 'l')
9886                     {                             /* shmctl     */
9887                       return -KEY_shmctl;
9888                     }
9889
9890                     goto unknown;
9891
9892                   case 'g':
9893                     if (name[4] == 'e' &&
9894                         name[5] == 't')
9895                     {                             /* shmget     */
9896                       return -KEY_shmget;
9897                     }
9898
9899                     goto unknown;
9900
9901                   default:
9902                     goto unknown;
9903                 }
9904               }
9905
9906               goto unknown;
9907
9908             case 'o':
9909               if (name[2] == 'c' &&
9910                   name[3] == 'k' &&
9911                   name[4] == 'e' &&
9912                   name[5] == 't')
9913               {                                   /* socket     */
9914                 return -KEY_socket;
9915               }
9916
9917               goto unknown;
9918
9919             case 'p':
9920               if (name[2] == 'l' &&
9921                   name[3] == 'i' &&
9922                   name[4] == 'c' &&
9923                   name[5] == 'e')
9924               {                                   /* splice     */
9925                 return -KEY_splice;
9926               }
9927
9928               goto unknown;
9929
9930             case 'u':
9931               if (name[2] == 'b' &&
9932                   name[3] == 's' &&
9933                   name[4] == 't' &&
9934                   name[5] == 'r')
9935               {                                   /* substr     */
9936                 return -KEY_substr;
9937               }
9938
9939               goto unknown;
9940
9941             case 'y':
9942               if (name[2] == 's' &&
9943                   name[3] == 't' &&
9944                   name[4] == 'e' &&
9945                   name[5] == 'm')
9946               {                                   /* system     */
9947                 return -KEY_system;
9948               }
9949
9950               goto unknown;
9951
9952             default:
9953               goto unknown;
9954           }
9955
9956         case 'u':
9957           if (name[1] == 'n')
9958           {
9959             switch (name[2])
9960             {
9961               case 'l':
9962                 switch (name[3])
9963                 {
9964                   case 'e':
9965                     if (name[4] == 's' &&
9966                         name[5] == 's')
9967                     {                             /* unless     */
9968                       return KEY_unless;
9969                     }
9970
9971                     goto unknown;
9972
9973                   case 'i':
9974                     if (name[4] == 'n' &&
9975                         name[5] == 'k')
9976                     {                             /* unlink     */
9977                       return -KEY_unlink;
9978                     }
9979
9980                     goto unknown;
9981
9982                   default:
9983                     goto unknown;
9984                 }
9985
9986               case 'p':
9987                 if (name[3] == 'a' &&
9988                     name[4] == 'c' &&
9989                     name[5] == 'k')
9990                 {                                 /* unpack     */
9991                   return -KEY_unpack;
9992                 }
9993
9994                 goto unknown;
9995
9996               default:
9997                 goto unknown;
9998             }
9999           }
10000
10001           goto unknown;
10002
10003         case 'v':
10004           if (name[1] == 'a' &&
10005               name[2] == 'l' &&
10006               name[3] == 'u' &&
10007               name[4] == 'e' &&
10008               name[5] == 's')
10009           {                                       /* values     */
10010             return -KEY_values;
10011           }
10012
10013           goto unknown;
10014
10015         default:
10016           goto unknown;
10017       }
10018
10019     case 7: /* 29 tokens of length 7 */
10020       switch (name[0])
10021       {
10022         case 'D':
10023           if (name[1] == 'E' &&
10024               name[2] == 'S' &&
10025               name[3] == 'T' &&
10026               name[4] == 'R' &&
10027               name[5] == 'O' &&
10028               name[6] == 'Y')
10029           {                                       /* DESTROY    */
10030             return KEY_DESTROY;
10031           }
10032
10033           goto unknown;
10034
10035         case '_':
10036           if (name[1] == '_' &&
10037               name[2] == 'E' &&
10038               name[3] == 'N' &&
10039               name[4] == 'D' &&
10040               name[5] == '_' &&
10041               name[6] == '_')
10042           {                                       /* __END__    */
10043             return KEY___END__;
10044           }
10045
10046           goto unknown;
10047
10048         case 'b':
10049           if (name[1] == 'i' &&
10050               name[2] == 'n' &&
10051               name[3] == 'm' &&
10052               name[4] == 'o' &&
10053               name[5] == 'd' &&
10054               name[6] == 'e')
10055           {                                       /* binmode    */
10056             return -KEY_binmode;
10057           }
10058
10059           goto unknown;
10060
10061         case 'c':
10062           if (name[1] == 'o' &&
10063               name[2] == 'n' &&
10064               name[3] == 'n' &&
10065               name[4] == 'e' &&
10066               name[5] == 'c' &&
10067               name[6] == 't')
10068           {                                       /* connect    */
10069             return -KEY_connect;
10070           }
10071
10072           goto unknown;
10073
10074         case 'd':
10075           switch (name[1])
10076           {
10077             case 'b':
10078               if (name[2] == 'm' &&
10079                   name[3] == 'o' &&
10080                   name[4] == 'p' &&
10081                   name[5] == 'e' &&
10082                   name[6] == 'n')
10083               {                                   /* dbmopen    */
10084                 return -KEY_dbmopen;
10085               }
10086
10087               goto unknown;
10088
10089             case 'e':
10090               if (name[2] == 'f')
10091               {
10092                 switch (name[3])
10093                 {
10094                   case 'a':
10095                     if (name[4] == 'u' &&
10096                         name[5] == 'l' &&
10097                         name[6] == 't')
10098                     {                             /* default    */
10099                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10100                     }
10101
10102                     goto unknown;
10103
10104                   case 'i':
10105                     if (name[4] == 'n' &&
10106                         name[5] == 'e' &&
10107                         name[6] == 'd')
10108                     {                             /* defined    */
10109                       return KEY_defined;
10110                     }
10111
10112                     goto unknown;
10113
10114                   default:
10115                     goto unknown;
10116                 }
10117               }
10118
10119               goto unknown;
10120
10121             default:
10122               goto unknown;
10123           }
10124
10125         case 'f':
10126           if (name[1] == 'o' &&
10127               name[2] == 'r' &&
10128               name[3] == 'e' &&
10129               name[4] == 'a' &&
10130               name[5] == 'c' &&
10131               name[6] == 'h')
10132           {                                       /* foreach    */
10133             return KEY_foreach;
10134           }
10135
10136           goto unknown;
10137
10138         case 'g':
10139           if (name[1] == 'e' &&
10140               name[2] == 't' &&
10141               name[3] == 'p')
10142           {
10143             switch (name[4])
10144             {
10145               case 'g':
10146                 if (name[5] == 'r' &&
10147                     name[6] == 'p')
10148                 {                                 /* getpgrp    */
10149                   return -KEY_getpgrp;
10150                 }
10151
10152                 goto unknown;
10153
10154               case 'p':
10155                 if (name[5] == 'i' &&
10156                     name[6] == 'd')
10157                 {                                 /* getppid    */
10158                   return -KEY_getppid;
10159                 }
10160
10161                 goto unknown;
10162
10163               default:
10164                 goto unknown;
10165             }
10166           }
10167
10168           goto unknown;
10169
10170         case 'l':
10171           if (name[1] == 'c' &&
10172               name[2] == 'f' &&
10173               name[3] == 'i' &&
10174               name[4] == 'r' &&
10175               name[5] == 's' &&
10176               name[6] == 't')
10177           {                                       /* lcfirst    */
10178             return -KEY_lcfirst;
10179           }
10180
10181           goto unknown;
10182
10183         case 'o':
10184           if (name[1] == 'p' &&
10185               name[2] == 'e' &&
10186               name[3] == 'n' &&
10187               name[4] == 'd' &&
10188               name[5] == 'i' &&
10189               name[6] == 'r')
10190           {                                       /* opendir    */
10191             return -KEY_opendir;
10192           }
10193
10194           goto unknown;
10195
10196         case 'p':
10197           if (name[1] == 'a' &&
10198               name[2] == 'c' &&
10199               name[3] == 'k' &&
10200               name[4] == 'a' &&
10201               name[5] == 'g' &&
10202               name[6] == 'e')
10203           {                                       /* package    */
10204             return KEY_package;
10205           }
10206
10207           goto unknown;
10208
10209         case 'r':
10210           if (name[1] == 'e')
10211           {
10212             switch (name[2])
10213             {
10214               case 'a':
10215                 if (name[3] == 'd' &&
10216                     name[4] == 'd' &&
10217                     name[5] == 'i' &&
10218                     name[6] == 'r')
10219                 {                                 /* readdir    */
10220                   return -KEY_readdir;
10221                 }
10222
10223                 goto unknown;
10224
10225               case 'q':
10226                 if (name[3] == 'u' &&
10227                     name[4] == 'i' &&
10228                     name[5] == 'r' &&
10229                     name[6] == 'e')
10230                 {                                 /* require    */
10231                   return KEY_require;
10232                 }
10233
10234                 goto unknown;
10235
10236               case 'v':
10237                 if (name[3] == 'e' &&
10238                     name[4] == 'r' &&
10239                     name[5] == 's' &&
10240                     name[6] == 'e')
10241                 {                                 /* reverse    */
10242                   return -KEY_reverse;
10243                 }
10244
10245                 goto unknown;
10246
10247               default:
10248                 goto unknown;
10249             }
10250           }
10251
10252           goto unknown;
10253
10254         case 's':
10255           switch (name[1])
10256           {
10257             case 'e':
10258               switch (name[2])
10259               {
10260                 case 'e':
10261                   if (name[3] == 'k' &&
10262                       name[4] == 'd' &&
10263                       name[5] == 'i' &&
10264                       name[6] == 'r')
10265                   {                               /* seekdir    */
10266                     return -KEY_seekdir;
10267                   }
10268
10269                   goto unknown;
10270
10271                 case 't':
10272                   if (name[3] == 'p' &&
10273                       name[4] == 'g' &&
10274                       name[5] == 'r' &&
10275                       name[6] == 'p')
10276                   {                               /* setpgrp    */
10277                     return -KEY_setpgrp;
10278                   }
10279
10280                   goto unknown;
10281
10282                 default:
10283                   goto unknown;
10284               }
10285
10286             case 'h':
10287               if (name[2] == 'm' &&
10288                   name[3] == 'r' &&
10289                   name[4] == 'e' &&
10290                   name[5] == 'a' &&
10291                   name[6] == 'd')
10292               {                                   /* shmread    */
10293                 return -KEY_shmread;
10294               }
10295
10296               goto unknown;
10297
10298             case 'p':
10299               if (name[2] == 'r' &&
10300                   name[3] == 'i' &&
10301                   name[4] == 'n' &&
10302                   name[5] == 't' &&
10303                   name[6] == 'f')
10304               {                                   /* sprintf    */
10305                 return -KEY_sprintf;
10306               }
10307
10308               goto unknown;
10309
10310             case 'y':
10311               switch (name[2])
10312               {
10313                 case 'm':
10314                   if (name[3] == 'l' &&
10315                       name[4] == 'i' &&
10316                       name[5] == 'n' &&
10317                       name[6] == 'k')
10318                   {                               /* symlink    */
10319                     return -KEY_symlink;
10320                   }
10321
10322                   goto unknown;
10323
10324                 case 's':
10325                   switch (name[3])
10326                   {
10327                     case 'c':
10328                       if (name[4] == 'a' &&
10329                           name[5] == 'l' &&
10330                           name[6] == 'l')
10331                       {                           /* syscall    */
10332                         return -KEY_syscall;
10333                       }
10334
10335                       goto unknown;
10336
10337                     case 'o':
10338                       if (name[4] == 'p' &&
10339                           name[5] == 'e' &&
10340                           name[6] == 'n')
10341                       {                           /* sysopen    */
10342                         return -KEY_sysopen;
10343                       }
10344
10345                       goto unknown;
10346
10347                     case 'r':
10348                       if (name[4] == 'e' &&
10349                           name[5] == 'a' &&
10350                           name[6] == 'd')
10351                       {                           /* sysread    */
10352                         return -KEY_sysread;
10353                       }
10354
10355                       goto unknown;
10356
10357                     case 's':
10358                       if (name[4] == 'e' &&
10359                           name[5] == 'e' &&
10360                           name[6] == 'k')
10361                       {                           /* sysseek    */
10362                         return -KEY_sysseek;
10363                       }
10364
10365                       goto unknown;
10366
10367                     default:
10368                       goto unknown;
10369                   }
10370
10371                 default:
10372                   goto unknown;
10373               }
10374
10375             default:
10376               goto unknown;
10377           }
10378
10379         case 't':
10380           if (name[1] == 'e' &&
10381               name[2] == 'l' &&
10382               name[3] == 'l' &&
10383               name[4] == 'd' &&
10384               name[5] == 'i' &&
10385               name[6] == 'r')
10386           {                                       /* telldir    */
10387             return -KEY_telldir;
10388           }
10389
10390           goto unknown;
10391
10392         case 'u':
10393           switch (name[1])
10394           {
10395             case 'c':
10396               if (name[2] == 'f' &&
10397                   name[3] == 'i' &&
10398                   name[4] == 'r' &&
10399                   name[5] == 's' &&
10400                   name[6] == 't')
10401               {                                   /* ucfirst    */
10402                 return -KEY_ucfirst;
10403               }
10404
10405               goto unknown;
10406
10407             case 'n':
10408               if (name[2] == 's' &&
10409                   name[3] == 'h' &&
10410                   name[4] == 'i' &&
10411                   name[5] == 'f' &&
10412                   name[6] == 't')
10413               {                                   /* unshift    */
10414                 return -KEY_unshift;
10415               }
10416
10417               goto unknown;
10418
10419             default:
10420               goto unknown;
10421           }
10422
10423         case 'w':
10424           if (name[1] == 'a' &&
10425               name[2] == 'i' &&
10426               name[3] == 't' &&
10427               name[4] == 'p' &&
10428               name[5] == 'i' &&
10429               name[6] == 'd')
10430           {                                       /* waitpid    */
10431             return -KEY_waitpid;
10432           }
10433
10434           goto unknown;
10435
10436         default:
10437           goto unknown;
10438       }
10439
10440     case 8: /* 26 tokens of length 8 */
10441       switch (name[0])
10442       {
10443         case 'A':
10444           if (name[1] == 'U' &&
10445               name[2] == 'T' &&
10446               name[3] == 'O' &&
10447               name[4] == 'L' &&
10448               name[5] == 'O' &&
10449               name[6] == 'A' &&
10450               name[7] == 'D')
10451           {                                       /* AUTOLOAD   */
10452             return KEY_AUTOLOAD;
10453           }
10454
10455           goto unknown;
10456
10457         case '_':
10458           if (name[1] == '_')
10459           {
10460             switch (name[2])
10461             {
10462               case 'D':
10463                 if (name[3] == 'A' &&
10464                     name[4] == 'T' &&
10465                     name[5] == 'A' &&
10466                     name[6] == '_' &&
10467                     name[7] == '_')
10468                 {                                 /* __DATA__   */
10469                   return KEY___DATA__;
10470                 }
10471
10472                 goto unknown;
10473
10474               case 'F':
10475                 if (name[3] == 'I' &&
10476                     name[4] == 'L' &&
10477                     name[5] == 'E' &&
10478                     name[6] == '_' &&
10479                     name[7] == '_')
10480                 {                                 /* __FILE__   */
10481                   return -KEY___FILE__;
10482                 }
10483
10484                 goto unknown;
10485
10486               case 'L':
10487                 if (name[3] == 'I' &&
10488                     name[4] == 'N' &&
10489                     name[5] == 'E' &&
10490                     name[6] == '_' &&
10491                     name[7] == '_')
10492                 {                                 /* __LINE__   */
10493                   return -KEY___LINE__;
10494                 }
10495
10496                 goto unknown;
10497
10498               default:
10499                 goto unknown;
10500             }
10501           }
10502
10503           goto unknown;
10504
10505         case 'c':
10506           switch (name[1])
10507           {
10508             case 'l':
10509               if (name[2] == 'o' &&
10510                   name[3] == 's' &&
10511                   name[4] == 'e' &&
10512                   name[5] == 'd' &&
10513                   name[6] == 'i' &&
10514                   name[7] == 'r')
10515               {                                   /* closedir   */
10516                 return -KEY_closedir;
10517               }
10518
10519               goto unknown;
10520
10521             case 'o':
10522               if (name[2] == 'n' &&
10523                   name[3] == 't' &&
10524                   name[4] == 'i' &&
10525                   name[5] == 'n' &&
10526                   name[6] == 'u' &&
10527                   name[7] == 'e')
10528               {                                   /* continue   */
10529                 return -KEY_continue;
10530               }
10531
10532               goto unknown;
10533
10534             default:
10535               goto unknown;
10536           }
10537
10538         case 'd':
10539           if (name[1] == 'b' &&
10540               name[2] == 'm' &&
10541               name[3] == 'c' &&
10542               name[4] == 'l' &&
10543               name[5] == 'o' &&
10544               name[6] == 's' &&
10545               name[7] == 'e')
10546           {                                       /* dbmclose   */
10547             return -KEY_dbmclose;
10548           }
10549
10550           goto unknown;
10551
10552         case 'e':
10553           if (name[1] == 'n' &&
10554               name[2] == 'd')
10555           {
10556             switch (name[3])
10557             {
10558               case 'g':
10559                 if (name[4] == 'r' &&
10560                     name[5] == 'e' &&
10561                     name[6] == 'n' &&
10562                     name[7] == 't')
10563                 {                                 /* endgrent   */
10564                   return -KEY_endgrent;
10565                 }
10566
10567                 goto unknown;
10568
10569               case 'p':
10570                 if (name[4] == 'w' &&
10571                     name[5] == 'e' &&
10572                     name[6] == 'n' &&
10573                     name[7] == 't')
10574                 {                                 /* endpwent   */
10575                   return -KEY_endpwent;
10576                 }
10577
10578                 goto unknown;
10579
10580               default:
10581                 goto unknown;
10582             }
10583           }
10584
10585           goto unknown;
10586
10587         case 'f':
10588           if (name[1] == 'o' &&
10589               name[2] == 'r' &&
10590               name[3] == 'm' &&
10591               name[4] == 'l' &&
10592               name[5] == 'i' &&
10593               name[6] == 'n' &&
10594               name[7] == 'e')
10595           {                                       /* formline   */
10596             return -KEY_formline;
10597           }
10598
10599           goto unknown;
10600
10601         case 'g':
10602           if (name[1] == 'e' &&
10603               name[2] == 't')
10604           {
10605             switch (name[3])
10606             {
10607               case 'g':
10608                 if (name[4] == 'r')
10609                 {
10610                   switch (name[5])
10611                   {
10612                     case 'e':
10613                       if (name[6] == 'n' &&
10614                           name[7] == 't')
10615                       {                           /* getgrent   */
10616                         return -KEY_getgrent;
10617                       }
10618
10619                       goto unknown;
10620
10621                     case 'g':
10622                       if (name[6] == 'i' &&
10623                           name[7] == 'd')
10624                       {                           /* getgrgid   */
10625                         return -KEY_getgrgid;
10626                       }
10627
10628                       goto unknown;
10629
10630                     case 'n':
10631                       if (name[6] == 'a' &&
10632                           name[7] == 'm')
10633                       {                           /* getgrnam   */
10634                         return -KEY_getgrnam;
10635                       }
10636
10637                       goto unknown;
10638
10639                     default:
10640                       goto unknown;
10641                   }
10642                 }
10643
10644                 goto unknown;
10645
10646               case 'l':
10647                 if (name[4] == 'o' &&
10648                     name[5] == 'g' &&
10649                     name[6] == 'i' &&
10650                     name[7] == 'n')
10651                 {                                 /* getlogin   */
10652                   return -KEY_getlogin;
10653                 }
10654
10655                 goto unknown;
10656
10657               case 'p':
10658                 if (name[4] == 'w')
10659                 {
10660                   switch (name[5])
10661                   {
10662                     case 'e':
10663                       if (name[6] == 'n' &&
10664                           name[7] == 't')
10665                       {                           /* getpwent   */
10666                         return -KEY_getpwent;
10667                       }
10668
10669                       goto unknown;
10670
10671                     case 'n':
10672                       if (name[6] == 'a' &&
10673                           name[7] == 'm')
10674                       {                           /* getpwnam   */
10675                         return -KEY_getpwnam;
10676                       }
10677
10678                       goto unknown;
10679
10680                     case 'u':
10681                       if (name[6] == 'i' &&
10682                           name[7] == 'd')
10683                       {                           /* getpwuid   */
10684                         return -KEY_getpwuid;
10685                       }
10686
10687                       goto unknown;
10688
10689                     default:
10690                       goto unknown;
10691                   }
10692                 }
10693
10694                 goto unknown;
10695
10696               default:
10697                 goto unknown;
10698             }
10699           }
10700
10701           goto unknown;
10702
10703         case 'r':
10704           if (name[1] == 'e' &&
10705               name[2] == 'a' &&
10706               name[3] == 'd')
10707           {
10708             switch (name[4])
10709             {
10710               case 'l':
10711                 if (name[5] == 'i' &&
10712                     name[6] == 'n')
10713                 {
10714                   switch (name[7])
10715                   {
10716                     case 'e':
10717                       {                           /* readline   */
10718                         return -KEY_readline;
10719                       }
10720
10721                     case 'k':
10722                       {                           /* readlink   */
10723                         return -KEY_readlink;
10724                       }
10725
10726                     default:
10727                       goto unknown;
10728                   }
10729                 }
10730
10731                 goto unknown;
10732
10733               case 'p':
10734                 if (name[5] == 'i' &&
10735                     name[6] == 'p' &&
10736                     name[7] == 'e')
10737                 {                                 /* readpipe   */
10738                   return -KEY_readpipe;
10739                 }
10740
10741                 goto unknown;
10742
10743               default:
10744                 goto unknown;
10745             }
10746           }
10747
10748           goto unknown;
10749
10750         case 's':
10751           switch (name[1])
10752           {
10753             case 'e':
10754               if (name[2] == 't')
10755               {
10756                 switch (name[3])
10757                 {
10758                   case 'g':
10759                     if (name[4] == 'r' &&
10760                         name[5] == 'e' &&
10761                         name[6] == 'n' &&
10762                         name[7] == 't')
10763                     {                             /* setgrent   */
10764                       return -KEY_setgrent;
10765                     }
10766
10767                     goto unknown;
10768
10769                   case 'p':
10770                     if (name[4] == 'w' &&
10771                         name[5] == 'e' &&
10772                         name[6] == 'n' &&
10773                         name[7] == 't')
10774                     {                             /* setpwent   */
10775                       return -KEY_setpwent;
10776                     }
10777
10778                     goto unknown;
10779
10780                   default:
10781                     goto unknown;
10782                 }
10783               }
10784
10785               goto unknown;
10786
10787             case 'h':
10788               switch (name[2])
10789               {
10790                 case 'm':
10791                   if (name[3] == 'w' &&
10792                       name[4] == 'r' &&
10793                       name[5] == 'i' &&
10794                       name[6] == 't' &&
10795                       name[7] == 'e')
10796                   {                               /* shmwrite   */
10797                     return -KEY_shmwrite;
10798                   }
10799
10800                   goto unknown;
10801
10802                 case 'u':
10803                   if (name[3] == 't' &&
10804                       name[4] == 'd' &&
10805                       name[5] == 'o' &&
10806                       name[6] == 'w' &&
10807                       name[7] == 'n')
10808                   {                               /* shutdown   */
10809                     return -KEY_shutdown;
10810                   }
10811
10812                   goto unknown;
10813
10814                 default:
10815                   goto unknown;
10816               }
10817
10818             case 'y':
10819               if (name[2] == 's' &&
10820                   name[3] == 'w' &&
10821                   name[4] == 'r' &&
10822                   name[5] == 'i' &&
10823                   name[6] == 't' &&
10824                   name[7] == 'e')
10825               {                                   /* syswrite   */
10826                 return -KEY_syswrite;
10827               }
10828
10829               goto unknown;
10830
10831             default:
10832               goto unknown;
10833           }
10834
10835         case 't':
10836           if (name[1] == 'r' &&
10837               name[2] == 'u' &&
10838               name[3] == 'n' &&
10839               name[4] == 'c' &&
10840               name[5] == 'a' &&
10841               name[6] == 't' &&
10842               name[7] == 'e')
10843           {                                       /* truncate   */
10844             return -KEY_truncate;
10845           }
10846
10847           goto unknown;
10848
10849         default:
10850           goto unknown;
10851       }
10852
10853     case 9: /* 9 tokens of length 9 */
10854       switch (name[0])
10855       {
10856         case 'U':
10857           if (name[1] == 'N' &&
10858               name[2] == 'I' &&
10859               name[3] == 'T' &&
10860               name[4] == 'C' &&
10861               name[5] == 'H' &&
10862               name[6] == 'E' &&
10863               name[7] == 'C' &&
10864               name[8] == 'K')
10865           {                                       /* UNITCHECK  */
10866             return KEY_UNITCHECK;
10867           }
10868
10869           goto unknown;
10870
10871         case 'e':
10872           if (name[1] == 'n' &&
10873               name[2] == 'd' &&
10874               name[3] == 'n' &&
10875               name[4] == 'e' &&
10876               name[5] == 't' &&
10877               name[6] == 'e' &&
10878               name[7] == 'n' &&
10879               name[8] == 't')
10880           {                                       /* endnetent  */
10881             return -KEY_endnetent;
10882           }
10883
10884           goto unknown;
10885
10886         case 'g':
10887           if (name[1] == 'e' &&
10888               name[2] == 't' &&
10889               name[3] == 'n' &&
10890               name[4] == 'e' &&
10891               name[5] == 't' &&
10892               name[6] == 'e' &&
10893               name[7] == 'n' &&
10894               name[8] == 't')
10895           {                                       /* getnetent  */
10896             return -KEY_getnetent;
10897           }
10898
10899           goto unknown;
10900
10901         case 'l':
10902           if (name[1] == 'o' &&
10903               name[2] == 'c' &&
10904               name[3] == 'a' &&
10905               name[4] == 'l' &&
10906               name[5] == 't' &&
10907               name[6] == 'i' &&
10908               name[7] == 'm' &&
10909               name[8] == 'e')
10910           {                                       /* localtime  */
10911             return -KEY_localtime;
10912           }
10913
10914           goto unknown;
10915
10916         case 'p':
10917           if (name[1] == 'r' &&
10918               name[2] == 'o' &&
10919               name[3] == 't' &&
10920               name[4] == 'o' &&
10921               name[5] == 't' &&
10922               name[6] == 'y' &&
10923               name[7] == 'p' &&
10924               name[8] == 'e')
10925           {                                       /* prototype  */
10926             return KEY_prototype;
10927           }
10928
10929           goto unknown;
10930
10931         case 'q':
10932           if (name[1] == 'u' &&
10933               name[2] == 'o' &&
10934               name[3] == 't' &&
10935               name[4] == 'e' &&
10936               name[5] == 'm' &&
10937               name[6] == 'e' &&
10938               name[7] == 't' &&
10939               name[8] == 'a')
10940           {                                       /* quotemeta  */
10941             return -KEY_quotemeta;
10942           }
10943
10944           goto unknown;
10945
10946         case 'r':
10947           if (name[1] == 'e' &&
10948               name[2] == 'w' &&
10949               name[3] == 'i' &&
10950               name[4] == 'n' &&
10951               name[5] == 'd' &&
10952               name[6] == 'd' &&
10953               name[7] == 'i' &&
10954               name[8] == 'r')
10955           {                                       /* rewinddir  */
10956             return -KEY_rewinddir;
10957           }
10958
10959           goto unknown;
10960
10961         case 's':
10962           if (name[1] == 'e' &&
10963               name[2] == 't' &&
10964               name[3] == 'n' &&
10965               name[4] == 'e' &&
10966               name[5] == 't' &&
10967               name[6] == 'e' &&
10968               name[7] == 'n' &&
10969               name[8] == 't')
10970           {                                       /* setnetent  */
10971             return -KEY_setnetent;
10972           }
10973
10974           goto unknown;
10975
10976         case 'w':
10977           if (name[1] == 'a' &&
10978               name[2] == 'n' &&
10979               name[3] == 't' &&
10980               name[4] == 'a' &&
10981               name[5] == 'r' &&
10982               name[6] == 'r' &&
10983               name[7] == 'a' &&
10984               name[8] == 'y')
10985           {                                       /* wantarray  */
10986             return -KEY_wantarray;
10987           }
10988
10989           goto unknown;
10990
10991         default:
10992           goto unknown;
10993       }
10994
10995     case 10: /* 9 tokens of length 10 */
10996       switch (name[0])
10997       {
10998         case 'e':
10999           if (name[1] == 'n' &&
11000               name[2] == 'd')
11001           {
11002             switch (name[3])
11003             {
11004               case 'h':
11005                 if (name[4] == 'o' &&
11006                     name[5] == 's' &&
11007                     name[6] == 't' &&
11008                     name[7] == 'e' &&
11009                     name[8] == 'n' &&
11010                     name[9] == 't')
11011                 {                                 /* endhostent */
11012                   return -KEY_endhostent;
11013                 }
11014
11015                 goto unknown;
11016
11017               case 's':
11018                 if (name[4] == 'e' &&
11019                     name[5] == 'r' &&
11020                     name[6] == 'v' &&
11021                     name[7] == 'e' &&
11022                     name[8] == 'n' &&
11023                     name[9] == 't')
11024                 {                                 /* endservent */
11025                   return -KEY_endservent;
11026                 }
11027
11028                 goto unknown;
11029
11030               default:
11031                 goto unknown;
11032             }
11033           }
11034
11035           goto unknown;
11036
11037         case 'g':
11038           if (name[1] == 'e' &&
11039               name[2] == 't')
11040           {
11041             switch (name[3])
11042             {
11043               case 'h':
11044                 if (name[4] == 'o' &&
11045                     name[5] == 's' &&
11046                     name[6] == 't' &&
11047                     name[7] == 'e' &&
11048                     name[8] == 'n' &&
11049                     name[9] == 't')
11050                 {                                 /* gethostent */
11051                   return -KEY_gethostent;
11052                 }
11053
11054                 goto unknown;
11055
11056               case 's':
11057                 switch (name[4])
11058                 {
11059                   case 'e':
11060                     if (name[5] == 'r' &&
11061                         name[6] == 'v' &&
11062                         name[7] == 'e' &&
11063                         name[8] == 'n' &&
11064                         name[9] == 't')
11065                     {                             /* getservent */
11066                       return -KEY_getservent;
11067                     }
11068
11069                     goto unknown;
11070
11071                   case 'o':
11072                     if (name[5] == 'c' &&
11073                         name[6] == 'k' &&
11074                         name[7] == 'o' &&
11075                         name[8] == 'p' &&
11076                         name[9] == 't')
11077                     {                             /* getsockopt */
11078                       return -KEY_getsockopt;
11079                     }
11080
11081                     goto unknown;
11082
11083                   default:
11084                     goto unknown;
11085                 }
11086
11087               default:
11088                 goto unknown;
11089             }
11090           }
11091
11092           goto unknown;
11093
11094         case 's':
11095           switch (name[1])
11096           {
11097             case 'e':
11098               if (name[2] == 't')
11099               {
11100                 switch (name[3])
11101                 {
11102                   case 'h':
11103                     if (name[4] == 'o' &&
11104                         name[5] == 's' &&
11105                         name[6] == 't' &&
11106                         name[7] == 'e' &&
11107                         name[8] == 'n' &&
11108                         name[9] == 't')
11109                     {                             /* sethostent */
11110                       return -KEY_sethostent;
11111                     }
11112
11113                     goto unknown;
11114
11115                   case 's':
11116                     switch (name[4])
11117                     {
11118                       case 'e':
11119                         if (name[5] == 'r' &&
11120                             name[6] == 'v' &&
11121                             name[7] == 'e' &&
11122                             name[8] == 'n' &&
11123                             name[9] == 't')
11124                         {                         /* setservent */
11125                           return -KEY_setservent;
11126                         }
11127
11128                         goto unknown;
11129
11130                       case 'o':
11131                         if (name[5] == 'c' &&
11132                             name[6] == 'k' &&
11133                             name[7] == 'o' &&
11134                             name[8] == 'p' &&
11135                             name[9] == 't')
11136                         {                         /* setsockopt */
11137                           return -KEY_setsockopt;
11138                         }
11139
11140                         goto unknown;
11141
11142                       default:
11143                         goto unknown;
11144                     }
11145
11146                   default:
11147                     goto unknown;
11148                 }
11149               }
11150
11151               goto unknown;
11152
11153             case 'o':
11154               if (name[2] == 'c' &&
11155                   name[3] == 'k' &&
11156                   name[4] == 'e' &&
11157                   name[5] == 't' &&
11158                   name[6] == 'p' &&
11159                   name[7] == 'a' &&
11160                   name[8] == 'i' &&
11161                   name[9] == 'r')
11162               {                                   /* socketpair */
11163                 return -KEY_socketpair;
11164               }
11165
11166               goto unknown;
11167
11168             default:
11169               goto unknown;
11170           }
11171
11172         default:
11173           goto unknown;
11174       }
11175
11176     case 11: /* 8 tokens of length 11 */
11177       switch (name[0])
11178       {
11179         case '_':
11180           if (name[1] == '_' &&
11181               name[2] == 'P' &&
11182               name[3] == 'A' &&
11183               name[4] == 'C' &&
11184               name[5] == 'K' &&
11185               name[6] == 'A' &&
11186               name[7] == 'G' &&
11187               name[8] == 'E' &&
11188               name[9] == '_' &&
11189               name[10] == '_')
11190           {                                       /* __PACKAGE__ */
11191             return -KEY___PACKAGE__;
11192           }
11193
11194           goto unknown;
11195
11196         case 'e':
11197           if (name[1] == 'n' &&
11198               name[2] == 'd' &&
11199               name[3] == 'p' &&
11200               name[4] == 'r' &&
11201               name[5] == 'o' &&
11202               name[6] == 't' &&
11203               name[7] == 'o' &&
11204               name[8] == 'e' &&
11205               name[9] == 'n' &&
11206               name[10] == 't')
11207           {                                       /* endprotoent */
11208             return -KEY_endprotoent;
11209           }
11210
11211           goto unknown;
11212
11213         case 'g':
11214           if (name[1] == 'e' &&
11215               name[2] == 't')
11216           {
11217             switch (name[3])
11218             {
11219               case 'p':
11220                 switch (name[4])
11221                 {
11222                   case 'e':
11223                     if (name[5] == 'e' &&
11224                         name[6] == 'r' &&
11225                         name[7] == 'n' &&
11226                         name[8] == 'a' &&
11227                         name[9] == 'm' &&
11228                         name[10] == 'e')
11229                     {                             /* getpeername */
11230                       return -KEY_getpeername;
11231                     }
11232
11233                     goto unknown;
11234
11235                   case 'r':
11236                     switch (name[5])
11237                     {
11238                       case 'i':
11239                         if (name[6] == 'o' &&
11240                             name[7] == 'r' &&
11241                             name[8] == 'i' &&
11242                             name[9] == 't' &&
11243                             name[10] == 'y')
11244                         {                         /* getpriority */
11245                           return -KEY_getpriority;
11246                         }
11247
11248                         goto unknown;
11249
11250                       case 'o':
11251                         if (name[6] == 't' &&
11252                             name[7] == 'o' &&
11253                             name[8] == 'e' &&
11254                             name[9] == 'n' &&
11255                             name[10] == 't')
11256                         {                         /* getprotoent */
11257                           return -KEY_getprotoent;
11258                         }
11259
11260                         goto unknown;
11261
11262                       default:
11263                         goto unknown;
11264                     }
11265
11266                   default:
11267                     goto unknown;
11268                 }
11269
11270               case 's':
11271                 if (name[4] == 'o' &&
11272                     name[5] == 'c' &&
11273                     name[6] == 'k' &&
11274                     name[7] == 'n' &&
11275                     name[8] == 'a' &&
11276                     name[9] == 'm' &&
11277                     name[10] == 'e')
11278                 {                                 /* getsockname */
11279                   return -KEY_getsockname;
11280                 }
11281
11282                 goto unknown;
11283
11284               default:
11285                 goto unknown;
11286             }
11287           }
11288
11289           goto unknown;
11290
11291         case 's':
11292           if (name[1] == 'e' &&
11293               name[2] == 't' &&
11294               name[3] == 'p' &&
11295               name[4] == 'r')
11296           {
11297             switch (name[5])
11298             {
11299               case 'i':
11300                 if (name[6] == 'o' &&
11301                     name[7] == 'r' &&
11302                     name[8] == 'i' &&
11303                     name[9] == 't' &&
11304                     name[10] == 'y')
11305                 {                                 /* setpriority */
11306                   return -KEY_setpriority;
11307                 }
11308
11309                 goto unknown;
11310
11311               case 'o':
11312                 if (name[6] == 't' &&
11313                     name[7] == 'o' &&
11314                     name[8] == 'e' &&
11315                     name[9] == 'n' &&
11316                     name[10] == 't')
11317                 {                                 /* setprotoent */
11318                   return -KEY_setprotoent;
11319                 }
11320
11321                 goto unknown;
11322
11323               default:
11324                 goto unknown;
11325             }
11326           }
11327
11328           goto unknown;
11329
11330         default:
11331           goto unknown;
11332       }
11333
11334     case 12: /* 2 tokens of length 12 */
11335       if (name[0] == 'g' &&
11336           name[1] == 'e' &&
11337           name[2] == 't' &&
11338           name[3] == 'n' &&
11339           name[4] == 'e' &&
11340           name[5] == 't' &&
11341           name[6] == 'b' &&
11342           name[7] == 'y')
11343       {
11344         switch (name[8])
11345         {
11346           case 'a':
11347             if (name[9] == 'd' &&
11348                 name[10] == 'd' &&
11349                 name[11] == 'r')
11350             {                                     /* getnetbyaddr */
11351               return -KEY_getnetbyaddr;
11352             }
11353
11354             goto unknown;
11355
11356           case 'n':
11357             if (name[9] == 'a' &&
11358                 name[10] == 'm' &&
11359                 name[11] == 'e')
11360             {                                     /* getnetbyname */
11361               return -KEY_getnetbyname;
11362             }
11363
11364             goto unknown;
11365
11366           default:
11367             goto unknown;
11368         }
11369       }
11370
11371       goto unknown;
11372
11373     case 13: /* 4 tokens of length 13 */
11374       if (name[0] == 'g' &&
11375           name[1] == 'e' &&
11376           name[2] == 't')
11377       {
11378         switch (name[3])
11379         {
11380           case 'h':
11381             if (name[4] == 'o' &&
11382                 name[5] == 's' &&
11383                 name[6] == 't' &&
11384                 name[7] == 'b' &&
11385                 name[8] == 'y')
11386             {
11387               switch (name[9])
11388               {
11389                 case 'a':
11390                   if (name[10] == 'd' &&
11391                       name[11] == 'd' &&
11392                       name[12] == 'r')
11393                   {                               /* gethostbyaddr */
11394                     return -KEY_gethostbyaddr;
11395                   }
11396
11397                   goto unknown;
11398
11399                 case 'n':
11400                   if (name[10] == 'a' &&
11401                       name[11] == 'm' &&
11402                       name[12] == 'e')
11403                   {                               /* gethostbyname */
11404                     return -KEY_gethostbyname;
11405                   }
11406
11407                   goto unknown;
11408
11409                 default:
11410                   goto unknown;
11411               }
11412             }
11413
11414             goto unknown;
11415
11416           case 's':
11417             if (name[4] == 'e' &&
11418                 name[5] == 'r' &&
11419                 name[6] == 'v' &&
11420                 name[7] == 'b' &&
11421                 name[8] == 'y')
11422             {
11423               switch (name[9])
11424               {
11425                 case 'n':
11426                   if (name[10] == 'a' &&
11427                       name[11] == 'm' &&
11428                       name[12] == 'e')
11429                   {                               /* getservbyname */
11430                     return -KEY_getservbyname;
11431                   }
11432
11433                   goto unknown;
11434
11435                 case 'p':
11436                   if (name[10] == 'o' &&
11437                       name[11] == 'r' &&
11438                       name[12] == 't')
11439                   {                               /* getservbyport */
11440                     return -KEY_getservbyport;
11441                   }
11442
11443                   goto unknown;
11444
11445                 default:
11446                   goto unknown;
11447               }
11448             }
11449
11450             goto unknown;
11451
11452           default:
11453             goto unknown;
11454         }
11455       }
11456
11457       goto unknown;
11458
11459     case 14: /* 1 tokens of length 14 */
11460       if (name[0] == 'g' &&
11461           name[1] == 'e' &&
11462           name[2] == 't' &&
11463           name[3] == 'p' &&
11464           name[4] == 'r' &&
11465           name[5] == 'o' &&
11466           name[6] == 't' &&
11467           name[7] == 'o' &&
11468           name[8] == 'b' &&
11469           name[9] == 'y' &&
11470           name[10] == 'n' &&
11471           name[11] == 'a' &&
11472           name[12] == 'm' &&
11473           name[13] == 'e')
11474       {                                           /* getprotobyname */
11475         return -KEY_getprotobyname;
11476       }
11477
11478       goto unknown;
11479
11480     case 16: /* 1 tokens of length 16 */
11481       if (name[0] == 'g' &&
11482           name[1] == 'e' &&
11483           name[2] == 't' &&
11484           name[3] == 'p' &&
11485           name[4] == 'r' &&
11486           name[5] == 'o' &&
11487           name[6] == 't' &&
11488           name[7] == 'o' &&
11489           name[8] == 'b' &&
11490           name[9] == 'y' &&
11491           name[10] == 'n' &&
11492           name[11] == 'u' &&
11493           name[12] == 'm' &&
11494           name[13] == 'b' &&
11495           name[14] == 'e' &&
11496           name[15] == 'r')
11497       {                                           /* getprotobynumber */
11498         return -KEY_getprotobynumber;
11499       }
11500
11501       goto unknown;
11502
11503     default:
11504       goto unknown;
11505   }
11506
11507 unknown:
11508   return 0;
11509 }
11510
11511 STATIC void
11512 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11513 {
11514     dVAR;
11515
11516     PERL_ARGS_ASSERT_CHECKCOMMA;
11517
11518     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11519         if (ckWARN(WARN_SYNTAX)) {
11520             int level = 1;
11521             const char *w;
11522             for (w = s+2; *w && level; w++) {
11523                 if (*w == '(')
11524                     ++level;
11525                 else if (*w == ')')
11526                     --level;
11527             }
11528             while (isSPACE(*w))
11529                 ++w;
11530             /* the list of chars below is for end of statements or
11531              * block / parens, boolean operators (&&, ||, //) and branch
11532              * constructs (or, and, if, until, unless, while, err, for).
11533              * Not a very solid hack... */
11534             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11535                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11536                             "%s (...) interpreted as function",name);
11537         }
11538     }
11539     while (s < PL_bufend && isSPACE(*s))
11540         s++;
11541     if (*s == '(')
11542         s++;
11543     while (s < PL_bufend && isSPACE(*s))
11544         s++;
11545     if (isIDFIRST_lazy_if(s,UTF)) {
11546         const char * const w = s++;
11547         while (isALNUM_lazy_if(s,UTF))
11548             s++;
11549         while (s < PL_bufend && isSPACE(*s))
11550             s++;
11551         if (*s == ',') {
11552             GV* gv;
11553             if (keyword(w, s - w, 0))
11554                 return;
11555
11556             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11557             if (gv && GvCVu(gv))
11558                 return;
11559             Perl_croak(aTHX_ "No comma allowed after %s", what);
11560         }
11561     }
11562 }
11563
11564 /* Either returns sv, or mortalizes sv and returns a new SV*.
11565    Best used as sv=new_constant(..., sv, ...).
11566    If s, pv are NULL, calls subroutine with one argument,
11567    and type is used with error messages only. */
11568
11569 STATIC SV *
11570 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11571                SV *sv, SV *pv, const char *type, STRLEN typelen)
11572 {
11573     dVAR; dSP;
11574     HV * const table = GvHV(PL_hintgv);          /* ^H */
11575     SV *res;
11576     SV **cvp;
11577     SV *cv, *typesv;
11578     const char *why1 = "", *why2 = "", *why3 = "";
11579
11580     PERL_ARGS_ASSERT_NEW_CONSTANT;
11581
11582     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11583         SV *msg;
11584
11585         why2 = (const char *)
11586             (strEQ(key,"charnames")
11587              ? "(possibly a missing \"use charnames ...\")"
11588              : "");
11589         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11590                             (type ? type: "undef"), why2);
11591
11592         /* This is convoluted and evil ("goto considered harmful")
11593          * but I do not understand the intricacies of all the different
11594          * failure modes of %^H in here.  The goal here is to make
11595          * the most probable error message user-friendly. --jhi */
11596
11597         goto msgdone;
11598
11599     report:
11600         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11601                             (type ? type: "undef"), why1, why2, why3);
11602     msgdone:
11603         yyerror(SvPVX_const(msg));
11604         SvREFCNT_dec(msg);
11605         return sv;
11606     }
11607
11608     /* charnames doesn't work well if there have been errors found */
11609     if (PL_error_count > 0 && strEQ(key,"charnames"))
11610         return &PL_sv_undef;
11611
11612     cvp = hv_fetch(table, key, keylen, FALSE);
11613     if (!cvp || !SvOK(*cvp)) {
11614         why1 = "$^H{";
11615         why2 = key;
11616         why3 = "} is not defined";
11617         goto report;
11618     }
11619     sv_2mortal(sv);                     /* Parent created it permanently */
11620     cv = *cvp;
11621     if (!pv && s)
11622         pv = newSVpvn_flags(s, len, SVs_TEMP);
11623     if (type && pv)
11624         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11625     else
11626         typesv = &PL_sv_undef;
11627
11628     PUSHSTACKi(PERLSI_OVERLOAD);
11629     ENTER ;
11630     SAVETMPS;
11631
11632     PUSHMARK(SP) ;
11633     EXTEND(sp, 3);
11634     if (pv)
11635         PUSHs(pv);
11636     PUSHs(sv);
11637     if (pv)
11638         PUSHs(typesv);
11639     PUTBACK;
11640     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11641
11642     SPAGAIN ;
11643
11644     /* Check the eval first */
11645     if (!PL_in_eval && SvTRUE(ERRSV)) {
11646         sv_catpvs(ERRSV, "Propagated");
11647         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11648         (void)POPs;
11649         res = SvREFCNT_inc_simple(sv);
11650     }
11651     else {
11652         res = POPs;
11653         SvREFCNT_inc_simple_void(res);
11654     }
11655
11656     PUTBACK ;
11657     FREETMPS ;
11658     LEAVE ;
11659     POPSTACK;
11660
11661     if (!SvOK(res)) {
11662         why1 = "Call to &{$^H{";
11663         why2 = key;
11664         why3 = "}} did not return a defined value";
11665         sv = res;
11666         goto report;
11667     }
11668
11669     return res;
11670 }
11671
11672 /* Returns a NUL terminated string, with the length of the string written to
11673    *slp
11674    */
11675 STATIC char *
11676 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11677 {
11678     dVAR;
11679     register char *d = dest;
11680     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11681
11682     PERL_ARGS_ASSERT_SCAN_WORD;
11683
11684     for (;;) {
11685         if (d >= e)
11686             Perl_croak(aTHX_ ident_too_long);
11687         if (isALNUM(*s))        /* UTF handled below */
11688             *d++ = *s++;
11689         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11690             *d++ = ':';
11691             *d++ = ':';
11692             s++;
11693         }
11694         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11695             *d++ = *s++;
11696             *d++ = *s++;
11697         }
11698         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11699             char *t = s + UTF8SKIP(s);
11700             size_t len;
11701             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11702                 t += UTF8SKIP(t);
11703             len = t - s;
11704             if (d + len > e)
11705                 Perl_croak(aTHX_ ident_too_long);
11706             Copy(s, d, len, char);
11707             d += len;
11708             s = t;
11709         }
11710         else {
11711             *d = '\0';
11712             *slp = d - dest;
11713             return s;
11714         }
11715     }
11716 }
11717
11718 STATIC char *
11719 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11720 {
11721     dVAR;
11722     char *bracket = NULL;
11723     char funny = *s++;
11724     register char *d = dest;
11725     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11726
11727     PERL_ARGS_ASSERT_SCAN_IDENT;
11728
11729     if (isSPACE(*s))
11730         s = PEEKSPACE(s);
11731     if (isDIGIT(*s)) {
11732         while (isDIGIT(*s)) {
11733             if (d >= e)
11734                 Perl_croak(aTHX_ ident_too_long);
11735             *d++ = *s++;
11736         }
11737     }
11738     else {
11739         for (;;) {
11740             if (d >= e)
11741                 Perl_croak(aTHX_ ident_too_long);
11742             if (isALNUM(*s))    /* UTF handled below */
11743                 *d++ = *s++;
11744             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11745                 *d++ = ':';
11746                 *d++ = ':';
11747                 s++;
11748             }
11749             else if (*s == ':' && s[1] == ':') {
11750                 *d++ = *s++;
11751                 *d++ = *s++;
11752             }
11753             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11754                 char *t = s + UTF8SKIP(s);
11755                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11756                     t += UTF8SKIP(t);
11757                 if (d + (t - s) > e)
11758                     Perl_croak(aTHX_ ident_too_long);
11759                 Copy(s, d, t - s, char);
11760                 d += t - s;
11761                 s = t;
11762             }
11763             else
11764                 break;
11765         }
11766     }
11767     *d = '\0';
11768     d = dest;
11769     if (*d) {
11770         if (PL_lex_state != LEX_NORMAL)
11771             PL_lex_state = LEX_INTERPENDMAYBE;
11772         return s;
11773     }
11774     if (*s == '$' && s[1] &&
11775         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11776     {
11777         return s;
11778     }
11779     if (*s == '{') {
11780         bracket = s;
11781         s++;
11782     }
11783     else if (ck_uni)
11784         check_uni();
11785     if (s < send)
11786         *d = *s++;
11787     d[1] = '\0';
11788     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11789         *d = toCTRL(*s);
11790         s++;
11791     }
11792     if (bracket) {
11793         if (isSPACE(s[-1])) {
11794             while (s < send) {
11795                 const char ch = *s++;
11796                 if (!SPACE_OR_TAB(ch)) {
11797                     *d = ch;
11798                     break;
11799                 }
11800             }
11801         }
11802         if (isIDFIRST_lazy_if(d,UTF)) {
11803             d++;
11804             if (UTF) {
11805                 char *end = s;
11806                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11807                     end += UTF8SKIP(end);
11808                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11809                         end += UTF8SKIP(end);
11810                 }
11811                 Copy(s, d, end - s, char);
11812                 d += end - s;
11813                 s = end;
11814             }
11815             else {
11816                 while ((isALNUM(*s) || *s == ':') && d < e)
11817                     *d++ = *s++;
11818                 if (d >= e)
11819                     Perl_croak(aTHX_ ident_too_long);
11820             }
11821             *d = '\0';
11822             while (s < send && SPACE_OR_TAB(*s))
11823                 s++;
11824             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11825                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11826                     const char * const brack =
11827                         (const char *)
11828                         ((*s == '[') ? "[...]" : "{...}");
11829                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11830                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11831                         funny, dest, brack, funny, dest, brack);
11832                 }
11833                 bracket++;
11834                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11835                 return s;
11836             }
11837         }
11838         /* Handle extended ${^Foo} variables
11839          * 1999-02-27 mjd-perl-patch@plover.com */
11840         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11841                  && isALNUM(*s))
11842         {
11843             d++;
11844             while (isALNUM(*s) && d < e) {
11845                 *d++ = *s++;
11846             }
11847             if (d >= e)
11848                 Perl_croak(aTHX_ ident_too_long);
11849             *d = '\0';
11850         }
11851         if (*s == '}') {
11852             s++;
11853             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11854                 PL_lex_state = LEX_INTERPEND;
11855                 PL_expect = XREF;
11856             }
11857             if (PL_lex_state == LEX_NORMAL) {
11858                 if (ckWARN(WARN_AMBIGUOUS) &&
11859                     (keyword(dest, d - dest, 0)
11860                      || get_cvn_flags(dest, d - dest, 0)))
11861                 {
11862                     if (funny == '#')
11863                         funny = '@';
11864                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11865                         "Ambiguous use of %c{%s} resolved to %c%s",
11866                         funny, dest, funny, dest);
11867                 }
11868             }
11869         }
11870         else {
11871             s = bracket;                /* let the parser handle it */
11872             *dest = '\0';
11873         }
11874     }
11875     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11876         PL_lex_state = LEX_INTERPEND;
11877     return s;
11878 }
11879
11880 static U32
11881 S_pmflag(U32 pmfl, const char ch) {
11882     switch (ch) {
11883         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11884     case GLOBAL_PAT_MOD:      pmfl |= PMf_GLOBAL; break;
11885     case CONTINUE_PAT_MOD:    pmfl |= PMf_CONTINUE; break;
11886     case ONCE_PAT_MOD:        pmfl |= PMf_KEEP; break;
11887     case KEEPCOPY_PAT_MOD:    pmfl |= PMf_KEEPCOPY; break;
11888     case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
11889     }
11890     return pmfl;
11891 }
11892
11893 STATIC char *
11894 S_scan_pat(pTHX_ char *start, I32 type)
11895 {
11896     dVAR;
11897     PMOP *pm;
11898     char *s = scan_str(start,!!PL_madskills,FALSE);
11899     const char * const valid_flags =
11900         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11901 #ifdef PERL_MAD
11902     char *modstart;
11903 #endif
11904
11905     PERL_ARGS_ASSERT_SCAN_PAT;
11906
11907     if (!s) {
11908         const char * const delimiter = skipspace(start);
11909         Perl_croak(aTHX_
11910                    (const char *)
11911                    (*delimiter == '?'
11912                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11913                     : "Search pattern not terminated" ));
11914     }
11915
11916     pm = (PMOP*)newPMOP(type, 0);
11917     if (PL_multi_open == '?') {
11918         /* This is the only point in the code that sets PMf_ONCE:  */
11919         pm->op_pmflags |= PMf_ONCE;
11920
11921         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11922            allows us to restrict the list needed by reset to just the ??
11923            matches.  */
11924         assert(type != OP_TRANS);
11925         if (PL_curstash) {
11926             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11927             U32 elements;
11928             if (!mg) {
11929                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11930                                  0);
11931             }
11932             elements = mg->mg_len / sizeof(PMOP**);
11933             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11934             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11935             mg->mg_len = elements * sizeof(PMOP**);
11936             PmopSTASH_set(pm,PL_curstash);
11937         }
11938     }
11939 #ifdef PERL_MAD
11940     modstart = s;
11941 #endif
11942     while (*s && strchr(valid_flags, *s))
11943         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11944
11945     if (isALNUM(*s)) {
11946         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11947             "Having no space between pattern and following word is deprecated");
11948
11949     }
11950 #ifdef PERL_MAD
11951     if (PL_madskills && modstart != s) {
11952         SV* tmptoken = newSVpvn(modstart, s - modstart);
11953         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11954     }
11955 #endif
11956     /* issue a warning if /c is specified,but /g is not */
11957     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11958     {
11959         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11960                        "Use of /c modifier is meaningless without /g" );
11961     }
11962
11963     PL_lex_op = (OP*)pm;
11964     pl_yylval.ival = OP_MATCH;
11965     return s;
11966 }
11967
11968 STATIC char *
11969 S_scan_subst(pTHX_ char *start)
11970 {
11971     dVAR;
11972     register char *s;
11973     register PMOP *pm;
11974     I32 first_start;
11975     I32 es = 0;
11976 #ifdef PERL_MAD
11977     char *modstart;
11978 #endif
11979
11980     PERL_ARGS_ASSERT_SCAN_SUBST;
11981
11982     pl_yylval.ival = OP_NULL;
11983
11984     s = scan_str(start,!!PL_madskills,FALSE);
11985
11986     if (!s)
11987         Perl_croak(aTHX_ "Substitution pattern not terminated");
11988
11989     if (s[-1] == PL_multi_open)
11990         s--;
11991 #ifdef PERL_MAD
11992     if (PL_madskills) {
11993         CURMAD('q', PL_thisopen);
11994         CURMAD('_', PL_thiswhite);
11995         CURMAD('E', PL_thisstuff);
11996         CURMAD('Q', PL_thisclose);
11997         PL_realtokenstart = s - SvPVX(PL_linestr);
11998     }
11999 #endif
12000
12001     first_start = PL_multi_start;
12002     s = scan_str(s,!!PL_madskills,FALSE);
12003     if (!s) {
12004         if (PL_lex_stuff) {
12005             SvREFCNT_dec(PL_lex_stuff);
12006             PL_lex_stuff = NULL;
12007         }
12008         Perl_croak(aTHX_ "Substitution replacement not terminated");
12009     }
12010     PL_multi_start = first_start;       /* so whole substitution is taken together */
12011
12012     pm = (PMOP*)newPMOP(OP_SUBST, 0);
12013
12014 #ifdef PERL_MAD
12015     if (PL_madskills) {
12016         CURMAD('z', PL_thisopen);
12017         CURMAD('R', PL_thisstuff);
12018         CURMAD('Z', PL_thisclose);
12019     }
12020     modstart = s;
12021 #endif
12022
12023     while (*s) {
12024         if (*s == EXEC_PAT_MOD) {
12025             s++;
12026             es++;
12027         }
12028         else if (strchr(S_PAT_MODS, *s))
12029             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12030         else {
12031             if (isALNUM(*s)) {
12032                 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12033                     "Having no space between pattern and following word is deprecated");
12034
12035             }
12036             break;
12037         }
12038     }
12039
12040 #ifdef PERL_MAD
12041     if (PL_madskills) {
12042         if (modstart != s)
12043             curmad('m', newSVpvn(modstart, s - modstart));
12044         append_madprops(PL_thismad, (OP*)pm, 0);
12045         PL_thismad = 0;
12046     }
12047 #endif
12048     if ((pm->op_pmflags & PMf_CONTINUE)) {
12049         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
12050     }
12051
12052     if (es) {
12053         SV * const repl = newSVpvs("");
12054
12055         PL_sublex_info.super_bufptr = s;
12056         PL_sublex_info.super_bufend = PL_bufend;
12057         PL_multi_end = 0;
12058         pm->op_pmflags |= PMf_EVAL;
12059         while (es-- > 0) {
12060             if (es)
12061                 sv_catpvs(repl, "eval ");
12062             else
12063                 sv_catpvs(repl, "do ");
12064         }
12065         sv_catpvs(repl, "{");
12066         sv_catsv(repl, PL_lex_repl);
12067         if (strchr(SvPVX(PL_lex_repl), '#'))
12068             sv_catpvs(repl, "\n");
12069         sv_catpvs(repl, "}");
12070         SvEVALED_on(repl);
12071         SvREFCNT_dec(PL_lex_repl);
12072         PL_lex_repl = repl;
12073     }
12074
12075     PL_lex_op = (OP*)pm;
12076     pl_yylval.ival = OP_SUBST;
12077     return s;
12078 }
12079
12080 STATIC char *
12081 S_scan_trans(pTHX_ char *start)
12082 {
12083     dVAR;
12084     register char* s;
12085     OP *o;
12086     short *tbl;
12087     U8 squash;
12088     U8 del;
12089     U8 complement;
12090 #ifdef PERL_MAD
12091     char *modstart;
12092 #endif
12093
12094     PERL_ARGS_ASSERT_SCAN_TRANS;
12095
12096     pl_yylval.ival = OP_NULL;
12097
12098     s = scan_str(start,!!PL_madskills,FALSE);
12099     if (!s)
12100         Perl_croak(aTHX_ "Transliteration pattern not terminated");
12101
12102     if (s[-1] == PL_multi_open)
12103         s--;
12104 #ifdef PERL_MAD
12105     if (PL_madskills) {
12106         CURMAD('q', PL_thisopen);
12107         CURMAD('_', PL_thiswhite);
12108         CURMAD('E', PL_thisstuff);
12109         CURMAD('Q', PL_thisclose);
12110         PL_realtokenstart = s - SvPVX(PL_linestr);
12111     }
12112 #endif
12113
12114     s = scan_str(s,!!PL_madskills,FALSE);
12115     if (!s) {
12116         if (PL_lex_stuff) {
12117             SvREFCNT_dec(PL_lex_stuff);
12118             PL_lex_stuff = NULL;
12119         }
12120         Perl_croak(aTHX_ "Transliteration replacement not terminated");
12121     }
12122     if (PL_madskills) {
12123         CURMAD('z', PL_thisopen);
12124         CURMAD('R', PL_thisstuff);
12125         CURMAD('Z', PL_thisclose);
12126     }
12127
12128     complement = del = squash = 0;
12129 #ifdef PERL_MAD
12130     modstart = s;
12131 #endif
12132     while (1) {
12133         switch (*s) {
12134         case 'c':
12135             complement = OPpTRANS_COMPLEMENT;
12136             break;
12137         case 'd':
12138             del = OPpTRANS_DELETE;
12139             break;
12140         case 's':
12141             squash = OPpTRANS_SQUASH;
12142             break;
12143         default:
12144             goto no_more;
12145         }
12146         s++;
12147     }
12148   no_more:
12149
12150     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12151     o = newPVOP(OP_TRANS, 0, (char*)tbl);
12152     o->op_private &= ~OPpTRANS_ALL;
12153     o->op_private |= del|squash|complement|
12154       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12155       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
12156
12157     PL_lex_op = o;
12158     pl_yylval.ival = OP_TRANS;
12159
12160 #ifdef PERL_MAD
12161     if (PL_madskills) {
12162         if (modstart != s)
12163             curmad('m', newSVpvn(modstart, s - modstart));
12164         append_madprops(PL_thismad, o, 0);
12165         PL_thismad = 0;
12166     }
12167 #endif
12168
12169     return s;
12170 }
12171
12172 STATIC char *
12173 S_scan_heredoc(pTHX_ register char *s)
12174 {
12175     dVAR;
12176     SV *herewas;
12177     I32 op_type = OP_SCALAR;
12178     I32 len;
12179     SV *tmpstr;
12180     char term;
12181     const char *found_newline;
12182     register char *d;
12183     register char *e;
12184     char *peek;
12185     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12186 #ifdef PERL_MAD
12187     I32 stuffstart = s - SvPVX(PL_linestr);
12188     char *tstart;
12189
12190     PL_realtokenstart = -1;
12191 #endif
12192
12193     PERL_ARGS_ASSERT_SCAN_HEREDOC;
12194
12195     s += 2;
12196     d = PL_tokenbuf;
12197     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12198     if (!outer)
12199         *d++ = '\n';
12200     peek = s;
12201     while (SPACE_OR_TAB(*peek))
12202         peek++;
12203     if (*peek == '`' || *peek == '\'' || *peek =='"') {
12204         s = peek;
12205         term = *s++;
12206         s = delimcpy(d, e, s, PL_bufend, term, &len);
12207         d += len;
12208         if (s < PL_bufend)
12209             s++;
12210     }
12211     else {
12212         if (*s == '\\')
12213             s++, term = '\'';
12214         else
12215             term = '"';
12216         if (!isALNUM_lazy_if(s,UTF))
12217             deprecate("bare << to mean <<\"\"");
12218         for (; isALNUM_lazy_if(s,UTF); s++) {
12219             if (d < e)
12220                 *d++ = *s;
12221         }
12222     }
12223     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12224         Perl_croak(aTHX_ "Delimiter for here document is too long");
12225     *d++ = '\n';
12226     *d = '\0';
12227     len = d - PL_tokenbuf;
12228
12229 #ifdef PERL_MAD
12230     if (PL_madskills) {
12231         tstart = PL_tokenbuf + !outer;
12232         PL_thisclose = newSVpvn(tstart, len - !outer);
12233         tstart = SvPVX(PL_linestr) + stuffstart;
12234         PL_thisopen = newSVpvn(tstart, s - tstart);
12235         stuffstart = s - SvPVX(PL_linestr);
12236     }
12237 #endif
12238 #ifndef PERL_STRICT_CR
12239     d = strchr(s, '\r');
12240     if (d) {
12241         char * const olds = s;
12242         s = d;
12243         while (s < PL_bufend) {
12244             if (*s == '\r') {
12245                 *d++ = '\n';
12246                 if (*++s == '\n')
12247                     s++;
12248             }
12249             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
12250                 *d++ = *s++;
12251                 s++;
12252             }
12253             else
12254                 *d++ = *s++;
12255         }
12256         *d = '\0';
12257         PL_bufend = d;
12258         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12259         s = olds;
12260     }
12261 #endif
12262 #ifdef PERL_MAD
12263     found_newline = 0;
12264 #endif
12265     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12266         herewas = newSVpvn(s,PL_bufend-s);
12267     }
12268     else {
12269 #ifdef PERL_MAD
12270         herewas = newSVpvn(s-1,found_newline-s+1);
12271 #else
12272         s--;
12273         herewas = newSVpvn(s,found_newline-s);
12274 #endif
12275     }
12276 #ifdef PERL_MAD
12277     if (PL_madskills) {
12278         tstart = SvPVX(PL_linestr) + stuffstart;
12279         if (PL_thisstuff)
12280             sv_catpvn(PL_thisstuff, tstart, s - tstart);
12281         else
12282             PL_thisstuff = newSVpvn(tstart, s - tstart);
12283     }
12284 #endif
12285     s += SvCUR(herewas);
12286
12287 #ifdef PERL_MAD
12288     stuffstart = s - SvPVX(PL_linestr);
12289
12290     if (found_newline)
12291         s--;
12292 #endif
12293
12294     tmpstr = newSV_type(SVt_PVIV);
12295     SvGROW(tmpstr, 80);
12296     if (term == '\'') {
12297         op_type = OP_CONST;
12298         SvIV_set(tmpstr, -1);
12299     }
12300     else if (term == '`') {
12301         op_type = OP_BACKTICK;
12302         SvIV_set(tmpstr, '\\');
12303     }
12304
12305     CLINE;
12306     PL_multi_start = CopLINE(PL_curcop);
12307     PL_multi_open = PL_multi_close = '<';
12308     term = *PL_tokenbuf;
12309     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12310         char * const bufptr = PL_sublex_info.super_bufptr;
12311         char * const bufend = PL_sublex_info.super_bufend;
12312         char * const olds = s - SvCUR(herewas);
12313         s = strchr(bufptr, '\n');
12314         if (!s)
12315             s = bufend;
12316         d = s;
12317         while (s < bufend &&
12318           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12319             if (*s++ == '\n')
12320                 CopLINE_inc(PL_curcop);
12321         }
12322         if (s >= bufend) {
12323             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12324             missingterm(PL_tokenbuf);
12325         }
12326         sv_setpvn(herewas,bufptr,d-bufptr+1);
12327         sv_setpvn(tmpstr,d+1,s-d);
12328         s += len - 1;
12329         sv_catpvn(herewas,s,bufend-s);
12330         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12331
12332         s = olds;
12333         goto retval;
12334     }
12335     else if (!outer) {
12336         d = s;
12337         while (s < PL_bufend &&
12338           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12339             if (*s++ == '\n')
12340                 CopLINE_inc(PL_curcop);
12341         }
12342         if (s >= PL_bufend) {
12343             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12344             missingterm(PL_tokenbuf);
12345         }
12346         sv_setpvn(tmpstr,d+1,s-d);
12347 #ifdef PERL_MAD
12348         if (PL_madskills) {
12349             if (PL_thisstuff)
12350                 sv_catpvn(PL_thisstuff, d + 1, s - d);
12351             else
12352                 PL_thisstuff = newSVpvn(d + 1, s - d);
12353             stuffstart = s - SvPVX(PL_linestr);
12354         }
12355 #endif
12356         s += len - 1;
12357         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12358
12359         sv_catpvn(herewas,s,PL_bufend-s);
12360         sv_setsv(PL_linestr,herewas);
12361         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12362         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12363         PL_last_lop = PL_last_uni = NULL;
12364     }
12365     else
12366         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12367     while (s >= PL_bufend) {    /* multiple line string? */
12368 #ifdef PERL_MAD
12369         if (PL_madskills) {
12370             tstart = SvPVX(PL_linestr) + stuffstart;
12371             if (PL_thisstuff)
12372                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12373             else
12374                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12375         }
12376 #endif
12377         PL_bufptr = s;
12378         CopLINE_inc(PL_curcop);
12379         if (!outer || !lex_next_chunk(0)) {
12380             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12381             missingterm(PL_tokenbuf);
12382         }
12383         CopLINE_dec(PL_curcop);
12384         s = PL_bufptr;
12385 #ifdef PERL_MAD
12386         stuffstart = s - SvPVX(PL_linestr);
12387 #endif
12388         CopLINE_inc(PL_curcop);
12389         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12390         PL_last_lop = PL_last_uni = NULL;
12391 #ifndef PERL_STRICT_CR
12392         if (PL_bufend - PL_linestart >= 2) {
12393             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12394                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12395             {
12396                 PL_bufend[-2] = '\n';
12397                 PL_bufend--;
12398                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12399             }
12400             else if (PL_bufend[-1] == '\r')
12401                 PL_bufend[-1] = '\n';
12402         }
12403         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12404             PL_bufend[-1] = '\n';
12405 #endif
12406         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12407             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12408             *(SvPVX(PL_linestr) + off ) = ' ';
12409             sv_catsv(PL_linestr,herewas);
12410             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12411             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12412         }
12413         else {
12414             s = PL_bufend;
12415             sv_catsv(tmpstr,PL_linestr);
12416         }
12417     }
12418     s++;
12419 retval:
12420     PL_multi_end = CopLINE(PL_curcop);
12421     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12422         SvPV_shrink_to_cur(tmpstr);
12423     }
12424     SvREFCNT_dec(herewas);
12425     if (!IN_BYTES) {
12426         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12427             SvUTF8_on(tmpstr);
12428         else if (PL_encoding)
12429             sv_recode_to_utf8(tmpstr, PL_encoding);
12430     }
12431     PL_lex_stuff = tmpstr;
12432     pl_yylval.ival = op_type;
12433     return s;
12434 }
12435
12436 /* scan_inputsymbol
12437    takes: current position in input buffer
12438    returns: new position in input buffer
12439    side-effects: pl_yylval and lex_op are set.
12440
12441    This code handles:
12442
12443    <>           read from ARGV
12444    <FH>         read from filehandle
12445    <pkg::FH>    read from package qualified filehandle
12446    <pkg'FH>     read from package qualified filehandle
12447    <$fh>        read from filehandle in $fh
12448    <*.h>        filename glob
12449
12450 */
12451
12452 STATIC char *
12453 S_scan_inputsymbol(pTHX_ char *start)
12454 {
12455     dVAR;
12456     register char *s = start;           /* current position in buffer */
12457     char *end;
12458     I32 len;
12459     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12460     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12461
12462     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12463
12464     end = strchr(s, '\n');
12465     if (!end)
12466         end = PL_bufend;
12467     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12468
12469     /* die if we didn't have space for the contents of the <>,
12470        or if it didn't end, or if we see a newline
12471     */
12472
12473     if (len >= (I32)sizeof PL_tokenbuf)
12474         Perl_croak(aTHX_ "Excessively long <> operator");
12475     if (s >= end)
12476         Perl_croak(aTHX_ "Unterminated <> operator");
12477
12478     s++;
12479
12480     /* check for <$fh>
12481        Remember, only scalar variables are interpreted as filehandles by
12482        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12483        treated as a glob() call.
12484        This code makes use of the fact that except for the $ at the front,
12485        a scalar variable and a filehandle look the same.
12486     */
12487     if (*d == '$' && d[1]) d++;
12488
12489     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12490     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12491         d++;
12492
12493     /* If we've tried to read what we allow filehandles to look like, and
12494        there's still text left, then it must be a glob() and not a getline.
12495        Use scan_str to pull out the stuff between the <> and treat it
12496        as nothing more than a string.
12497     */
12498
12499     if (d - PL_tokenbuf != len) {
12500         pl_yylval.ival = OP_GLOB;
12501         s = scan_str(start,!!PL_madskills,FALSE);
12502         if (!s)
12503            Perl_croak(aTHX_ "Glob not terminated");
12504         return s;
12505     }
12506     else {
12507         bool readline_overriden = FALSE;
12508         GV *gv_readline;
12509         GV **gvp;
12510         /* we're in a filehandle read situation */
12511         d = PL_tokenbuf;
12512
12513         /* turn <> into <ARGV> */
12514         if (!len)
12515             Copy("ARGV",d,5,char);
12516
12517         /* Check whether readline() is overriden */
12518         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12519         if ((gv_readline
12520                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12521                 ||
12522                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12523                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12524                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12525             readline_overriden = TRUE;
12526
12527         /* if <$fh>, create the ops to turn the variable into a
12528            filehandle
12529         */
12530         if (*d == '$') {
12531             /* try to find it in the pad for this block, otherwise find
12532                add symbol table ops
12533             */
12534             const PADOFFSET tmp = pad_findmy(d, len, 0);
12535             if (tmp != NOT_IN_PAD) {
12536                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12537                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12538                     HEK * const stashname = HvNAME_HEK(stash);
12539                     SV * const sym = sv_2mortal(newSVhek(stashname));
12540                     sv_catpvs(sym, "::");
12541                     sv_catpv(sym, d+1);
12542                     d = SvPVX(sym);
12543                     goto intro_sym;
12544                 }
12545                 else {
12546                     OP * const o = newOP(OP_PADSV, 0);
12547                     o->op_targ = tmp;
12548                     PL_lex_op = readline_overriden
12549                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12550                                 append_elem(OP_LIST, o,
12551                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12552                         : (OP*)newUNOP(OP_READLINE, 0, o);
12553                 }
12554             }
12555             else {
12556                 GV *gv;
12557                 ++d;
12558 intro_sym:
12559                 gv = gv_fetchpv(d,
12560                                 (PL_in_eval
12561                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12562                                  : GV_ADDMULTI),
12563                                 SVt_PV);
12564                 PL_lex_op = readline_overriden
12565                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12566                             append_elem(OP_LIST,
12567                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12568                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12569                     : (OP*)newUNOP(OP_READLINE, 0,
12570                             newUNOP(OP_RV2SV, 0,
12571                                 newGVOP(OP_GV, 0, gv)));
12572             }
12573             if (!readline_overriden)
12574                 PL_lex_op->op_flags |= OPf_SPECIAL;
12575             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12576             pl_yylval.ival = OP_NULL;
12577         }
12578
12579         /* If it's none of the above, it must be a literal filehandle
12580            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12581         else {
12582             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12583             PL_lex_op = readline_overriden
12584                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12585                         append_elem(OP_LIST,
12586                             newGVOP(OP_GV, 0, gv),
12587                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12588                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12589             pl_yylval.ival = OP_NULL;
12590         }
12591     }
12592
12593     return s;
12594 }
12595
12596
12597 /* scan_str
12598    takes: start position in buffer
12599           keep_quoted preserve \ on the embedded delimiter(s)
12600           keep_delims preserve the delimiters around the string
12601    returns: position to continue reading from buffer
12602    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12603         updates the read buffer.
12604
12605    This subroutine pulls a string out of the input.  It is called for:
12606         q               single quotes           q(literal text)
12607         '               single quotes           'literal text'
12608         qq              double quotes           qq(interpolate $here please)
12609         "               double quotes           "interpolate $here please"
12610         qx              backticks               qx(/bin/ls -l)
12611         `               backticks               `/bin/ls -l`
12612         qw              quote words             @EXPORT_OK = qw( func() $spam )
12613         m//             regexp match            m/this/
12614         s///            regexp substitute       s/this/that/
12615         tr///           string transliterate    tr/this/that/
12616         y///            string transliterate    y/this/that/
12617         ($*@)           sub prototypes          sub foo ($)
12618         (stuff)         sub attr parameters     sub foo : attr(stuff)
12619         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12620
12621    In most of these cases (all but <>, patterns and transliterate)
12622    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12623    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12624    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12625    calls scan_str().
12626
12627    It skips whitespace before the string starts, and treats the first
12628    character as the delimiter.  If the delimiter is one of ([{< then
12629    the corresponding "close" character )]}> is used as the closing
12630    delimiter.  It allows quoting of delimiters, and if the string has
12631    balanced delimiters ([{<>}]) it allows nesting.
12632
12633    On success, the SV with the resulting string is put into lex_stuff or,
12634    if that is already non-NULL, into lex_repl. The second case occurs only
12635    when parsing the RHS of the special constructs s/// and tr/// (y///).
12636    For convenience, the terminating delimiter character is stuffed into
12637    SvIVX of the SV.
12638 */
12639
12640 STATIC char *
12641 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12642 {
12643     dVAR;
12644     SV *sv;                             /* scalar value: string */
12645     const char *tmps;                   /* temp string, used for delimiter matching */
12646     register char *s = start;           /* current position in the buffer */
12647     register char term;                 /* terminating character */
12648     register char *to;                  /* current position in the sv's data */
12649     I32 brackets = 1;                   /* bracket nesting level */
12650     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12651     I32 termcode;                       /* terminating char. code */
12652     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12653     STRLEN termlen;                     /* length of terminating string */
12654     int last_off = 0;                   /* last position for nesting bracket */
12655 #ifdef PERL_MAD
12656     int stuffstart;
12657     char *tstart;
12658 #endif
12659
12660     PERL_ARGS_ASSERT_SCAN_STR;
12661
12662     /* skip space before the delimiter */
12663     if (isSPACE(*s)) {
12664         s = PEEKSPACE(s);
12665     }
12666
12667 #ifdef PERL_MAD
12668     if (PL_realtokenstart >= 0) {
12669         stuffstart = PL_realtokenstart;
12670         PL_realtokenstart = -1;
12671     }
12672     else
12673         stuffstart = start - SvPVX(PL_linestr);
12674 #endif
12675     /* mark where we are, in case we need to report errors */
12676     CLINE;
12677
12678     /* after skipping whitespace, the next character is the terminator */
12679     term = *s;
12680     if (!UTF) {
12681         termcode = termstr[0] = term;
12682         termlen = 1;
12683     }
12684     else {
12685         termcode = utf8_to_uvchr((U8*)s, &termlen);
12686         Copy(s, termstr, termlen, U8);
12687         if (!UTF8_IS_INVARIANT(term))
12688             has_utf8 = TRUE;
12689     }
12690
12691     /* mark where we are */
12692     PL_multi_start = CopLINE(PL_curcop);
12693     PL_multi_open = term;
12694
12695     /* find corresponding closing delimiter */
12696     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12697         termcode = termstr[0] = term = tmps[5];
12698
12699     PL_multi_close = term;
12700
12701     /* create a new SV to hold the contents.  79 is the SV's initial length.
12702        What a random number. */
12703     sv = newSV_type(SVt_PVIV);
12704     SvGROW(sv, 80);
12705     SvIV_set(sv, termcode);
12706     (void)SvPOK_only(sv);               /* validate pointer */
12707
12708     /* move past delimiter and try to read a complete string */
12709     if (keep_delims)
12710         sv_catpvn(sv, s, termlen);
12711     s += termlen;
12712 #ifdef PERL_MAD
12713     tstart = SvPVX(PL_linestr) + stuffstart;
12714     if (!PL_thisopen && !keep_delims) {
12715         PL_thisopen = newSVpvn(tstart, s - tstart);
12716         stuffstart = s - SvPVX(PL_linestr);
12717     }
12718 #endif
12719     for (;;) {
12720         if (PL_encoding && !UTF) {
12721             bool cont = TRUE;
12722
12723             while (cont) {
12724                 int offset = s - SvPVX_const(PL_linestr);
12725                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12726                                            &offset, (char*)termstr, termlen);
12727                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12728                 char * const svlast = SvEND(sv) - 1;
12729
12730                 for (; s < ns; s++) {
12731                     if (*s == '\n' && !PL_rsfp)
12732                         CopLINE_inc(PL_curcop);
12733                 }
12734                 if (!found)
12735                     goto read_more_line;
12736                 else {
12737                     /* handle quoted delimiters */
12738                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12739                         const char *t;
12740                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12741                             t--;
12742                         if ((svlast-1 - t) % 2) {
12743                             if (!keep_quoted) {
12744                                 *(svlast-1) = term;
12745                                 *svlast = '\0';
12746                                 SvCUR_set(sv, SvCUR(sv) - 1);
12747                             }
12748                             continue;
12749                         }
12750                     }
12751                     if (PL_multi_open == PL_multi_close) {
12752                         cont = FALSE;
12753                     }
12754                     else {
12755                         const char *t;
12756                         char *w;
12757                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12758                             /* At here, all closes are "was quoted" one,
12759                                so we don't check PL_multi_close. */
12760                             if (*t == '\\') {
12761                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12762                                     t++;
12763                                 else
12764                                     *w++ = *t++;
12765                             }
12766                             else if (*t == PL_multi_open)
12767                                 brackets++;
12768
12769                             *w = *t;
12770                         }
12771                         if (w < t) {
12772                             *w++ = term;
12773                             *w = '\0';
12774                             SvCUR_set(sv, w - SvPVX_const(sv));
12775                         }
12776                         last_off = w - SvPVX(sv);
12777                         if (--brackets <= 0)
12778                             cont = FALSE;
12779                     }
12780                 }
12781             }
12782             if (!keep_delims) {
12783                 SvCUR_set(sv, SvCUR(sv) - 1);
12784                 *SvEND(sv) = '\0';
12785             }
12786             break;
12787         }
12788
12789         /* extend sv if need be */
12790         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12791         /* set 'to' to the next character in the sv's string */
12792         to = SvPVX(sv)+SvCUR(sv);
12793
12794         /* if open delimiter is the close delimiter read unbridle */
12795         if (PL_multi_open == PL_multi_close) {
12796             for (; s < PL_bufend; s++,to++) {
12797                 /* embedded newlines increment the current line number */
12798                 if (*s == '\n' && !PL_rsfp)
12799                     CopLINE_inc(PL_curcop);
12800                 /* handle quoted delimiters */
12801                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12802                     if (!keep_quoted && s[1] == term)
12803                         s++;
12804                 /* any other quotes are simply copied straight through */
12805                     else
12806                         *to++ = *s++;
12807                 }
12808                 /* terminate when run out of buffer (the for() condition), or
12809                    have found the terminator */
12810                 else if (*s == term) {
12811                     if (termlen == 1)
12812                         break;
12813                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12814                         break;
12815                 }
12816                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12817                     has_utf8 = TRUE;
12818                 *to = *s;
12819             }
12820         }
12821
12822         /* if the terminator isn't the same as the start character (e.g.,
12823            matched brackets), we have to allow more in the quoting, and
12824            be prepared for nested brackets.
12825         */
12826         else {
12827             /* read until we run out of string, or we find the terminator */
12828             for (; s < PL_bufend; s++,to++) {
12829                 /* embedded newlines increment the line count */
12830                 if (*s == '\n' && !PL_rsfp)
12831                     CopLINE_inc(PL_curcop);
12832                 /* backslashes can escape the open or closing characters */
12833                 if (*s == '\\' && s+1 < PL_bufend) {
12834                     if (!keep_quoted &&
12835                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12836                         s++;
12837                     else
12838                         *to++ = *s++;
12839                 }
12840                 /* allow nested opens and closes */
12841                 else if (*s == PL_multi_close && --brackets <= 0)
12842                     break;
12843                 else if (*s == PL_multi_open)
12844                     brackets++;
12845                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12846                     has_utf8 = TRUE;
12847                 *to = *s;
12848             }
12849         }
12850         /* terminate the copied string and update the sv's end-of-string */
12851         *to = '\0';
12852         SvCUR_set(sv, to - SvPVX_const(sv));
12853
12854         /*
12855          * this next chunk reads more into the buffer if we're not done yet
12856          */
12857
12858         if (s < PL_bufend)
12859             break;              /* handle case where we are done yet :-) */
12860
12861 #ifndef PERL_STRICT_CR
12862         if (to - SvPVX_const(sv) >= 2) {
12863             if ((to[-2] == '\r' && to[-1] == '\n') ||
12864                 (to[-2] == '\n' && to[-1] == '\r'))
12865             {
12866                 to[-2] = '\n';
12867                 to--;
12868                 SvCUR_set(sv, to - SvPVX_const(sv));
12869             }
12870             else if (to[-1] == '\r')
12871                 to[-1] = '\n';
12872         }
12873         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12874             to[-1] = '\n';
12875 #endif
12876
12877      read_more_line:
12878         /* if we're out of file, or a read fails, bail and reset the current
12879            line marker so we can report where the unterminated string began
12880         */
12881 #ifdef PERL_MAD
12882         if (PL_madskills) {
12883             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12884             if (PL_thisstuff)
12885                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12886             else
12887                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12888         }
12889 #endif
12890         CopLINE_inc(PL_curcop);
12891         PL_bufptr = PL_bufend;
12892         if (!lex_next_chunk(0)) {
12893             sv_free(sv);
12894             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12895             return NULL;
12896         }
12897         s = PL_bufptr;
12898 #ifdef PERL_MAD
12899         stuffstart = 0;
12900 #endif
12901     }
12902
12903     /* at this point, we have successfully read the delimited string */
12904
12905     if (!PL_encoding || UTF) {
12906 #ifdef PERL_MAD
12907         if (PL_madskills) {
12908             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12909             const int len = s - tstart;
12910             if (PL_thisstuff)
12911                 sv_catpvn(PL_thisstuff, tstart, len);
12912             else
12913                 PL_thisstuff = newSVpvn(tstart, len);
12914             if (!PL_thisclose && !keep_delims)
12915                 PL_thisclose = newSVpvn(s,termlen);
12916         }
12917 #endif
12918
12919         if (keep_delims)
12920             sv_catpvn(sv, s, termlen);
12921         s += termlen;
12922     }
12923 #ifdef PERL_MAD
12924     else {
12925         if (PL_madskills) {
12926             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12927             const int len = s - tstart - termlen;
12928             if (PL_thisstuff)
12929                 sv_catpvn(PL_thisstuff, tstart, len);
12930             else
12931                 PL_thisstuff = newSVpvn(tstart, len);
12932             if (!PL_thisclose && !keep_delims)
12933                 PL_thisclose = newSVpvn(s - termlen,termlen);
12934         }
12935     }
12936 #endif
12937     if (has_utf8 || PL_encoding)
12938         SvUTF8_on(sv);
12939
12940     PL_multi_end = CopLINE(PL_curcop);
12941
12942     /* if we allocated too much space, give some back */
12943     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12944         SvLEN_set(sv, SvCUR(sv) + 1);
12945         SvPV_renew(sv, SvLEN(sv));
12946     }
12947
12948     /* decide whether this is the first or second quoted string we've read
12949        for this op
12950     */
12951
12952     if (PL_lex_stuff)
12953         PL_lex_repl = sv;
12954     else
12955         PL_lex_stuff = sv;
12956     return s;
12957 }
12958
12959 /*
12960   scan_num
12961   takes: pointer to position in buffer
12962   returns: pointer to new position in buffer
12963   side-effects: builds ops for the constant in pl_yylval.op
12964
12965   Read a number in any of the formats that Perl accepts:
12966
12967   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12968   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12969   0b[01](_?[01])*
12970   0[0-7](_?[0-7])*
12971   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12972
12973   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12974   thing it reads.
12975
12976   If it reads a number without a decimal point or an exponent, it will
12977   try converting the number to an integer and see if it can do so
12978   without loss of precision.
12979 */
12980
12981 char *
12982 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12983 {
12984     dVAR;
12985     register const char *s = start;     /* current position in buffer */
12986     register char *d;                   /* destination in temp buffer */
12987     register char *e;                   /* end of temp buffer */
12988     NV nv;                              /* number read, as a double */
12989     SV *sv = NULL;                      /* place to put the converted number */
12990     bool floatit;                       /* boolean: int or float? */
12991     const char *lastub = NULL;          /* position of last underbar */
12992     static char const number_too_long[] = "Number too long";
12993
12994     PERL_ARGS_ASSERT_SCAN_NUM;
12995
12996     /* We use the first character to decide what type of number this is */
12997
12998     switch (*s) {
12999     default:
13000       Perl_croak(aTHX_ "panic: scan_num");
13001
13002     /* if it starts with a 0, it could be an octal number, a decimal in
13003        0.13 disguise, or a hexadecimal number, or a binary number. */
13004     case '0':
13005         {
13006           /* variables:
13007              u          holds the "number so far"
13008              shift      the power of 2 of the base
13009                         (hex == 4, octal == 3, binary == 1)
13010              overflowed was the number more than we can hold?
13011
13012              Shift is used when we add a digit.  It also serves as an "are
13013              we in octal/hex/binary?" indicator to disallow hex characters
13014              when in octal mode.
13015            */
13016             NV n = 0.0;
13017             UV u = 0;
13018             I32 shift;
13019             bool overflowed = FALSE;
13020             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
13021             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13022             static const char* const bases[5] =
13023               { "", "binary", "", "octal", "hexadecimal" };
13024             static const char* const Bases[5] =
13025               { "", "Binary", "", "Octal", "Hexadecimal" };
13026             static const char* const maxima[5] =
13027               { "",
13028                 "0b11111111111111111111111111111111",
13029                 "",
13030                 "037777777777",
13031                 "0xffffffff" };
13032             const char *base, *Base, *max;
13033
13034             /* check for hex */
13035             if (s[1] == 'x' || s[1] == 'X') {
13036                 shift = 4;
13037                 s += 2;
13038                 just_zero = FALSE;
13039             } else if (s[1] == 'b' || s[1] == 'B') {
13040                 shift = 1;
13041                 s += 2;
13042                 just_zero = FALSE;
13043             }
13044             /* check for a decimal in disguise */
13045             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
13046                 goto decimal;
13047             /* so it must be octal */
13048             else {
13049                 shift = 3;
13050                 s++;
13051             }
13052
13053             if (*s == '_') {
13054                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13055                                "Misplaced _ in number");
13056                lastub = s++;
13057             }
13058
13059             base = bases[shift];
13060             Base = Bases[shift];
13061             max  = maxima[shift];
13062
13063             /* read the rest of the number */
13064             for (;;) {
13065                 /* x is used in the overflow test,
13066                    b is the digit we're adding on. */
13067                 UV x, b;
13068
13069                 switch (*s) {
13070
13071                 /* if we don't mention it, we're done */
13072                 default:
13073                     goto out;
13074
13075                 /* _ are ignored -- but warned about if consecutive */
13076                 case '_':
13077                     if (lastub && s == lastub + 1)
13078                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13079                                        "Misplaced _ in number");
13080                     lastub = s++;
13081                     break;
13082
13083                 /* 8 and 9 are not octal */
13084                 case '8': case '9':
13085                     if (shift == 3)
13086                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13087                     /* FALL THROUGH */
13088
13089                 /* octal digits */
13090                 case '2': case '3': case '4':
13091                 case '5': case '6': case '7':
13092                     if (shift == 1)
13093                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13094                     /* FALL THROUGH */
13095
13096                 case '0': case '1':
13097                     b = *s++ & 15;              /* ASCII digit -> value of digit */
13098                     goto digit;
13099
13100                 /* hex digits */
13101                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13102                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13103                     /* make sure they said 0x */
13104                     if (shift != 4)
13105                         goto out;
13106                     b = (*s++ & 7) + 9;
13107
13108                     /* Prepare to put the digit we have onto the end
13109                        of the number so far.  We check for overflows.
13110                     */
13111
13112                   digit:
13113                     just_zero = FALSE;
13114                     if (!overflowed) {
13115                         x = u << shift; /* make room for the digit */
13116
13117                         if ((x >> shift) != u
13118                             && !(PL_hints & HINT_NEW_BINARY)) {
13119                             overflowed = TRUE;
13120                             n = (NV) u;
13121                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13122                                              "Integer overflow in %s number",
13123                                              base);
13124                         } else
13125                             u = x | b;          /* add the digit to the end */
13126                     }
13127                     if (overflowed) {
13128                         n *= nvshift[shift];
13129                         /* If an NV has not enough bits in its
13130                          * mantissa to represent an UV this summing of
13131                          * small low-order numbers is a waste of time
13132                          * (because the NV cannot preserve the
13133                          * low-order bits anyway): we could just
13134                          * remember when did we overflow and in the
13135                          * end just multiply n by the right
13136                          * amount. */
13137                         n += (NV) b;
13138                     }
13139                     break;
13140                 }
13141             }
13142
13143           /* if we get here, we had success: make a scalar value from
13144              the number.
13145           */
13146           out:
13147
13148             /* final misplaced underbar check */
13149             if (s[-1] == '_') {
13150                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13151             }
13152
13153             if (overflowed) {
13154                 if (n > 4294967295.0)
13155                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13156                                    "%s number > %s non-portable",
13157                                    Base, max);
13158                 sv = newSVnv(n);
13159             }
13160             else {
13161 #if UVSIZE > 4
13162                 if (u > 0xffffffff)
13163                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13164                                    "%s number > %s non-portable",
13165                                    Base, max);
13166 #endif
13167                 sv = newSVuv(u);
13168             }
13169             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13170                 sv = new_constant(start, s - start, "integer",
13171                                   sv, NULL, NULL, 0);
13172             else if (PL_hints & HINT_NEW_BINARY)
13173                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13174         }
13175         break;
13176
13177     /*
13178       handle decimal numbers.
13179       we're also sent here when we read a 0 as the first digit
13180     */
13181     case '1': case '2': case '3': case '4': case '5':
13182     case '6': case '7': case '8': case '9': case '.':
13183       decimal:
13184         d = PL_tokenbuf;
13185         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13186         floatit = FALSE;
13187
13188         /* read next group of digits and _ and copy into d */
13189         while (isDIGIT(*s) || *s == '_') {
13190             /* skip underscores, checking for misplaced ones
13191                if -w is on
13192             */
13193             if (*s == '_') {
13194                 if (lastub && s == lastub + 1)
13195                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13196                                    "Misplaced _ in number");
13197                 lastub = s++;
13198             }
13199             else {
13200                 /* check for end of fixed-length buffer */
13201                 if (d >= e)
13202                     Perl_croak(aTHX_ number_too_long);
13203                 /* if we're ok, copy the character */
13204                 *d++ = *s++;
13205             }
13206         }
13207
13208         /* final misplaced underbar check */
13209         if (lastub && s == lastub + 1) {
13210             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13211         }
13212
13213         /* read a decimal portion if there is one.  avoid
13214            3..5 being interpreted as the number 3. followed
13215            by .5
13216         */
13217         if (*s == '.' && s[1] != '.') {
13218             floatit = TRUE;
13219             *d++ = *s++;
13220
13221             if (*s == '_') {
13222                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13223                                "Misplaced _ in number");
13224                 lastub = s;
13225             }
13226
13227             /* copy, ignoring underbars, until we run out of digits.
13228             */
13229             for (; isDIGIT(*s) || *s == '_'; s++) {
13230                 /* fixed length buffer check */
13231                 if (d >= e)
13232                     Perl_croak(aTHX_ number_too_long);
13233                 if (*s == '_') {
13234                    if (lastub && s == lastub + 1)
13235                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13236                                       "Misplaced _ in number");
13237                    lastub = s;
13238                 }
13239                 else
13240                     *d++ = *s;
13241             }
13242             /* fractional part ending in underbar? */
13243             if (s[-1] == '_') {
13244                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13245                                "Misplaced _ in number");
13246             }
13247             if (*s == '.' && isDIGIT(s[1])) {
13248                 /* oops, it's really a v-string, but without the "v" */
13249                 s = start;
13250                 goto vstring;
13251             }
13252         }
13253
13254         /* read exponent part, if present */
13255         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13256             floatit = TRUE;
13257             s++;
13258
13259             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13260             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
13261
13262             /* stray preinitial _ */
13263             if (*s == '_') {
13264                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13265                                "Misplaced _ in number");
13266                 lastub = s++;
13267             }
13268
13269             /* allow positive or negative exponent */
13270             if (*s == '+' || *s == '-')
13271                 *d++ = *s++;
13272
13273             /* stray initial _ */
13274             if (*s == '_') {
13275                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13276                                "Misplaced _ in number");
13277                 lastub = s++;
13278             }
13279
13280             /* read digits of exponent */
13281             while (isDIGIT(*s) || *s == '_') {
13282                 if (isDIGIT(*s)) {
13283                     if (d >= e)
13284                         Perl_croak(aTHX_ number_too_long);
13285                     *d++ = *s++;
13286                 }
13287                 else {
13288                    if (((lastub && s == lastub + 1) ||
13289                         (!isDIGIT(s[1]) && s[1] != '_')))
13290                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13291                                       "Misplaced _ in number");
13292                    lastub = s++;
13293                 }
13294             }
13295         }
13296
13297
13298         /*
13299            We try to do an integer conversion first if no characters
13300            indicating "float" have been found.
13301          */
13302
13303         if (!floatit) {
13304             UV uv;
13305             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13306
13307             if (flags == IS_NUMBER_IN_UV) {
13308               if (uv <= IV_MAX)
13309                 sv = newSViv(uv); /* Prefer IVs over UVs. */
13310               else
13311                 sv = newSVuv(uv);
13312             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13313               if (uv <= (UV) IV_MIN)
13314                 sv = newSViv(-(IV)uv);
13315               else
13316                 floatit = TRUE;
13317             } else
13318               floatit = TRUE;
13319         }
13320         if (floatit) {
13321             /* terminate the string */
13322             *d = '\0';
13323             nv = Atof(PL_tokenbuf);
13324             sv = newSVnv(nv);
13325         }
13326
13327         if ( floatit
13328              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13329             const char *const key = floatit ? "float" : "integer";
13330             const STRLEN keylen = floatit ? 5 : 7;
13331             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13332                                 key, keylen, sv, NULL, NULL, 0);
13333         }
13334         break;
13335
13336     /* if it starts with a v, it could be a v-string */
13337     case 'v':
13338 vstring:
13339                 sv = newSV(5); /* preallocate storage space */
13340                 s = scan_vstring(s, PL_bufend, sv);
13341         break;
13342     }
13343
13344     /* make the op for the constant and return */
13345
13346     if (sv)
13347         lvalp->opval = newSVOP(OP_CONST, 0, sv);
13348     else
13349         lvalp->opval = NULL;
13350
13351     return (char *)s;
13352 }
13353
13354 STATIC char *
13355 S_scan_formline(pTHX_ register char *s)
13356 {
13357     dVAR;
13358     register char *eol;
13359     register char *t;
13360     SV * const stuff = newSVpvs("");
13361     bool needargs = FALSE;
13362     bool eofmt = FALSE;
13363 #ifdef PERL_MAD
13364     char *tokenstart = s;
13365     SV* savewhite = NULL;
13366
13367     if (PL_madskills) {
13368         savewhite = PL_thiswhite;
13369         PL_thiswhite = 0;
13370     }
13371 #endif
13372
13373     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13374
13375     while (!needargs) {
13376         if (*s == '.') {
13377             t = s+1;
13378 #ifdef PERL_STRICT_CR
13379             while (SPACE_OR_TAB(*t))
13380                 t++;
13381 #else
13382             while (SPACE_OR_TAB(*t) || *t == '\r')
13383                 t++;
13384 #endif
13385             if (*t == '\n' || t == PL_bufend) {
13386                 eofmt = TRUE;
13387                 break;
13388             }
13389         }
13390         if (PL_in_eval && !PL_rsfp) {
13391             eol = (char *) memchr(s,'\n',PL_bufend-s);
13392             if (!eol++)
13393                 eol = PL_bufend;
13394         }
13395         else
13396             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13397         if (*s != '#') {
13398             for (t = s; t < eol; t++) {
13399                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13400                     needargs = FALSE;
13401                     goto enough;        /* ~~ must be first line in formline */
13402                 }
13403                 if (*t == '@' || *t == '^')
13404                     needargs = TRUE;
13405             }
13406             if (eol > s) {
13407                 sv_catpvn(stuff, s, eol-s);
13408 #ifndef PERL_STRICT_CR
13409                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13410                     char *end = SvPVX(stuff) + SvCUR(stuff);
13411                     end[-2] = '\n';
13412                     end[-1] = '\0';
13413                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13414                 }
13415 #endif
13416             }
13417             else
13418               break;
13419         }
13420         s = (char*)eol;
13421         if (PL_rsfp) {
13422             bool got_some;
13423 #ifdef PERL_MAD
13424             if (PL_madskills) {
13425                 if (PL_thistoken)
13426                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13427                 else
13428                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13429             }
13430 #endif
13431             PL_bufptr = PL_bufend;
13432             CopLINE_inc(PL_curcop);
13433             got_some = lex_next_chunk(0);
13434             CopLINE_dec(PL_curcop);
13435             s = PL_bufptr;
13436 #ifdef PERL_MAD
13437             tokenstart = PL_bufptr;
13438 #endif
13439             if (!got_some)
13440                 break;
13441         }
13442         incline(s);
13443     }
13444   enough:
13445     if (SvCUR(stuff)) {
13446         PL_expect = XTERM;
13447         if (needargs) {
13448             PL_lex_state = LEX_NORMAL;
13449             start_force(PL_curforce);
13450             NEXTVAL_NEXTTOKE.ival = 0;
13451             force_next(',');
13452         }
13453         else
13454             PL_lex_state = LEX_FORMLINE;
13455         if (!IN_BYTES) {
13456             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13457                 SvUTF8_on(stuff);
13458             else if (PL_encoding)
13459                 sv_recode_to_utf8(stuff, PL_encoding);
13460         }
13461         start_force(PL_curforce);
13462         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13463         force_next(THING);
13464         start_force(PL_curforce);
13465         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13466         force_next(LSTOP);
13467     }
13468     else {
13469         SvREFCNT_dec(stuff);
13470         if (eofmt)
13471             PL_lex_formbrack = 0;
13472         PL_bufptr = s;
13473     }
13474 #ifdef PERL_MAD
13475     if (PL_madskills) {
13476         if (PL_thistoken)
13477             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13478         else
13479             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13480         PL_thiswhite = savewhite;
13481     }
13482 #endif
13483     return s;
13484 }
13485
13486 I32
13487 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13488 {
13489     dVAR;
13490     const I32 oldsavestack_ix = PL_savestack_ix;
13491     CV* const outsidecv = PL_compcv;
13492
13493     if (PL_compcv) {
13494         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13495     }
13496     SAVEI32(PL_subline);
13497     save_item(PL_subname);
13498     SAVESPTR(PL_compcv);
13499
13500     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13501     CvFLAGS(PL_compcv) |= flags;
13502
13503     PL_subline = CopLINE(PL_curcop);
13504     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13505     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13506     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13507
13508     return oldsavestack_ix;
13509 }
13510
13511 #ifdef __SC__
13512 #pragma segment Perl_yylex
13513 #endif
13514 static int
13515 S_yywarn(pTHX_ const char *const s)
13516 {
13517     dVAR;
13518
13519     PERL_ARGS_ASSERT_YYWARN;
13520
13521     PL_in_eval |= EVAL_WARNONLY;
13522     yyerror(s);
13523     PL_in_eval &= ~EVAL_WARNONLY;
13524     return 0;
13525 }
13526
13527 int
13528 Perl_yyerror(pTHX_ const char *const s)
13529 {
13530     dVAR;
13531     const char *where = NULL;
13532     const char *context = NULL;
13533     int contlen = -1;
13534     SV *msg;
13535     int yychar  = PL_parser->yychar;
13536
13537     PERL_ARGS_ASSERT_YYERROR;
13538
13539     if (!yychar || (yychar == ';' && !PL_rsfp))
13540         where = "at EOF";
13541     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13542       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13543       PL_oldbufptr != PL_bufptr) {
13544         /*
13545                 Only for NetWare:
13546                 The code below is removed for NetWare because it abends/crashes on NetWare
13547                 when the script has error such as not having the closing quotes like:
13548                     if ($var eq "value)
13549                 Checking of white spaces is anyway done in NetWare code.
13550         */
13551 #ifndef NETWARE
13552         while (isSPACE(*PL_oldoldbufptr))
13553             PL_oldoldbufptr++;
13554 #endif
13555         context = PL_oldoldbufptr;
13556         contlen = PL_bufptr - PL_oldoldbufptr;
13557     }
13558     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13559       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13560         /*
13561                 Only for NetWare:
13562                 The code below is removed for NetWare because it abends/crashes on NetWare
13563                 when the script has error such as not having the closing quotes like:
13564                     if ($var eq "value)
13565                 Checking of white spaces is anyway done in NetWare code.
13566         */
13567 #ifndef NETWARE
13568         while (isSPACE(*PL_oldbufptr))
13569             PL_oldbufptr++;
13570 #endif
13571         context = PL_oldbufptr;
13572         contlen = PL_bufptr - PL_oldbufptr;
13573     }
13574     else if (yychar > 255)
13575         where = "next token ???";
13576     else if (yychar == -2) { /* YYEMPTY */
13577         if (PL_lex_state == LEX_NORMAL ||
13578            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13579             where = "at end of line";
13580         else if (PL_lex_inpat)
13581             where = "within pattern";
13582         else
13583             where = "within string";
13584     }
13585     else {
13586         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13587         if (yychar < 32)
13588             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13589         else if (isPRINT_LC(yychar)) {
13590             const char string = yychar;
13591             sv_catpvn(where_sv, &string, 1);
13592         }
13593         else
13594             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13595         where = SvPVX_const(where_sv);
13596     }
13597     msg = sv_2mortal(newSVpv(s, 0));
13598     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13599         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13600     if (context)
13601         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13602     else
13603         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13604     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13605         Perl_sv_catpvf(aTHX_ msg,
13606         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13607                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13608         PL_multi_end = 0;
13609     }
13610     if (PL_in_eval & EVAL_WARNONLY) {
13611         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13612     }
13613     else
13614         qerror(msg);
13615     if (PL_error_count >= 10) {
13616         if (PL_in_eval && SvCUR(ERRSV))
13617             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13618                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13619         else
13620             Perl_croak(aTHX_ "%s has too many errors.\n",
13621             OutCopFILE(PL_curcop));
13622     }
13623     PL_in_my = 0;
13624     PL_in_my_stash = NULL;
13625     return 0;
13626 }
13627 #ifdef __SC__
13628 #pragma segment Main
13629 #endif
13630
13631 STATIC char*
13632 S_swallow_bom(pTHX_ U8 *s)
13633 {
13634     dVAR;
13635     const STRLEN slen = SvCUR(PL_linestr);
13636
13637     PERL_ARGS_ASSERT_SWALLOW_BOM;
13638
13639     switch (s[0]) {
13640     case 0xFF:
13641         if (s[1] == 0xFE) {
13642             /* UTF-16 little-endian? (or UTF-32LE?) */
13643             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13644                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13645 #ifndef PERL_NO_UTF16_FILTER
13646             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13647             s += 2;
13648             if (PL_bufend > (char*)s) {
13649                 s = add_utf16_textfilter(s, TRUE);
13650             }
13651 #else
13652             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13653 #endif
13654         }
13655         break;
13656     case 0xFE:
13657         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13658 #ifndef PERL_NO_UTF16_FILTER
13659             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13660             s += 2;
13661             if (PL_bufend > (char *)s) {
13662                 s = add_utf16_textfilter(s, FALSE);
13663             }
13664 #else
13665             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13666 #endif
13667         }
13668         break;
13669     case 0xEF:
13670         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13671             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13672             s += 3;                      /* UTF-8 */
13673         }
13674         break;
13675     case 0:
13676         if (slen > 3) {
13677              if (s[1] == 0) {
13678                   if (s[2] == 0xFE && s[3] == 0xFF) {
13679                        /* UTF-32 big-endian */
13680                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13681                   }
13682              }
13683              else if (s[2] == 0 && s[3] != 0) {
13684                   /* Leading bytes
13685                    * 00 xx 00 xx
13686                    * are a good indicator of UTF-16BE. */
13687 #ifndef PERL_NO_UTF16_FILTER
13688                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13689                   s = add_utf16_textfilter(s, FALSE);
13690 #else
13691                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13692 #endif
13693              }
13694         }
13695 #ifdef EBCDIC
13696     case 0xDD:
13697         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13698             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13699             s += 4;                      /* UTF-8 */
13700         }
13701         break;
13702 #endif
13703
13704     default:
13705          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13706                   /* Leading bytes
13707                    * xx 00 xx 00
13708                    * are a good indicator of UTF-16LE. */
13709 #ifndef PERL_NO_UTF16_FILTER
13710               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13711               s = add_utf16_textfilter(s, TRUE);
13712 #else
13713               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13714 #endif
13715          }
13716     }
13717     return (char*)s;
13718 }
13719
13720
13721 #ifndef PERL_NO_UTF16_FILTER
13722 static I32
13723 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13724 {
13725     dVAR;
13726     SV *const filter = FILTER_DATA(idx);
13727     /* We re-use this each time round, throwing the contents away before we
13728        return.  */
13729     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13730     SV *const utf8_buffer = filter;
13731     IV status = IoPAGE(filter);
13732     const bool reverse = cBOOL(IoLINES(filter));
13733     I32 retval;
13734
13735     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13736
13737     /* As we're automatically added, at the lowest level, and hence only called
13738        from this file, we can be sure that we're not called in block mode. Hence
13739        don't bother writing code to deal with block mode.  */
13740     if (maxlen) {
13741         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13742     }
13743     if (status < 0) {
13744         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13745     }
13746     DEBUG_P(PerlIO_printf(Perl_debug_log,
13747                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13748                           FPTR2DPTR(void *, S_utf16_textfilter),
13749                           reverse ? 'l' : 'b', idx, maxlen, status,
13750                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13751
13752     while (1) {
13753         STRLEN chars;
13754         STRLEN have;
13755         I32 newlen;
13756         U8 *end;
13757         /* First, look in our buffer of existing UTF-8 data:  */
13758         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13759
13760         if (nl) {
13761             ++nl;
13762         } else if (status == 0) {
13763             /* EOF */
13764             IoPAGE(filter) = 0;
13765             nl = SvEND(utf8_buffer);
13766         }
13767         if (nl) {
13768             STRLEN got = nl - SvPVX(utf8_buffer);
13769             /* Did we have anything to append?  */
13770             retval = got != 0;
13771             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13772             /* Everything else in this code works just fine if SVp_POK isn't
13773                set.  This, however, needs it, and we need it to work, else
13774                we loop infinitely because the buffer is never consumed.  */
13775             sv_chop(utf8_buffer, nl);
13776             break;
13777         }
13778
13779         /* OK, not a complete line there, so need to read some more UTF-16.
13780            Read an extra octect if the buffer currently has an odd number. */
13781         while (1) {
13782             if (status <= 0)
13783                 break;
13784             if (SvCUR(utf16_buffer) >= 2) {
13785                 /* Location of the high octet of the last complete code point.
13786                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13787                    *coupled* with all the benefits of partial reads and
13788                    endianness.  */
13789                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13790                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13791
13792                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13793                     break;
13794                 }
13795
13796                 /* We have the first half of a surrogate. Read more.  */
13797                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13798             }
13799
13800             status = FILTER_READ(idx + 1, utf16_buffer,
13801                                  160 + (SvCUR(utf16_buffer) & 1));
13802             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13803             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13804             if (status < 0) {
13805                 /* Error */
13806                 IoPAGE(filter) = status;
13807                 return status;
13808             }
13809         }
13810
13811         chars = SvCUR(utf16_buffer) >> 1;
13812         have = SvCUR(utf8_buffer);
13813         SvGROW(utf8_buffer, have + chars * 3 + 1);
13814
13815         if (reverse) {
13816             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13817                                          (U8*)SvPVX_const(utf8_buffer) + have,
13818                                          chars * 2, &newlen);
13819         } else {
13820             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13821                                 (U8*)SvPVX_const(utf8_buffer) + have,
13822                                 chars * 2, &newlen);
13823         }
13824         SvCUR_set(utf8_buffer, have + newlen);
13825         *end = '\0';
13826
13827         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13828            it's private to us, and utf16_to_utf8{,reversed} take a
13829            (pointer,length) pair, rather than a NUL-terminated string.  */
13830         if(SvCUR(utf16_buffer) & 1) {
13831             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13832             SvCUR_set(utf16_buffer, 1);
13833         } else {
13834             SvCUR_set(utf16_buffer, 0);
13835         }
13836     }
13837     DEBUG_P(PerlIO_printf(Perl_debug_log,
13838                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13839                           status,
13840                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13841     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13842     return retval;
13843 }
13844
13845 static U8 *
13846 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13847 {
13848     SV *filter = filter_add(S_utf16_textfilter, NULL);
13849
13850     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13851
13852     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13853     sv_setpvs(filter, "");
13854     IoLINES(filter) = reversed;
13855     IoPAGE(filter) = 1; /* Not EOF */
13856
13857     /* Sadly, we have to return a valid pointer, come what may, so we have to
13858        ignore any error return from this.  */
13859     SvCUR_set(PL_linestr, 0);
13860     if (FILTER_READ(0, PL_linestr, 0)) {
13861         SvUTF8_on(PL_linestr);
13862     } else {
13863         SvUTF8_on(PL_linestr);
13864     }
13865     PL_bufend = SvEND(PL_linestr);
13866     return (U8*)SvPVX(PL_linestr);
13867 }
13868 #endif
13869
13870 /*
13871 Returns a pointer to the next character after the parsed
13872 vstring, as well as updating the passed in sv.
13873
13874 Function must be called like
13875
13876         sv = newSV(5);
13877         s = scan_vstring(s,e,sv);
13878
13879 where s and e are the start and end of the string.
13880 The sv should already be large enough to store the vstring
13881 passed in, for performance reasons.
13882
13883 */
13884
13885 char *
13886 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13887 {
13888     dVAR;
13889     const char *pos = s;
13890     const char *start = s;
13891
13892     PERL_ARGS_ASSERT_SCAN_VSTRING;
13893
13894     if (*pos == 'v') pos++;  /* get past 'v' */
13895     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13896         pos++;
13897     if ( *pos != '.') {
13898         /* this may not be a v-string if followed by => */
13899         const char *next = pos;
13900         while (next < e && isSPACE(*next))
13901             ++next;
13902         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13903             /* return string not v-string */
13904             sv_setpvn(sv,(char *)s,pos-s);
13905             return (char *)pos;
13906         }
13907     }
13908
13909     if (!isALPHA(*pos)) {
13910         U8 tmpbuf[UTF8_MAXBYTES+1];
13911
13912         if (*s == 'v')
13913             s++;  /* get past 'v' */
13914
13915         sv_setpvs(sv, "");
13916
13917         for (;;) {
13918             /* this is atoi() that tolerates underscores */
13919             U8 *tmpend;
13920             UV rev = 0;
13921             const char *end = pos;
13922             UV mult = 1;
13923             while (--end >= s) {
13924                 if (*end != '_') {
13925                     const UV orev = rev;
13926                     rev += (*end - '0') * mult;
13927                     mult *= 10;
13928                     if (orev > rev)
13929                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13930                                          "Integer overflow in decimal number");
13931                 }
13932             }
13933 #ifdef EBCDIC
13934             if (rev > 0x7FFFFFFF)
13935                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13936 #endif
13937             /* Append native character for the rev point */
13938             tmpend = uvchr_to_utf8(tmpbuf, rev);
13939             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13940             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13941                  SvUTF8_on(sv);
13942             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13943                  s = ++pos;
13944             else {
13945                  s = pos;
13946                  break;
13947             }
13948             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13949                  pos++;
13950         }
13951         SvPOK_on(sv);
13952         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13953         SvRMAGICAL_on(sv);
13954     }
13955     return (char *)s;
13956 }
13957
13958 int
13959 Perl_keyword_plugin_standard(pTHX_
13960         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13961 {
13962     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13963     PERL_UNUSED_CONTEXT;
13964     PERL_UNUSED_ARG(keyword_ptr);
13965     PERL_UNUSED_ARG(keyword_len);
13966     PERL_UNUSED_ARG(op_ptr);
13967     return KEYWORD_PLUGIN_DECLINE;
13968 }
13969
13970 /*
13971 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
13972
13973 Parse a single complete Perl statement.  This may be a normal imperative
13974 statement, including optional label, or a declaration that has
13975 compile-time effect.  It is up to the caller to ensure that the dynamic
13976 parser state (L</PL_parser> et al) is correctly set to reflect the source
13977 of the code to be parsed and the lexical context for the statement.
13978
13979 The op tree representing the statement is returned.  This may be a
13980 null pointer if the statement is null, for example if it was actually
13981 a subroutine definition (which has compile-time side effects).  If not
13982 null, it will be the result of a L</newSTATEOP> call, normally including
13983 a C<nextstate> or equivalent op.
13984
13985 If an error occurs in parsing or compilation, in most cases a valid op
13986 tree (most likely null) is returned anyway.  The error is reflected in
13987 the parser state, normally resulting in a single exception at the top
13988 level of parsing which covers all the compilation errors that occurred.
13989 Some compilation errors, however, will throw an exception immediately.
13990
13991 The I<flags> parameter is reserved for future use, and must always
13992 be zero.
13993
13994 =cut
13995 */
13996
13997 OP *
13998 Perl_parse_fullstmt(pTHX_ U32 flags)
13999 {
14000     OP *fullstmtop;
14001     if (flags)
14002         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
14003     ENTER;
14004     SAVEVPTR(PL_eval_root);
14005     PL_eval_root = NULL;
14006     if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
14007         qerror(Perl_mess(aTHX_ "Parse error"));
14008     fullstmtop = PL_eval_root;
14009     LEAVE;
14010     return fullstmtop;
14011 }
14012
14013 /*
14014 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
14015
14016 Parse a sequence of zero or more Perl statements.  These may be normal
14017 imperative statements, including optional labels, or declarations
14018 that have compile-time effect, or any mixture thereof.  The statement
14019 sequence ends when a closing brace or end-of-file is encountered in a
14020 place where a new statement could have validly started.  It is up to
14021 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
14022 is correctly set to reflect the source of the code to be parsed and the
14023 lexical context for the statements.
14024
14025 The op tree representing the statement sequence is returned.  This may
14026 be a null pointer if the statements were all null, for example if there
14027 were no statements or if there were only subroutine definitions (which
14028 have compile-time side effects).  If not null, it will be a C<lineseq>
14029 list, normally including C<nextstate> or equivalent ops.
14030
14031 If an error occurs in parsing or compilation, in most cases a valid op
14032 tree is returned anyway.  The error is reflected in the parser state,
14033 normally resulting in a single exception at the top level of parsing
14034 which covers all the compilation errors that occurred.  Some compilation
14035 errors, however, will throw an exception immediately.
14036
14037 The I<flags> parameter is reserved for future use, and must always
14038 be zero.
14039
14040 =cut
14041 */
14042
14043 OP *
14044 Perl_parse_stmtseq(pTHX_ U32 flags)
14045 {
14046     OP *stmtseqop;
14047     if (flags)
14048         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
14049     ENTER;
14050     SAVEVPTR(PL_eval_root);
14051     PL_eval_root = NULL;
14052     if(yyparse(GRAMSTMTSEQ) && !PL_parser->error_count)
14053         qerror(Perl_mess(aTHX_ "Parse error"));
14054     stmtseqop = PL_eval_root;
14055     LEAVE;
14056     return stmtseqop;
14057 }
14058
14059 void
14060 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
14061 {
14062     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
14063     deprecate("qw(...) as parentheses");
14064     force_next(')');
14065     if (qwlist->op_type == OP_STUB) {
14066         op_free(qwlist);
14067     }
14068     else {
14069         start_force(PL_curforce);
14070         NEXTVAL_NEXTTOKE.opval = qwlist;
14071         force_next(THING);
14072     }
14073     force_next('(');
14074 }
14075
14076 /*
14077  * Local variables:
14078  * c-indentation-style: bsd
14079  * c-basic-offset: 4
14080  * indent-tabs-mode: t
14081  * End:
14082  *
14083  * ex: set ts=8 sts=4 sw=4 noet:
14084  */