toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  52 #define PL_lex_casemods         (PL_parser->lex_casemods)
  53 #define PL_lex_casestack        (PL_parser->lex_casestack)
  54 #define PL_lex_defer            (PL_parser->lex_defer)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_expect           (PL_parser->lex_expect)
  57 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  58 #define PL_lex_inpat            (PL_parser->lex_inpat)
  59 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  60 #define PL_lex_op               (PL_parser->lex_op)
  61 #define PL_lex_repl             (PL_parser->lex_repl)
  62 #define PL_lex_starts           (PL_parser->lex_starts)
  63 #define PL_lex_stuff            (PL_parser->lex_stuff)
  64 #define PL_multi_start          (PL_parser->multi_start)
  65 #define PL_multi_open           (PL_parser->multi_open)
  66 #define PL_multi_close          (PL_parser->multi_close)
  67 #define PL_pending_ident        (PL_parser->pending_ident)
  68 #define PL_preambled            (PL_parser->preambled)
  69 #define PL_sublex_info          (PL_parser->sublex_info)
  70 #define PL_linestr              (PL_parser->linestr)
  71 #define PL_expect               (PL_parser->expect)
  72 #define PL_copline              (PL_parser->copline)
  73 #define PL_bufptr               (PL_parser->bufptr)
  74 #define PL_oldbufptr            (PL_parser->oldbufptr)
  75 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  76 #define PL_linestart            (PL_parser->linestart)
  77 #define PL_bufend               (PL_parser->bufend)
  78 #define PL_last_uni             (PL_parser->last_uni)
  79 #define PL_last_lop             (PL_parser->last_lop)
  80 #define PL_last_lop_op          (PL_parser->last_lop_op)
  81 #define PL_lex_state            (PL_parser->lex_state)
  82 #define PL_rsfp                 (PL_parser->rsfp)
  83 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  84 #define PL_in_my                (PL_parser->in_my)
  85 #define PL_in_my_stash          (PL_parser->in_my_stash)
  86 #define PL_tokenbuf             (PL_parser->tokenbuf)
  87 #define PL_multi_end            (PL_parser->multi_end)
  88 #define PL_error_count          (PL_parser->error_count)
  89
  90 #ifdef PERL_MAD
  91 #  define PL_endwhite           (PL_parser->endwhite)
  92 #  define PL_faketokens         (PL_parser->faketokens)
  93 #  define PL_lasttoke           (PL_parser->lasttoke)
  94 #  define PL_nextwhite          (PL_parser->nextwhite)
  95 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  96 #  define PL_skipwhite          (PL_parser->skipwhite)
  97 #  define PL_thisclose          (PL_parser->thisclose)
  98 #  define PL_thismad            (PL_parser->thismad)
  99 #  define PL_thisopen           (PL_parser->thisopen)
 100 #  define PL_thisstuff          (PL_parser->thisstuff)
 101 #  define PL_thistoken          (PL_parser->thistoken)
 102 #  define PL_thiswhite          (PL_parser->thiswhite)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_nexttoke           (PL_parser->nexttoke)
 105 #  define PL_curforce           (PL_parser->curforce)
 106 #else
 107 #  define PL_nexttoke           (PL_parser->nexttoke)
 108 #  define PL_nexttype           (PL_parser->nexttype)
 109 #  define PL_nextval            (PL_parser->nextval)
 110 #endif
 111
 112 /* This can't be done with embed.fnc, because struct yy_parser contains a
 113    member named pending_ident, which clashes with the generated #define  */
 114 static int
 115 S_pending_ident(pTHX);
 116
 117 static const char ident_too_long[] = "Identifier too long";
 118
 119 #ifdef PERL_MAD
 120 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 121 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 122 #else
 123 #  define CURMAD(slot,sv)
 124 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 125 #endif
 126
 127 #define XFAKEBRACK 128
 128 #define XENUMMASK 127
 129
 130 #ifdef USE_UTF8_SCRIPTS
 131 #   define UTF (!IN_BYTES)
 132 #else
 133 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 134 #endif
 135
 136 /* The maximum number of characters preceding the unrecognized one to display */
 137 #define UNRECOGNIZED_PRECEDE_COUNT 10
 138
 139 /* In variables named $^X, these are the legal values for X.
 140  * 1999-02-27 mjd-perl-patch@plover.com */
 141 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 142
 143 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 144
 145 /* LEX_* are values for PL_lex_state, the state of the lexer.
 146  * They are arranged oddly so that the guard on the switch statement
 147  * can get by with a single comparison (if the compiler is smart enough).
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #ifdef ff_next
 186 #undef ff_next
 187 #endif
 188
 189 #include "keywords.h"
 190
 191 /* CLINE is a macro that ensures PL_copline has a sane value */
 192
 193 #ifdef CLINE
 194 #undef CLINE
 195 #endif
 196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 197
 198 #ifdef PERL_MAD
 199 #  define SKIPSPACE0(s) skipspace0(s)
 200 #  define SKIPSPACE1(s) skipspace1(s)
 201 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 202 #  define PEEKSPACE(s) skipspace2(s,0)
 203 #else
 204 #  define SKIPSPACE0(s) skipspace(s)
 205 #  define SKIPSPACE1(s) skipspace(s)
 206 #  define SKIPSPACE2(s,tsv) skipspace(s)
 207 #  define PEEKSPACE(s) skipspace(s)
 208 #endif
 209
 210 /*
 211  * Convenience functions to return different tokens and prime the
 212  * lexer for the next token.  They all take an argument.
 213  *
 214  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 215  * OPERATOR     : generic operator
 216  * AOPERATOR    : assignment operator
 217  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 218  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 219  * PREREF       : *EXPR where EXPR is not a simple identifier
 220  * TERM         : expression term
 221  * LOOPX        : loop exiting command (goto, last, dump, etc)
 222  * FTST         : file test operator
 223  * FUN0         : zero-argument function
 224  * FUN1         : not used, except for not, which isn't a UNIOP
 225  * BOop         : bitwise or or xor
 226  * BAop         : bitwise and
 227  * SHop         : shift operator
 228  * PWop         : power operator
 229  * PMop         : pattern-matching operator
 230  * Aop          : addition-level operator
 231  * Mop          : multiplication-level operator
 232  * Eop          : equality-testing operator
 233  * Rop          : relational operator <= != gt
 234  *
 235  * Also see LOP and lop() below.
 236  */
 237
 238 #ifdef DEBUGGING /* Serve -DT. */
 239 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 240 #else
 241 #   define REPORT(retval) (retval)
 242 #endif
 243
 244 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 245 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 246 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 247 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 248 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 249 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 250 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 251 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 252 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 253 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 254 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 255 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 256 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 257 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 258 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 259 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 260 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 261 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 262 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 263 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 264
 265 /* This bit of chicanery makes a unary function followed by
 266  * a parenthesis into a function with one argument, highest precedence.
 267  * The UNIDOR macro is for unary functions that can be followed by the //
 268  * operator (such as C<shift // 0>).
 269  */
 270 #define UNI2(f,x) { \
 271         pl_yylval.ival = f; \
 272         PL_expect = x; \
 273         PL_bufptr = s; \
 274         PL_last_uni = PL_oldbufptr; \
 275         PL_last_lop_op = f; \
 276         if (*s == '(') \
 277             return REPORT( (int)FUNC1 ); \
 278         s = PEEKSPACE(s); \
 279         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 280         }
 281 #define UNI(f)    UNI2(f,XTERM)
 282 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 283
 284 #define UNIBRACK(f) { \
 285         pl_yylval.ival = f; \
 286         PL_bufptr = s; \
 287         PL_last_uni = PL_oldbufptr; \
 288         if (*s == '(') \
 289             return REPORT( (int)FUNC1 ); \
 290         s = PEEKSPACE(s); \
 291         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 292         }
 293
 294 /* grandfather return to old style */
 295 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 296
 297 #ifdef DEBUGGING
 298
 299 /* how to interpret the pl_yylval associated with the token */
 300 enum token_type {
 301     TOKENTYPE_NONE,
 302     TOKENTYPE_IVAL,
 303     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 304     TOKENTYPE_PVAL,
 305     TOKENTYPE_OPVAL,
 306     TOKENTYPE_GVVAL
 307 };
 308
 309 static struct debug_tokens {
 310     const int token;
 311     enum token_type type;
 312     const char *name;
 313 } const debug_tokens[] =
 314 {
 315     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 316     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 317     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 318     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 319     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 320     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 321     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 322     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 323     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 324     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 325     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 326     { DO,               TOKENTYPE_NONE,         "DO" },
 327     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 328     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 329     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 330     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 331     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 332     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 333     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 334     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 335     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 336     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 337     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 338     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 339     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 340     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 341     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 342     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 343     { IF,               TOKENTYPE_IVAL,         "IF" },
 344     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 345     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 346     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 347     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 348     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 349     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 350     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 351     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 352     { MY,               TOKENTYPE_IVAL,         "MY" },
 353     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 354     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 355     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 356     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 357     { OROR,             TOKENTYPE_NONE,         "OROR" },
 358     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 359     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 360     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 361     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 362     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 363     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 364     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 365     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 366     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 367     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 368     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 369     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 370     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 371     { SUB,              TOKENTYPE_NONE,         "SUB" },
 372     { THING,            TOKENTYPE_OPVAL,        "THING" },
 373     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 374     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 375     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 376     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 377     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 378     { USE,              TOKENTYPE_IVAL,         "USE" },
 379     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 380     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 381     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 382     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 383     { 0,                TOKENTYPE_NONE,         NULL }
 384 };
 385
 386 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 387
 388 STATIC int
 389 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 390 {
 391     dVAR;
 392
 393     PERL_ARGS_ASSERT_TOKEREPORT;
 394
 395     if (DEBUG_T_TEST) {
 396         const char *name = NULL;
 397         enum token_type type = TOKENTYPE_NONE;
 398         const struct debug_tokens *p;
 399         SV* const report = newSVpvs("<== ");
 400
 401         for (p = debug_tokens; p->token; p++) {
 402             if (p->token == (int)rv) {
 403                 name = p->name;
 404                 type = p->type;
 405                 break;
 406             }
 407         }
 408         if (name)
 409             Perl_sv_catpv(aTHX_ report, name);
 410         else if ((char)rv > ' ' && (char)rv < '~')
 411             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 412         else if (!rv)
 413             sv_catpvs(report, "EOF");
 414         else
 415             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 416         switch (type) {
 417         case TOKENTYPE_NONE:
 418         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 419             break;
 420         case TOKENTYPE_IVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 422             break;
 423         case TOKENTYPE_OPNUM:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 425                                     PL_op_name[lvalp->ival]);
 426             break;
 427         case TOKENTYPE_PVAL:
 428             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 429             break;
 430         case TOKENTYPE_OPVAL:
 431             if (lvalp->opval) {
 432                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 433                                     PL_op_name[lvalp->opval->op_type]);
 434                 if (lvalp->opval->op_type == OP_CONST) {
 435                     Perl_sv_catpvf(aTHX_ report, " %s",
 436                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 437                 }
 438
 439             }
 440             else
 441                 sv_catpvs(report, "(opval=null)");
 442             break;
 443         }
 444         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 445     };
 446     return (int)rv;
 447 }
 448
 449
 450 /* print the buffer with suitable escapes */
 451
 452 STATIC void
 453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 454 {
 455     SV* const tmp = newSVpvs("");
 456
 457     PERL_ARGS_ASSERT_PRINTBUF;
 458
 459     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 460     SvREFCNT_dec(tmp);
 461 }
 462
 463 #endif
 464
 465 static int
 466 S_deprecate_commaless_var_list(pTHX) {
 467     PL_expect = XTERM;
 468     deprecate("comma-less variable list");
 469     return REPORT(','); /* grandfather non-comma-format format */
 470 }
 471
 472 /*
 473  * S_ao
 474  *
 475  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 476  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 477  */
 478
 479 STATIC int
 480 S_ao(pTHX_ int toketype)
 481 {
 482     dVAR;
 483     if (*PL_bufptr == '=') {
 484         PL_bufptr++;
 485         if (toketype == ANDAND)
 486             pl_yylval.ival = OP_ANDASSIGN;
 487         else if (toketype == OROR)
 488             pl_yylval.ival = OP_ORASSIGN;
 489         else if (toketype == DORDOR)
 490             pl_yylval.ival = OP_DORASSIGN;
 491         toketype = ASSIGNOP;
 492     }
 493     return toketype;
 494 }
 495
 496 /*
 497  * S_no_op
 498  * When Perl expects an operator and finds something else, no_op
 499  * prints the warning.  It always prints "<something> found where
 500  * operator expected.  It prints "Missing semicolon on previous line?"
 501  * if the surprise occurs at the start of the line.  "do you need to
 502  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 503  * where the compiler doesn't know if foo is a method call or a function.
 504  * It prints "Missing operator before end of line" if there's nothing
 505  * after the missing operator, or "... before <...>" if there is something
 506  * after the missing operator.
 507  */
 508
 509 STATIC void
 510 S_no_op(pTHX_ const char *const what, char *s)
 511 {
 512     dVAR;
 513     char * const oldbp = PL_bufptr;
 514     const bool is_first = (PL_oldbufptr == PL_linestart);
 515
 516     PERL_ARGS_ASSERT_NO_OP;
 517
 518     if (!s)
 519         s = oldbp;
 520     else
 521         PL_bufptr = s;
 522     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 523     if (ckWARN_d(WARN_SYNTAX)) {
 524         if (is_first)
 525             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 526                     "\t(Missing semicolon on previous line?)\n");
 527         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 528             const char *t;
 529             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 530                 NOOP;
 531             if (t < PL_bufptr && isSPACE(*t))
 532                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 533                         "\t(Do you need to predeclare %.*s?)\n",
 534                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 535         }
 536         else {
 537             assert(s >= oldbp);
 538             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 539                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 540         }
 541     }
 542     PL_bufptr = oldbp;
 543 }
 544
 545 /*
 546  * S_missingterm
 547  * Complain about missing quote/regexp/heredoc terminator.
 548  * If it's called with NULL then it cauterizes the line buffer.
 549  * If we're in a delimited string and the delimiter is a control
 550  * character, it's reformatted into a two-char sequence like ^C.
 551  * This is fatal.
 552  */
 553
 554 STATIC void
 555 S_missingterm(pTHX_ char *s)
 556 {
 557     dVAR;
 558     char tmpbuf[3];
 559     char q;
 560     if (s) {
 561         char * const nl = strrchr(s,'\n');
 562         if (nl)
 563             *nl = '\0';
 564     }
 565     else if (isCNTRL(PL_multi_close)) {
 566         *tmpbuf = '^';
 567         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 568         tmpbuf[2] = '\0';
 569         s = tmpbuf;
 570     }
 571     else {
 572         *tmpbuf = (char)PL_multi_close;
 573         tmpbuf[1] = '\0';
 574         s = tmpbuf;
 575     }
 576     q = strchr(s,'"') ? '\'' : '"';
 577     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 578 }
 579
 580 #define FEATURE_IS_ENABLED(name)                                        \
 581         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 582             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 583 /* The longest string we pass in.  */
 584 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 585
 586 /*
 587  * S_feature_is_enabled
 588  * Check whether the named feature is enabled.
 589  */
 590 STATIC bool
 591 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 592 {
 593     dVAR;
 594     HV * const hinthv = GvHV(PL_hintgv);
 595     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 596
 597     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 598
 599     assert(namelen <= MAX_FEATURE_LEN);
 600     memcpy(&he_name[8], name, namelen);
 601
 602     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 603 }
 604
 605 /*
 606  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 607  * utf16-to-utf8-reversed.
 608  */
 609
 610 #ifdef PERL_CR_FILTER
 611 static void
 612 strip_return(SV *sv)
 613 {
 614     register const char *s = SvPVX_const(sv);
 615     register const char * const e = s + SvCUR(sv);
 616
 617     PERL_ARGS_ASSERT_STRIP_RETURN;
 618
 619     /* outer loop optimized to do nothing if there are no CR-LFs */
 620     while (s < e) {
 621         if (*s++ == '\r' && *s == '\n') {
 622             /* hit a CR-LF, need to copy the rest */
 623             register char *d = s - 1;
 624             *d++ = *s++;
 625             while (s < e) {
 626                 if (*s == '\r' && s[1] == '\n')
 627                     s++;
 628                 *d++ = *s++;
 629             }
 630             SvCUR(sv) -= s - d;
 631             return;
 632         }
 633     }
 634 }
 635
 636 STATIC I32
 637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 638 {
 639     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 640     if (count > 0 && !maxlen)
 641         strip_return(sv);
 642     return count;
 643 }
 644 #endif
 645
 646
 647
 648 /*
 649  * Perl_lex_start
 650  *
 651  * Create a parser object and initialise its parser and lexer fields
 652  *
 653  * rsfp       is the opened file handle to read from (if any),
 654  *
 655  * line       holds any initial content already read from the file (or in
 656  *            the case of no file, such as an eval, the whole contents);
 657  *
 658  * new_filter indicates that this is a new file and it shouldn't inherit
 659  *            the filters from the current parser (ie require).
 660  */
 661
 662 void
 663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 664 {
 665     dVAR;
 666     const char *s = NULL;
 667     STRLEN len;
 668     yy_parser *parser, *oparser;
 669
 670     /* create and initialise a parser */
 671
 672     Newxz(parser, 1, yy_parser);
 673     parser->old_parser = oparser = PL_parser;
 674     PL_parser = parser;
 675
 676     parser->stack = NULL;
 677     parser->ps = NULL;
 678     parser->stack_size = 0;
 679
 680     /* on scope exit, free this parser and restore any outer one */
 681     SAVEPARSER(parser);
 682     parser->saved_curcop = PL_curcop;
 683
 684     /* initialise lexer state */
 685
 686 #ifdef PERL_MAD
 687     parser->curforce = -1;
 688 #else
 689     parser->nexttoke = 0;
 690 #endif
 691     parser->error_count = oparser ? oparser->error_count : 0;
 692     parser->copline = NOLINE;
 693     parser->lex_state = LEX_NORMAL;
 694     parser->expect = XSTATE;
 695     parser->rsfp = rsfp;
 696     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 697                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 698
 699     Newx(parser->lex_brackstack, 120, char);
 700     Newx(parser->lex_casestack, 12, char);
 701     *parser->lex_casestack = '\0';
 702
 703     if (line) {
 704         s = SvPV_const(line, len);
 705     } else {
 706         len = 0;
 707     }
 708
 709     if (!len) {
 710         parser->linestr = newSVpvs("\n;");
 711     } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
 712         /* avoid tie/overload weirdness */
 713         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 714         if (s[len-1] != ';')
 715             sv_catpvs(parser->linestr, "\n;");
 716     } else {
 717         SvTEMP_off(line);
 718         SvREFCNT_inc_simple_void_NN(line);
 719         parser->linestr = line;
 720     }
 721     parser->oldoldbufptr =
 722         parser->oldbufptr =
 723         parser->bufptr =
 724         parser->linestart = SvPVX(parser->linestr);
 725     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 726     parser->last_lop = parser->last_uni = NULL;
 727 }
 728
 729
 730 /* delete a parser object */
 731
 732 void
 733 Perl_parser_free(pTHX_  const yy_parser *parser)
 734 {
 735     PERL_ARGS_ASSERT_PARSER_FREE;
 736
 737     PL_curcop = parser->saved_curcop;
 738     SvREFCNT_dec(parser->linestr);
 739
 740     if (parser->rsfp == PerlIO_stdin())
 741         PerlIO_clearerr(parser->rsfp);
 742     else if (parser->rsfp && (!parser->old_parser ||
 743                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 744         PerlIO_close(parser->rsfp);
 745     SvREFCNT_dec(parser->rsfp_filters);
 746
 747     Safefree(parser->lex_brackstack);
 748     Safefree(parser->lex_casestack);
 749     PL_parser = parser->old_parser;
 750     Safefree(parser);
 751 }
 752
 753
 754 /*
 755  * Perl_lex_end
 756  * Finalizer for lexing operations.  Must be called when the parser is
 757  * done with the lexer.
 758  */
 759
 760 void
 761 Perl_lex_end(pTHX)
 762 {
 763     dVAR;
 764     PL_doextract = FALSE;
 765 }
 766
 767 /*
 768 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 769
 770 Buffer scalar containing the chunk currently under consideration of the
 771 text currently being lexed.  This is always a plain string scalar (for
 772 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 773 normal scalar means; instead refer to the buffer directly by the pointer
 774 variables described below.
 775
 776 The lexer maintains various C<char*> pointers to things in the
 777 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 778 reallocated, all of these pointers must be updated.  Don't attempt to
 779 do this manually, but rather use L</lex_grow_linestr> if you need to
 780 reallocate the buffer.
 781
 782 The content of the text chunk in the buffer is commonly exactly one
 783 complete line of input, up to and including a newline terminator,
 784 but there are situations where it is otherwise.  The octets of the
 785 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 786 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 787 flag on this scalar, which may disagree with it.
 788
 789 For direct examination of the buffer, the variable
 790 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 791 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 792 of these pointers is usually preferable to examination of the scalar
 793 through normal scalar means.
 794
 795 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 796
 797 Direct pointer to the end of the chunk of text currently being lexed, the
 798 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 799 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 800 always located at the end of the buffer, and does not count as part of
 801 the buffer's contents.
 802
 803 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 804
 805 Points to the current position of lexing inside the lexer buffer.
 806 Characters around this point may be freely examined, within
 807 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 808 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 809 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 810
 811 Lexing code (whether in the Perl core or not) moves this pointer past
 812 the characters that it consumes.  It is also expected to perform some
 813 bookkeeping whenever a newline character is consumed.  This movement
 814 can be more conveniently performed by the function L</lex_read_to>,
 815 which handles newlines appropriately.
 816
 817 Interpretation of the buffer's octets can be abstracted out by
 818 using the slightly higher-level functions L</lex_peek_unichar> and
 819 L</lex_read_unichar>.
 820
 821 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 822
 823 Points to the start of the current line inside the lexer buffer.
 824 This is useful for indicating at which column an error occurred, and
 825 not much else.  This must be updated by any lexing code that consumes
 826 a newline; the function L</lex_read_to> handles this detail.
 827
 828 =cut
 829 */
 830
 831 /*
 832 =for apidoc Amx|bool|lex_bufutf8
 833
 834 Indicates whether the octets in the lexer buffer
 835 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 836 of Unicode characters.  If not, they should be interpreted as Latin-1
 837 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 838
 839 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 840 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 841 encoding.
 842
 843 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 844 is significant, but not the whole story regarding the input character
 845 encoding.  Normally, when a file is being read, the scalar contains octets
 846 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 847 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 848 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 849 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 850 is in effect.  This logic may change in the future; use this function
 851 instead of implementing the logic yourself.
 852
 853 =cut
 854 */
 855
 856 bool
 857 Perl_lex_bufutf8(pTHX)
 858 {
 859     return UTF;
 860 }
 861
 862 /*
 863 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 864
 865 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 866 at least I<len> octets (including terminating NUL).  Returns a
 867 pointer to the reallocated buffer.  This is necessary before making
 868 any direct modification of the buffer that would increase its length.
 869 L</lex_stuff_pvn> provides a more convenient way to insert text into
 870 the buffer.
 871
 872 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 873 this function updates all of the lexer's variables that point directly
 874 into the buffer.
 875
 876 =cut
 877 */
 878
 879 char *
 880 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 881 {
 882     SV *linestr;
 883     char *buf;
 884     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 885     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 886     linestr = PL_parser->linestr;
 887     buf = SvPVX(linestr);
 888     if (len <= SvLEN(linestr))
 889         return buf;
 890     bufend_pos = PL_parser->bufend - buf;
 891     bufptr_pos = PL_parser->bufptr - buf;
 892     oldbufptr_pos = PL_parser->oldbufptr - buf;
 893     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 894     linestart_pos = PL_parser->linestart - buf;
 895     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 896     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 897     buf = sv_grow(linestr, len);
 898     PL_parser->bufend = buf + bufend_pos;
 899     PL_parser->bufptr = buf + bufptr_pos;
 900     PL_parser->oldbufptr = buf + oldbufptr_pos;
 901     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 902     PL_parser->linestart = buf + linestart_pos;
 903     if (PL_parser->last_uni)
 904         PL_parser->last_uni = buf + last_uni_pos;
 905     if (PL_parser->last_lop)
 906         PL_parser->last_lop = buf + last_lop_pos;
 907     return buf;
 908 }
 909
 910 /*
 911 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 912
 913 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 914 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 915 reallocating the buffer if necessary.  This means that lexing code that
 916 runs later will see the characters as if they had appeared in the input.
 917 It is not recommended to do this as part of normal parsing, and most
 918 uses of this facility run the risk of the inserted characters being
 919 interpreted in an unintended manner.
 920
 921 The string to be inserted is represented by I<len> octets starting
 922 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 923 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 924 The characters are recoded for the lexer buffer, according to how the
 925 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 926 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 927 function is more convenient.
 928
 929 =cut
 930 */
 931
 932 void
 933 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 934 {
 935     dVAR;
 936     char *bufptr;
 937     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 938     if (flags & ~(LEX_STUFF_UTF8))
 939         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 940     if (UTF) {
 941         if (flags & LEX_STUFF_UTF8) {
 942             goto plain_copy;
 943         } else {
 944             STRLEN highhalf = 0;
 945             const char *p, *e = pv+len;
 946             for (p = pv; p != e; p++)
 947                 highhalf += !!(((U8)*p) & 0x80);
 948             if (!highhalf)
 949                 goto plain_copy;
 950             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 951             bufptr = PL_parser->bufptr;
 952             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 953             SvCUR_set(PL_parser->linestr,
 954                 SvCUR(PL_parser->linestr) + len+highhalf);
 955             PL_parser->bufend += len+highhalf;
 956             for (p = pv; p != e; p++) {
 957                 U8 c = (U8)*p;
 958                 if (c & 0x80) {
 959                     *bufptr++ = (char)(0xc0 | (c >> 6));
 960                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 961                 } else {
 962                     *bufptr++ = (char)c;
 963                 }
 964             }
 965         }
 966     } else {
 967         if (flags & LEX_STUFF_UTF8) {
 968             STRLEN highhalf = 0;
 969             const char *p, *e = pv+len;
 970             for (p = pv; p != e; p++) {
 971                 U8 c = (U8)*p;
 972                 if (c >= 0xc4) {
 973                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 974                                 "non-Latin-1 character into Latin-1 input");
 975                 } else if (c >= 0xc2 && p+1 != e &&
 976                             (((U8)p[1]) & 0xc0) == 0x80) {
 977                     p++;
 978                     highhalf++;
 979                 } else if (c >= 0x80) {
 980                     /* malformed UTF-8 */
 981                     ENTER;
 982                     SAVESPTR(PL_warnhook);
 983                     PL_warnhook = PERL_WARNHOOK_FATAL;
 984                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 985                     LEAVE;
 986                 }
 987             }
 988             if (!highhalf)
 989                 goto plain_copy;
 990             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 991             bufptr = PL_parser->bufptr;
 992             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 993             SvCUR_set(PL_parser->linestr,
 994                 SvCUR(PL_parser->linestr) + len-highhalf);
 995             PL_parser->bufend += len-highhalf;
 996             for (p = pv; p != e; p++) {
 997                 U8 c = (U8)*p;
 998                 if (c & 0x80) {
 999                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1000                     p++;
1001                 } else {
1002                     *bufptr++ = (char)c;
1003                 }
1004             }
1005         } else {
1006             plain_copy:
1007             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1008             bufptr = PL_parser->bufptr;
1009             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1010             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is represented by octets starting at I<pv>
1029 and continuing to the first nul.  These octets are interpreted as either
1030 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1031 in I<flags>.  The characters are recoded for the lexer buffer, according
1032 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1033 If it is not convenient to nul-terminate a string to be inserted, the
1034 L</lex_stuff_pvn> function is more appropriate.
1035
1036 =cut
1037 */
1038
1039 void
1040 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1041 {
1042     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1043     lex_stuff_pvn(pv, strlen(pv), flags);
1044 }
1045
1046 /*
1047 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1048
1049 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1050 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1051 reallocating the buffer if necessary.  This means that lexing code that
1052 runs later will see the characters as if they had appeared in the input.
1053 It is not recommended to do this as part of normal parsing, and most
1054 uses of this facility run the risk of the inserted characters being
1055 interpreted in an unintended manner.
1056
1057 The string to be inserted is the string value of I<sv>.  The characters
1058 are recoded for the lexer buffer, according to how the buffer is currently
1059 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1060 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1061 need to construct a scalar.
1062
1063 =cut
1064 */
1065
1066 void
1067 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1068 {
1069     char *pv;
1070     STRLEN len;
1071     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1072     if (flags)
1073         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1074     pv = SvPV(sv, len);
1075     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1076 }
1077
1078 /*
1079 =for apidoc Amx|void|lex_unstuff|char *ptr
1080
1081 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1082 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1083 This hides the discarded text from any lexing code that runs later,
1084 as if the text had never appeared.
1085
1086 This is not the normal way to consume lexed text.  For that, use
1087 L</lex_read_to>.
1088
1089 =cut
1090 */
1091
1092 void
1093 Perl_lex_unstuff(pTHX_ char *ptr)
1094 {
1095     char *buf, *bufend;
1096     STRLEN unstuff_len;
1097     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1098     buf = PL_parser->bufptr;
1099     if (ptr < buf)
1100         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1101     if (ptr == buf)
1102         return;
1103     bufend = PL_parser->bufend;
1104     if (ptr > bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1106     unstuff_len = ptr - buf;
1107     Move(ptr, buf, bufend+1-ptr, char);
1108     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1109     PL_parser->bufend = bufend - unstuff_len;
1110 }
1111
1112 /*
1113 =for apidoc Amx|void|lex_read_to|char *ptr
1114
1115 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1116 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1117 performing the correct bookkeeping whenever a newline character is passed.
1118 This is the normal way to consume lexed text.
1119
1120 Interpretation of the buffer's octets can be abstracted out by
1121 using the slightly higher-level functions L</lex_peek_unichar> and
1122 L</lex_read_unichar>.
1123
1124 =cut
1125 */
1126
1127 void
1128 Perl_lex_read_to(pTHX_ char *ptr)
1129 {
1130     char *s;
1131     PERL_ARGS_ASSERT_LEX_READ_TO;
1132     s = PL_parser->bufptr;
1133     if (ptr < s || ptr > PL_parser->bufend)
1134         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1135     for (; s != ptr; s++)
1136         if (*s == '\n') {
1137             CopLINE_inc(PL_curcop);
1138             PL_parser->linestart = s+1;
1139         }
1140     PL_parser->bufptr = ptr;
1141 }
1142
1143 /*
1144 =for apidoc Amx|void|lex_discard_to|char *ptr
1145
1146 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1147 up to I<ptr>.  The remaining content of the buffer will be moved, and
1148 all pointers into the buffer updated appropriately.  I<ptr> must not
1149 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1150 it is not permitted to discard text that has yet to be lexed.
1151
1152 Normally it is not necessarily to do this directly, because it suffices to
1153 use the implicit discarding behaviour of L</lex_next_chunk> and things
1154 based on it.  However, if a token stretches across multiple lines,
1155 and the lexing code has kept multiple lines of text in the buffer for
1156 that purpose, then after completion of the token it would be wise to
1157 explicitly discard the now-unneeded earlier lines, to avoid future
1158 multi-line tokens growing the buffer without bound.
1159
1160 =cut
1161 */
1162
1163 void
1164 Perl_lex_discard_to(pTHX_ char *ptr)
1165 {
1166     char *buf;
1167     STRLEN discard_len;
1168     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1169     buf = SvPVX(PL_parser->linestr);
1170     if (ptr < buf)
1171         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1172     if (ptr == buf)
1173         return;
1174     if (ptr > PL_parser->bufptr)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1176     discard_len = ptr - buf;
1177     if (PL_parser->oldbufptr < ptr)
1178         PL_parser->oldbufptr = ptr;
1179     if (PL_parser->oldoldbufptr < ptr)
1180         PL_parser->oldoldbufptr = ptr;
1181     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1182         PL_parser->last_uni = NULL;
1183     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1184         PL_parser->last_lop = NULL;
1185     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1186     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1187     PL_parser->bufend -= discard_len;
1188     PL_parser->bufptr -= discard_len;
1189     PL_parser->oldbufptr -= discard_len;
1190     PL_parser->oldoldbufptr -= discard_len;
1191     if (PL_parser->last_uni)
1192         PL_parser->last_uni -= discard_len;
1193     if (PL_parser->last_lop)
1194         PL_parser->last_lop -= discard_len;
1195 }
1196
1197 /*
1198 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1199
1200 Reads in the next chunk of text to be lexed, appending it to
1201 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1202 looked to the end of the current chunk and wants to know more.  It is
1203 usual, but not necessary, for lexing to have consumed the entirety of
1204 the current chunk at this time.
1205
1206 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1207 chunk (i.e., the current chunk has been entirely consumed), normally the
1208 current chunk will be discarded at the same time that the new chunk is
1209 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1210 will not be discarded.  If the current chunk has not been entirely
1211 consumed, then it will not be discarded regardless of the flag.
1212
1213 Returns true if some new text was added to the buffer, or false if the
1214 buffer has reached the end of the input text.
1215
1216 =cut
1217 */
1218
1219 #define LEX_FAKE_EOF 0x80000000
1220
1221 bool
1222 Perl_lex_next_chunk(pTHX_ U32 flags)
1223 {
1224     SV *linestr;
1225     char *buf;
1226     STRLEN old_bufend_pos, new_bufend_pos;
1227     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1228     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1229     bool got_some_for_debugger = 0;
1230     bool got_some;
1231     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1232         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1233     linestr = PL_parser->linestr;
1234     buf = SvPVX(linestr);
1235     if (!(flags & LEX_KEEP_PREVIOUS) &&
1236             PL_parser->bufptr == PL_parser->bufend) {
1237         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1238         linestart_pos = 0;
1239         if (PL_parser->last_uni != PL_parser->bufend)
1240             PL_parser->last_uni = NULL;
1241         if (PL_parser->last_lop != PL_parser->bufend)
1242             PL_parser->last_lop = NULL;
1243         last_uni_pos = last_lop_pos = 0;
1244         *buf = 0;
1245         SvCUR(linestr) = 0;
1246     } else {
1247         old_bufend_pos = PL_parser->bufend - buf;
1248         bufptr_pos = PL_parser->bufptr - buf;
1249         oldbufptr_pos = PL_parser->oldbufptr - buf;
1250         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1251         linestart_pos = PL_parser->linestart - buf;
1252         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1253         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1254     }
1255     if (flags & LEX_FAKE_EOF) {
1256         goto eof;
1257     } else if (!PL_parser->rsfp) {
1258         got_some = 0;
1259     } else if (filter_gets(linestr, old_bufend_pos)) {
1260         got_some = 1;
1261         got_some_for_debugger = 1;
1262     } else {
1263         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1264             sv_setpvs(linestr, "");
1265         eof:
1266         /* End of real input.  Close filehandle (unless it was STDIN),
1267          * then add implicit termination.
1268          */
1269         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1270             PerlIO_clearerr(PL_parser->rsfp);
1271         else if (PL_parser->rsfp)
1272             (void)PerlIO_close(PL_parser->rsfp);
1273         PL_parser->rsfp = NULL;
1274         PL_doextract = FALSE;
1275 #ifdef PERL_MAD
1276         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1277             PL_faketokens = 1;
1278 #endif
1279         if (!PL_in_eval && PL_minus_p) {
1280             sv_catpvs(linestr,
1281                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1282             PL_minus_n = PL_minus_p = 0;
1283         } else if (!PL_in_eval && PL_minus_n) {
1284             sv_catpvs(linestr, /*{*/";}");
1285             PL_minus_n = 0;
1286         } else
1287             sv_catpvs(linestr, ";");
1288         got_some = 1;
1289     }
1290     buf = SvPVX(linestr);
1291     new_bufend_pos = SvCUR(linestr);
1292     PL_parser->bufend = buf + new_bufend_pos;
1293     PL_parser->bufptr = buf + bufptr_pos;
1294     PL_parser->oldbufptr = buf + oldbufptr_pos;
1295     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1296     PL_parser->linestart = buf + linestart_pos;
1297     if (PL_parser->last_uni)
1298         PL_parser->last_uni = buf + last_uni_pos;
1299     if (PL_parser->last_lop)
1300         PL_parser->last_lop = buf + last_lop_pos;
1301     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1302             PL_curstash != PL_debstash) {
1303         /* debugger active and we're not compiling the debugger code,
1304          * so store the line into the debugger's array of lines
1305          */
1306         update_debugger_info(NULL, buf+old_bufend_pos,
1307             new_bufend_pos-old_bufend_pos);
1308     }
1309     return got_some;
1310 }
1311
1312 /*
1313 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1314
1315 Looks ahead one (Unicode) character in the text currently being lexed.
1316 Returns the codepoint (unsigned integer value) of the next character,
1317 or -1 if lexing has reached the end of the input text.  To consume the
1318 peeked character, use L</lex_read_unichar>.
1319
1320 If the next character is in (or extends into) the next chunk of input
1321 text, the next chunk will be read in.  Normally the current chunk will be
1322 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1323 then the current chunk will not be discarded.
1324
1325 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1326 is encountered, an exception is generated.
1327
1328 =cut
1329 */
1330
1331 I32
1332 Perl_lex_peek_unichar(pTHX_ U32 flags)
1333 {
1334     dVAR;
1335     char *s, *bufend;
1336     if (flags & ~(LEX_KEEP_PREVIOUS))
1337         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1338     s = PL_parser->bufptr;
1339     bufend = PL_parser->bufend;
1340     if (UTF) {
1341         U8 head;
1342         I32 unichar;
1343         STRLEN len, retlen;
1344         if (s == bufend) {
1345             if (!lex_next_chunk(flags))
1346                 return -1;
1347             s = PL_parser->bufptr;
1348             bufend = PL_parser->bufend;
1349         }
1350         head = (U8)*s;
1351         if (!(head & 0x80))
1352             return head;
1353         if (head & 0x40) {
1354             len = PL_utf8skip[head];
1355             while ((STRLEN)(bufend-s) < len) {
1356                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1357                     break;
1358                 s = PL_parser->bufptr;
1359                 bufend = PL_parser->bufend;
1360             }
1361         }
1362         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1363         if (retlen == (STRLEN)-1) {
1364             /* malformed UTF-8 */
1365             ENTER;
1366             SAVESPTR(PL_warnhook);
1367             PL_warnhook = PERL_WARNHOOK_FATAL;
1368             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1369             LEAVE;
1370         }
1371         return unichar;
1372     } else {
1373         if (s == bufend) {
1374             if (!lex_next_chunk(flags))
1375                 return -1;
1376             s = PL_parser->bufptr;
1377         }
1378         return (U8)*s;
1379     }
1380 }
1381
1382 /*
1383 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1384
1385 Reads the next (Unicode) character in the text currently being lexed.
1386 Returns the codepoint (unsigned integer value) of the character read,
1387 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1388 if lexing has reached the end of the input text.  To non-destructively
1389 examine the next character, use L</lex_peek_unichar> instead.
1390
1391 If the next character is in (or extends into) the next chunk of input
1392 text, the next chunk will be read in.  Normally the current chunk will be
1393 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1394 then the current chunk will not be discarded.
1395
1396 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1397 is encountered, an exception is generated.
1398
1399 =cut
1400 */
1401
1402 I32
1403 Perl_lex_read_unichar(pTHX_ U32 flags)
1404 {
1405     I32 c;
1406     if (flags & ~(LEX_KEEP_PREVIOUS))
1407         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1408     c = lex_peek_unichar(flags);
1409     if (c != -1) {
1410         if (c == '\n')
1411             CopLINE_inc(PL_curcop);
1412         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1413     }
1414     return c;
1415 }
1416
1417 /*
1418 =for apidoc Amx|void|lex_read_space|U32 flags
1419
1420 Reads optional spaces, in Perl style, in the text currently being
1421 lexed.  The spaces may include ordinary whitespace characters and
1422 Perl-style comments.  C<#line> directives are processed if encountered.
1423 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1424 at a non-space character (or the end of the input text).
1425
1426 If spaces extend into the next chunk of input text, the next chunk will
1427 be read in.  Normally the current chunk will be discarded at the same
1428 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1429 chunk will not be discarded.
1430
1431 =cut
1432 */
1433
1434 #define LEX_NO_NEXT_CHUNK 0x80000000
1435
1436 void
1437 Perl_lex_read_space(pTHX_ U32 flags)
1438 {
1439     char *s, *bufend;
1440     bool need_incline = 0;
1441     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1442         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1443 #ifdef PERL_MAD
1444     if (PL_skipwhite) {
1445         sv_free(PL_skipwhite);
1446         PL_skipwhite = NULL;
1447     }
1448     if (PL_madskills)
1449         PL_skipwhite = newSVpvs("");
1450 #endif /* PERL_MAD */
1451     s = PL_parser->bufptr;
1452     bufend = PL_parser->bufend;
1453     while (1) {
1454         char c = *s;
1455         if (c == '#') {
1456             do {
1457                 c = *++s;
1458             } while (!(c == '\n' || (c == 0 && s == bufend)));
1459         } else if (c == '\n') {
1460             s++;
1461             PL_parser->linestart = s;
1462             if (s == bufend)
1463                 need_incline = 1;
1464             else
1465                 incline(s);
1466         } else if (isSPACE(c)) {
1467             s++;
1468         } else if (c == 0 && s == bufend) {
1469             bool got_more;
1470 #ifdef PERL_MAD
1471             if (PL_madskills)
1472                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1473 #endif /* PERL_MAD */
1474             if (flags & LEX_NO_NEXT_CHUNK)
1475                 break;
1476             PL_parser->bufptr = s;
1477             CopLINE_inc(PL_curcop);
1478             got_more = lex_next_chunk(flags);
1479             CopLINE_dec(PL_curcop);
1480             s = PL_parser->bufptr;
1481             bufend = PL_parser->bufend;
1482             if (!got_more)
1483                 break;
1484             if (need_incline && PL_parser->rsfp) {
1485                 incline(s);
1486                 need_incline = 0;
1487             }
1488         } else {
1489             break;
1490         }
1491     }
1492 #ifdef PERL_MAD
1493     if (PL_madskills)
1494         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1495 #endif /* PERL_MAD */
1496     PL_parser->bufptr = s;
1497 }
1498
1499 /*
1500  * S_incline
1501  * This subroutine has nothing to do with tilting, whether at windmills
1502  * or pinball tables.  Its name is short for "increment line".  It
1503  * increments the current line number in CopLINE(PL_curcop) and checks
1504  * to see whether the line starts with a comment of the form
1505  *    # line 500 "foo.pm"
1506  * If so, it sets the current line number and file to the values in the comment.
1507  */
1508
1509 STATIC void
1510 S_incline(pTHX_ const char *s)
1511 {
1512     dVAR;
1513     const char *t;
1514     const char *n;
1515     const char *e;
1516
1517     PERL_ARGS_ASSERT_INCLINE;
1518
1519     CopLINE_inc(PL_curcop);
1520     if (*s++ != '#')
1521         return;
1522     while (SPACE_OR_TAB(*s))
1523         s++;
1524     if (strnEQ(s, "line", 4))
1525         s += 4;
1526     else
1527         return;
1528     if (SPACE_OR_TAB(*s))
1529         s++;
1530     else
1531         return;
1532     while (SPACE_OR_TAB(*s))
1533         s++;
1534     if (!isDIGIT(*s))
1535         return;
1536
1537     n = s;
1538     while (isDIGIT(*s))
1539         s++;
1540     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1541         return;
1542     while (SPACE_OR_TAB(*s))
1543         s++;
1544     if (*s == '"' && (t = strchr(s+1, '"'))) {
1545         s++;
1546         e = t + 1;
1547     }
1548     else {
1549         t = s;
1550         while (!isSPACE(*t))
1551             t++;
1552         e = t;
1553     }
1554     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1555         e++;
1556     if (*e != '\n' && *e != '\0')
1557         return;         /* false alarm */
1558
1559     if (t - s > 0) {
1560         const STRLEN len = t - s;
1561 #ifndef USE_ITHREADS
1562         SV *const temp_sv = CopFILESV(PL_curcop);
1563         const char *cf;
1564         STRLEN tmplen;
1565
1566         if (temp_sv) {
1567             cf = SvPVX(temp_sv);
1568             tmplen = SvCUR(temp_sv);
1569         } else {
1570             cf = NULL;
1571             tmplen = 0;
1572         }
1573
1574         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1575             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1576              * to *{"::_<newfilename"} */
1577             /* However, the long form of evals is only turned on by the
1578                debugger - usually they're "(eval %lu)" */
1579             char smallbuf[128];
1580             char *tmpbuf;
1581             GV **gvp;
1582             STRLEN tmplen2 = len;
1583             if (tmplen + 2 <= sizeof smallbuf)
1584                 tmpbuf = smallbuf;
1585             else
1586                 Newx(tmpbuf, tmplen + 2, char);
1587             tmpbuf[0] = '_';
1588             tmpbuf[1] = '<';
1589             memcpy(tmpbuf + 2, cf, tmplen);
1590             tmplen += 2;
1591             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1592             if (gvp) {
1593                 char *tmpbuf2;
1594                 GV *gv2;
1595
1596                 if (tmplen2 + 2 <= sizeof smallbuf)
1597                     tmpbuf2 = smallbuf;
1598                 else
1599                     Newx(tmpbuf2, tmplen2 + 2, char);
1600
1601                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1602                     /* Either they malloc'd it, or we malloc'd it,
1603                        so no prefix is present in ours.  */
1604                     tmpbuf2[0] = '_';
1605                     tmpbuf2[1] = '<';
1606                 }
1607
1608                 memcpy(tmpbuf2 + 2, s, tmplen2);
1609                 tmplen2 += 2;
1610
1611                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1612                 if (!isGV(gv2)) {
1613                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1614                     /* adjust ${"::_<newfilename"} to store the new file name */
1615                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1616                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1617                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1618                 }
1619
1620                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1621             }
1622             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1623         }
1624 #endif
1625         CopFILE_free(PL_curcop);
1626         CopFILE_setn(PL_curcop, s, len);
1627     }
1628     CopLINE_set(PL_curcop, atoi(n)-1);
1629 }
1630
1631 #ifdef PERL_MAD
1632 /* skip space before PL_thistoken */
1633
1634 STATIC char *
1635 S_skipspace0(pTHX_ register char *s)
1636 {
1637     PERL_ARGS_ASSERT_SKIPSPACE0;
1638
1639     s = skipspace(s);
1640     if (!PL_madskills)
1641         return s;
1642     if (PL_skipwhite) {
1643         if (!PL_thiswhite)
1644             PL_thiswhite = newSVpvs("");
1645         sv_catsv(PL_thiswhite, PL_skipwhite);
1646         sv_free(PL_skipwhite);
1647         PL_skipwhite = 0;
1648     }
1649     PL_realtokenstart = s - SvPVX(PL_linestr);
1650     return s;
1651 }
1652
1653 /* skip space after PL_thistoken */
1654
1655 STATIC char *
1656 S_skipspace1(pTHX_ register char *s)
1657 {
1658     const char *start = s;
1659     I32 startoff = start - SvPVX(PL_linestr);
1660
1661     PERL_ARGS_ASSERT_SKIPSPACE1;
1662
1663     s = skipspace(s);
1664     if (!PL_madskills)
1665         return s;
1666     start = SvPVX(PL_linestr) + startoff;
1667     if (!PL_thistoken && PL_realtokenstart >= 0) {
1668         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1669         PL_thistoken = newSVpvn(tstart, start - tstart);
1670     }
1671     PL_realtokenstart = -1;
1672     if (PL_skipwhite) {
1673         if (!PL_nextwhite)
1674             PL_nextwhite = newSVpvs("");
1675         sv_catsv(PL_nextwhite, PL_skipwhite);
1676         sv_free(PL_skipwhite);
1677         PL_skipwhite = 0;
1678     }
1679     return s;
1680 }
1681
1682 STATIC char *
1683 S_skipspace2(pTHX_ register char *s, SV **svp)
1684 {
1685     char *start;
1686     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1687     const I32 startoff = s - SvPVX(PL_linestr);
1688
1689     PERL_ARGS_ASSERT_SKIPSPACE2;
1690
1691     s = skipspace(s);
1692     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1693     if (!PL_madskills || !svp)
1694         return s;
1695     start = SvPVX(PL_linestr) + startoff;
1696     if (!PL_thistoken && PL_realtokenstart >= 0) {
1697         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1698         PL_thistoken = newSVpvn(tstart, start - tstart);
1699         PL_realtokenstart = -1;
1700     }
1701     if (PL_skipwhite) {
1702         if (!*svp)
1703             *svp = newSVpvs("");
1704         sv_setsv(*svp, PL_skipwhite);
1705         sv_free(PL_skipwhite);
1706         PL_skipwhite = 0;
1707     }
1708
1709     return s;
1710 }
1711 #endif
1712
1713 STATIC void
1714 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1715 {
1716     AV *av = CopFILEAVx(PL_curcop);
1717     if (av) {
1718         SV * const sv = newSV_type(SVt_PVMG);
1719         if (orig_sv)
1720             sv_setsv(sv, orig_sv);
1721         else
1722             sv_setpvn(sv, buf, len);
1723         (void)SvIOK_on(sv);
1724         SvIV_set(sv, 0);
1725         av_store(av, (I32)CopLINE(PL_curcop), sv);
1726     }
1727 }
1728
1729 /*
1730  * S_skipspace
1731  * Called to gobble the appropriate amount and type of whitespace.
1732  * Skips comments as well.
1733  */
1734
1735 STATIC char *
1736 S_skipspace(pTHX_ register char *s)
1737 {
1738 #ifdef PERL_MAD
1739     char *start = s;
1740 #endif /* PERL_MAD */
1741     PERL_ARGS_ASSERT_SKIPSPACE;
1742 #ifdef PERL_MAD
1743     if (PL_skipwhite) {
1744         sv_free(PL_skipwhite);
1745         PL_skipwhite = NULL;
1746     }
1747 #endif /* PERL_MAD */
1748     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1749         while (s < PL_bufend && SPACE_OR_TAB(*s))
1750             s++;
1751     } else {
1752         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1753         PL_bufptr = s;
1754         lex_read_space(LEX_KEEP_PREVIOUS |
1755                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1756                     LEX_NO_NEXT_CHUNK : 0));
1757         s = PL_bufptr;
1758         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1759         if (PL_linestart > PL_bufptr)
1760             PL_bufptr = PL_linestart;
1761         return s;
1762     }
1763 #ifdef PERL_MAD
1764     if (PL_madskills)
1765         PL_skipwhite = newSVpvn(start, s-start);
1766 #endif /* PERL_MAD */
1767     return s;
1768 }
1769
1770 /*
1771  * S_check_uni
1772  * Check the unary operators to ensure there's no ambiguity in how they're
1773  * used.  An ambiguous piece of code would be:
1774  *     rand + 5
1775  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1776  * the +5 is its argument.
1777  */
1778
1779 STATIC void
1780 S_check_uni(pTHX)
1781 {
1782     dVAR;
1783     const char *s;
1784     const char *t;
1785
1786     if (PL_oldoldbufptr != PL_last_uni)
1787         return;
1788     while (isSPACE(*PL_last_uni))
1789         PL_last_uni++;
1790     s = PL_last_uni;
1791     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1792         s++;
1793     if ((t = strchr(s, '(')) && t < PL_bufptr)
1794         return;
1795
1796     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1797                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1798                      (int)(s - PL_last_uni), PL_last_uni);
1799 }
1800
1801 /*
1802  * LOP : macro to build a list operator.  Its behaviour has been replaced
1803  * with a subroutine, S_lop() for which LOP is just another name.
1804  */
1805
1806 #define LOP(f,x) return lop(f,x,s)
1807
1808 /*
1809  * S_lop
1810  * Build a list operator (or something that might be one).  The rules:
1811  *  - if we have a next token, then it's a list operator [why?]
1812  *  - if the next thing is an opening paren, then it's a function
1813  *  - else it's a list operator
1814  */
1815
1816 STATIC I32
1817 S_lop(pTHX_ I32 f, int x, char *s)
1818 {
1819     dVAR;
1820
1821     PERL_ARGS_ASSERT_LOP;
1822
1823     pl_yylval.ival = f;
1824     CLINE;
1825     PL_expect = x;
1826     PL_bufptr = s;
1827     PL_last_lop = PL_oldbufptr;
1828     PL_last_lop_op = (OPCODE)f;
1829 #ifdef PERL_MAD
1830     if (PL_lasttoke)
1831         return REPORT(LSTOP);
1832 #else
1833     if (PL_nexttoke)
1834         return REPORT(LSTOP);
1835 #endif
1836     if (*s == '(')
1837         return REPORT(FUNC);
1838     s = PEEKSPACE(s);
1839     if (*s == '(')
1840         return REPORT(FUNC);
1841     else
1842         return REPORT(LSTOP);
1843 }
1844
1845 #ifdef PERL_MAD
1846  /*
1847  * S_start_force
1848  * Sets up for an eventual force_next().  start_force(0) basically does
1849  * an unshift, while start_force(-1) does a push.  yylex removes items
1850  * on the "pop" end.
1851  */
1852
1853 STATIC void
1854 S_start_force(pTHX_ int where)
1855 {
1856     int i;
1857
1858     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1859         where = PL_lasttoke;
1860     assert(PL_curforce < 0 || PL_curforce == where);
1861     if (PL_curforce != where) {
1862         for (i = PL_lasttoke; i > where; --i) {
1863             PL_nexttoke[i] = PL_nexttoke[i-1];
1864         }
1865         PL_lasttoke++;
1866     }
1867     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1868         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1869     PL_curforce = where;
1870     if (PL_nextwhite) {
1871         if (PL_madskills)
1872             curmad('^', newSVpvs(""));
1873         CURMAD('_', PL_nextwhite);
1874     }
1875 }
1876
1877 STATIC void
1878 S_curmad(pTHX_ char slot, SV *sv)
1879 {
1880     MADPROP **where;
1881
1882     if (!sv)
1883         return;
1884     if (PL_curforce < 0)
1885         where = &PL_thismad;
1886     else
1887         where = &PL_nexttoke[PL_curforce].next_mad;
1888
1889     if (PL_faketokens)
1890         sv_setpvs(sv, "");
1891     else {
1892         if (!IN_BYTES) {
1893             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1894                 SvUTF8_on(sv);
1895             else if (PL_encoding) {
1896                 sv_recode_to_utf8(sv, PL_encoding);
1897             }
1898         }
1899     }
1900
1901     /* keep a slot open for the head of the list? */
1902     if (slot != '_' && *where && (*where)->mad_key == '^') {
1903         (*where)->mad_key = slot;
1904         sv_free(MUTABLE_SV(((*where)->mad_val)));
1905         (*where)->mad_val = (void*)sv;
1906     }
1907     else
1908         addmad(newMADsv(slot, sv), where, 0);
1909 }
1910 #else
1911 #  define start_force(where)    NOOP
1912 #  define curmad(slot, sv)      NOOP
1913 #endif
1914
1915 /*
1916  * S_force_next
1917  * When the lexer realizes it knows the next token (for instance,
1918  * it is reordering tokens for the parser) then it can call S_force_next
1919  * to know what token to return the next time the lexer is called.  Caller
1920  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1921  * and possibly PL_expect to ensure the lexer handles the token correctly.
1922  */
1923
1924 STATIC void
1925 S_force_next(pTHX_ I32 type)
1926 {
1927     dVAR;
1928 #ifdef DEBUGGING
1929     if (DEBUG_T_TEST) {
1930         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1931         tokereport(type, &NEXTVAL_NEXTTOKE);
1932     }
1933 #endif
1934 #ifdef PERL_MAD
1935     if (PL_curforce < 0)
1936         start_force(PL_lasttoke);
1937     PL_nexttoke[PL_curforce].next_type = type;
1938     if (PL_lex_state != LEX_KNOWNEXT)
1939         PL_lex_defer = PL_lex_state;
1940     PL_lex_state = LEX_KNOWNEXT;
1941     PL_lex_expect = PL_expect;
1942     PL_curforce = -1;
1943 #else
1944     PL_nexttype[PL_nexttoke] = type;
1945     PL_nexttoke++;
1946     if (PL_lex_state != LEX_KNOWNEXT) {
1947         PL_lex_defer = PL_lex_state;
1948         PL_lex_expect = PL_expect;
1949         PL_lex_state = LEX_KNOWNEXT;
1950     }
1951 #endif
1952 }
1953
1954 void
1955 Perl_yyunlex(pTHX)
1956 {
1957     if (PL_parser->yychar != YYEMPTY) {
1958         start_force(-1);
1959         NEXTVAL_NEXTTOKE = PL_parser->yylval;
1960         force_next(PL_parser->yychar);
1961         PL_parser->yychar = YYEMPTY;
1962     }
1963 }
1964
1965 STATIC SV *
1966 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1967 {
1968     dVAR;
1969     SV * const sv = newSVpvn_utf8(start, len,
1970                                   !IN_BYTES
1971                                   && UTF
1972                                   && !is_ascii_string((const U8*)start, len)
1973                                   && is_utf8_string((const U8*)start, len));
1974     return sv;
1975 }
1976
1977 /*
1978  * S_force_word
1979  * When the lexer knows the next thing is a word (for instance, it has
1980  * just seen -> and it knows that the next char is a word char, then
1981  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1982  * lookahead.
1983  *
1984  * Arguments:
1985  *   char *start : buffer position (must be within PL_linestr)
1986  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1987  *   int check_keyword : if true, Perl checks to make sure the word isn't
1988  *       a keyword (do this if the word is a label, e.g. goto FOO)
1989  *   int allow_pack : if true, : characters will also be allowed (require,
1990  *       use, etc. do this)
1991  *   int allow_initial_tick : used by the "sub" lexer only.
1992  */
1993
1994 STATIC char *
1995 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1996 {
1997     dVAR;
1998     register char *s;
1999     STRLEN len;
2000
2001     PERL_ARGS_ASSERT_FORCE_WORD;
2002
2003     start = SKIPSPACE1(start);
2004     s = start;
2005     if (isIDFIRST_lazy_if(s,UTF) ||
2006         (allow_pack && *s == ':') ||
2007         (allow_initial_tick && *s == '\'') )
2008     {
2009         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2010         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2011             return start;
2012         start_force(PL_curforce);
2013         if (PL_madskills)
2014             curmad('X', newSVpvn(start,s-start));
2015         if (token == METHOD) {
2016             s = SKIPSPACE1(s);
2017             if (*s == '(')
2018                 PL_expect = XTERM;
2019             else {
2020                 PL_expect = XOPERATOR;
2021             }
2022         }
2023         if (PL_madskills)
2024             curmad('g', newSVpvs( "forced" ));
2025         NEXTVAL_NEXTTOKE.opval
2026             = (OP*)newSVOP(OP_CONST,0,
2027                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2028         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2029         force_next(token);
2030     }
2031     return s;
2032 }
2033
2034 /*
2035  * S_force_ident
2036  * Called when the lexer wants $foo *foo &foo etc, but the program
2037  * text only contains the "foo" portion.  The first argument is a pointer
2038  * to the "foo", and the second argument is the type symbol to prefix.
2039  * Forces the next token to be a "WORD".
2040  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2041  */
2042
2043 STATIC void
2044 S_force_ident(pTHX_ register const char *s, int kind)
2045 {
2046     dVAR;
2047
2048     PERL_ARGS_ASSERT_FORCE_IDENT;
2049
2050     if (*s) {
2051         const STRLEN len = strlen(s);
2052         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2053         start_force(PL_curforce);
2054         NEXTVAL_NEXTTOKE.opval = o;
2055         force_next(WORD);
2056         if (kind) {
2057             o->op_private = OPpCONST_ENTERED;
2058             /* XXX see note in pp_entereval() for why we forgo typo
2059                warnings if the symbol must be introduced in an eval.
2060                GSAR 96-10-12 */
2061             gv_fetchpvn_flags(s, len,
2062                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2063                               : GV_ADD,
2064                               kind == '$' ? SVt_PV :
2065                               kind == '@' ? SVt_PVAV :
2066                               kind == '%' ? SVt_PVHV :
2067                               SVt_PVGV
2068                               );
2069         }
2070     }
2071 }
2072
2073 NV
2074 Perl_str_to_version(pTHX_ SV *sv)
2075 {
2076     NV retval = 0.0;
2077     NV nshift = 1.0;
2078     STRLEN len;
2079     const char *start = SvPV_const(sv,len);
2080     const char * const end = start + len;
2081     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2082
2083     PERL_ARGS_ASSERT_STR_TO_VERSION;
2084
2085     while (start < end) {
2086         STRLEN skip;
2087         UV n;
2088         if (utf)
2089             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2090         else {
2091             n = *(U8*)start;
2092             skip = 1;
2093         }
2094         retval += ((NV)n)/nshift;
2095         start += skip;
2096         nshift *= 1000;
2097     }
2098     return retval;
2099 }
2100
2101 /*
2102  * S_force_version
2103  * Forces the next token to be a version number.
2104  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2105  * and if "guessing" is TRUE, then no new token is created (and the caller
2106  * must use an alternative parsing method).
2107  */
2108
2109 STATIC char *
2110 S_force_version(pTHX_ char *s, int guessing)
2111 {
2112     dVAR;
2113     OP *version = NULL;
2114     char *d;
2115 #ifdef PERL_MAD
2116     I32 startoff = s - SvPVX(PL_linestr);
2117 #endif
2118
2119     PERL_ARGS_ASSERT_FORCE_VERSION;
2120
2121     s = SKIPSPACE1(s);
2122
2123     d = s;
2124     if (*d == 'v')
2125         d++;
2126     if (isDIGIT(*d)) {
2127         while (isDIGIT(*d) || *d == '_' || *d == '.')
2128             d++;
2129 #ifdef PERL_MAD
2130         if (PL_madskills) {
2131             start_force(PL_curforce);
2132             curmad('X', newSVpvn(s,d-s));
2133         }
2134 #endif
2135         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2136             SV *ver;
2137 #ifdef USE_LOCALE_NUMERIC
2138             char *loc = setlocale(LC_NUMERIC, "C");
2139 #endif
2140             s = scan_num(s, &pl_yylval);
2141 #ifdef USE_LOCALE_NUMERIC
2142             setlocale(LC_NUMERIC, loc);
2143 #endif
2144             version = pl_yylval.opval;
2145             ver = cSVOPx(version)->op_sv;
2146             if (SvPOK(ver) && !SvNIOK(ver)) {
2147                 SvUPGRADE(ver, SVt_PVNV);
2148                 SvNV_set(ver, str_to_version(ver));
2149                 SvNOK_on(ver);          /* hint that it is a version */
2150             }
2151         }
2152         else if (guessing) {
2153 #ifdef PERL_MAD
2154             if (PL_madskills) {
2155                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2156                 PL_nextwhite = 0;
2157                 s = SvPVX(PL_linestr) + startoff;
2158             }
2159 #endif
2160             return s;
2161         }
2162     }
2163
2164 #ifdef PERL_MAD
2165     if (PL_madskills && !version) {
2166         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2167         PL_nextwhite = 0;
2168         s = SvPVX(PL_linestr) + startoff;
2169     }
2170 #endif
2171     /* NOTE: The parser sees the package name and the VERSION swapped */
2172     start_force(PL_curforce);
2173     NEXTVAL_NEXTTOKE.opval = version;
2174     force_next(WORD);
2175
2176     return s;
2177 }
2178
2179 /*
2180  * S_force_strict_version
2181  * Forces the next token to be a version number using strict syntax rules.
2182  */
2183
2184 STATIC char *
2185 S_force_strict_version(pTHX_ char *s)
2186 {
2187     dVAR;
2188     OP *version = NULL;
2189 #ifdef PERL_MAD
2190     I32 startoff = s - SvPVX(PL_linestr);
2191 #endif
2192     const char *errstr = NULL;
2193
2194     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2195
2196     while (isSPACE(*s)) /* leading whitespace */
2197         s++;
2198
2199     if (is_STRICT_VERSION(s,&errstr)) {
2200         SV *ver = newSV(0);
2201         s = (char *)scan_version(s, ver, 0);
2202         version = newSVOP(OP_CONST, 0, ver);
2203     }
2204     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2205             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2206     {
2207         PL_bufptr = s;
2208         if (errstr)
2209             yyerror(errstr); /* version required */
2210         return s;
2211     }
2212
2213 #ifdef PERL_MAD
2214     if (PL_madskills && !version) {
2215         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2216         PL_nextwhite = 0;
2217         s = SvPVX(PL_linestr) + startoff;
2218     }
2219 #endif
2220     /* NOTE: The parser sees the package name and the VERSION swapped */
2221     start_force(PL_curforce);
2222     NEXTVAL_NEXTTOKE.opval = version;
2223     force_next(WORD);
2224
2225     return s;
2226 }
2227
2228 /*
2229  * S_tokeq
2230  * Tokenize a quoted string passed in as an SV.  It finds the next
2231  * chunk, up to end of string or a backslash.  It may make a new
2232  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2233  * turns \\ into \.
2234  */
2235
2236 STATIC SV *
2237 S_tokeq(pTHX_ SV *sv)
2238 {
2239     dVAR;
2240     register char *s;
2241     register char *send;
2242     register char *d;
2243     STRLEN len = 0;
2244     SV *pv = sv;
2245
2246     PERL_ARGS_ASSERT_TOKEQ;
2247
2248     if (!SvLEN(sv))
2249         goto finish;
2250
2251     s = SvPV_force(sv, len);
2252     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2253         goto finish;
2254     send = s + len;
2255     while (s < send && *s != '\\')
2256         s++;
2257     if (s == send)
2258         goto finish;
2259     d = s;
2260     if ( PL_hints & HINT_NEW_STRING ) {
2261         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2262     }
2263     while (s < send) {
2264         if (*s == '\\') {
2265             if (s + 1 < send && (s[1] == '\\'))
2266                 s++;            /* all that, just for this */
2267         }
2268         *d++ = *s++;
2269     }
2270     *d = '\0';
2271     SvCUR_set(sv, d - SvPVX_const(sv));
2272   finish:
2273     if ( PL_hints & HINT_NEW_STRING )
2274        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2275     return sv;
2276 }
2277
2278 /*
2279  * Now come three functions related to double-quote context,
2280  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2281  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2282  * interact with PL_lex_state, and create fake ( ... ) argument lists
2283  * to handle functions and concatenation.
2284  * They assume that whoever calls them will be setting up a fake
2285  * join call, because each subthing puts a ',' after it.  This lets
2286  *   "lower \luPpEr"
2287  * become
2288  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2289  *
2290  * (I'm not sure whether the spurious commas at the end of lcfirst's
2291  * arguments and join's arguments are created or not).
2292  */
2293
2294 /*
2295  * S_sublex_start
2296  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2297  *
2298  * Pattern matching will set PL_lex_op to the pattern-matching op to
2299  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2300  *
2301  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2302  *
2303  * Everything else becomes a FUNC.
2304  *
2305  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2306  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2307  * call to S_sublex_push().
2308  */
2309
2310 STATIC I32
2311 S_sublex_start(pTHX)
2312 {
2313     dVAR;
2314     register const I32 op_type = pl_yylval.ival;
2315
2316     if (op_type == OP_NULL) {
2317         pl_yylval.opval = PL_lex_op;
2318         PL_lex_op = NULL;
2319         return THING;
2320     }
2321     if (op_type == OP_CONST || op_type == OP_READLINE) {
2322         SV *sv = tokeq(PL_lex_stuff);
2323
2324         if (SvTYPE(sv) == SVt_PVIV) {
2325             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2326             STRLEN len;
2327             const char * const p = SvPV_const(sv, len);
2328             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2329             SvREFCNT_dec(sv);
2330             sv = nsv;
2331         }
2332         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2333         PL_lex_stuff = NULL;
2334         /* Allow <FH> // "foo" */
2335         if (op_type == OP_READLINE)
2336             PL_expect = XTERMORDORDOR;
2337         return THING;
2338     }
2339     else if (op_type == OP_BACKTICK && PL_lex_op) {
2340         /* readpipe() vas overriden */
2341         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2342         pl_yylval.opval = PL_lex_op;
2343         PL_lex_op = NULL;
2344         PL_lex_stuff = NULL;
2345         return THING;
2346     }
2347
2348     PL_sublex_info.super_state = PL_lex_state;
2349     PL_sublex_info.sub_inwhat = (U16)op_type;
2350     PL_sublex_info.sub_op = PL_lex_op;
2351     PL_lex_state = LEX_INTERPPUSH;
2352
2353     PL_expect = XTERM;
2354     if (PL_lex_op) {
2355         pl_yylval.opval = PL_lex_op;
2356         PL_lex_op = NULL;
2357         return PMFUNC;
2358     }
2359     else
2360         return FUNC;
2361 }
2362
2363 /*
2364  * S_sublex_push
2365  * Create a new scope to save the lexing state.  The scope will be
2366  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2367  * to the uc, lc, etc. found before.
2368  * Sets PL_lex_state to LEX_INTERPCONCAT.
2369  */
2370
2371 STATIC I32
2372 S_sublex_push(pTHX)
2373 {
2374     dVAR;
2375     ENTER;
2376
2377     PL_lex_state = PL_sublex_info.super_state;
2378     SAVEBOOL(PL_lex_dojoin);
2379     SAVEI32(PL_lex_brackets);
2380     SAVEI32(PL_lex_casemods);
2381     SAVEI32(PL_lex_starts);
2382     SAVEI8(PL_lex_state);
2383     SAVEVPTR(PL_lex_inpat);
2384     SAVEI16(PL_lex_inwhat);
2385     SAVECOPLINE(PL_curcop);
2386     SAVEPPTR(PL_bufptr);
2387     SAVEPPTR(PL_bufend);
2388     SAVEPPTR(PL_oldbufptr);
2389     SAVEPPTR(PL_oldoldbufptr);
2390     SAVEPPTR(PL_last_lop);
2391     SAVEPPTR(PL_last_uni);
2392     SAVEPPTR(PL_linestart);
2393     SAVESPTR(PL_linestr);
2394     SAVEGENERICPV(PL_lex_brackstack);
2395     SAVEGENERICPV(PL_lex_casestack);
2396
2397     PL_linestr = PL_lex_stuff;
2398     PL_lex_stuff = NULL;
2399
2400     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2401         = SvPVX(PL_linestr);
2402     PL_bufend += SvCUR(PL_linestr);
2403     PL_last_lop = PL_last_uni = NULL;
2404     SAVEFREESV(PL_linestr);
2405
2406     PL_lex_dojoin = FALSE;
2407     PL_lex_brackets = 0;
2408     Newx(PL_lex_brackstack, 120, char);
2409     Newx(PL_lex_casestack, 12, char);
2410     PL_lex_casemods = 0;
2411     *PL_lex_casestack = '\0';
2412     PL_lex_starts = 0;
2413     PL_lex_state = LEX_INTERPCONCAT;
2414     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2415
2416     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2417     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2418         PL_lex_inpat = PL_sublex_info.sub_op;
2419     else
2420         PL_lex_inpat = NULL;
2421
2422     return '(';
2423 }
2424
2425 /*
2426  * S_sublex_done
2427  * Restores lexer state after a S_sublex_push.
2428  */
2429
2430 STATIC I32
2431 S_sublex_done(pTHX)
2432 {
2433     dVAR;
2434     if (!PL_lex_starts++) {
2435         SV * const sv = newSVpvs("");
2436         if (SvUTF8(PL_linestr))
2437             SvUTF8_on(sv);
2438         PL_expect = XOPERATOR;
2439         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2440         return THING;
2441     }
2442
2443     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2444         PL_lex_state = LEX_INTERPCASEMOD;
2445         return yylex();
2446     }
2447
2448     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2449     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2450         PL_linestr = PL_lex_repl;
2451         PL_lex_inpat = 0;
2452         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2453         PL_bufend += SvCUR(PL_linestr);
2454         PL_last_lop = PL_last_uni = NULL;
2455         SAVEFREESV(PL_linestr);
2456         PL_lex_dojoin = FALSE;
2457         PL_lex_brackets = 0;
2458         PL_lex_casemods = 0;
2459         *PL_lex_casestack = '\0';
2460         PL_lex_starts = 0;
2461         if (SvEVALED(PL_lex_repl)) {
2462             PL_lex_state = LEX_INTERPNORMAL;
2463             PL_lex_starts++;
2464             /*  we don't clear PL_lex_repl here, so that we can check later
2465                 whether this is an evalled subst; that means we rely on the
2466                 logic to ensure sublex_done() is called again only via the
2467                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2468         }
2469         else {
2470             PL_lex_state = LEX_INTERPCONCAT;
2471             PL_lex_repl = NULL;
2472         }
2473         return ',';
2474     }
2475     else {
2476 #ifdef PERL_MAD
2477         if (PL_madskills) {
2478             if (PL_thiswhite) {
2479                 if (!PL_endwhite)
2480                     PL_endwhite = newSVpvs("");
2481                 sv_catsv(PL_endwhite, PL_thiswhite);
2482                 PL_thiswhite = 0;
2483             }
2484             if (PL_thistoken)
2485                 sv_setpvs(PL_thistoken,"");
2486             else
2487                 PL_realtokenstart = -1;
2488         }
2489 #endif
2490         LEAVE;
2491         PL_bufend = SvPVX(PL_linestr);
2492         PL_bufend += SvCUR(PL_linestr);
2493         PL_expect = XOPERATOR;
2494         PL_sublex_info.sub_inwhat = 0;
2495         return ')';
2496     }
2497 }
2498
2499 /*
2500   scan_const
2501
2502   Extracts a pattern, double-quoted string, or transliteration.  This
2503   is terrifying code.
2504
2505   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2506   processing a pattern (PL_lex_inpat is true), a transliteration
2507   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2508
2509   Returns a pointer to the character scanned up to. If this is
2510   advanced from the start pointer supplied (i.e. if anything was
2511   successfully parsed), will leave an OP for the substring scanned
2512   in pl_yylval. Caller must intuit reason for not parsing further
2513   by looking at the next characters herself.
2514
2515   In patterns:
2516     backslashes:
2517       constants: \N{NAME} only
2518       case and quoting: \U \Q \E
2519     stops on @ and $, but not for $ as tail anchor
2520
2521   In transliterations:
2522     characters are VERY literal, except for - not at the start or end
2523     of the string, which indicates a range. If the range is in bytes,
2524     scan_const expands the range to the full set of intermediate
2525     characters. If the range is in utf8, the hyphen is replaced with
2526     a certain range mark which will be handled by pmtrans() in op.c.
2527
2528   In double-quoted strings:
2529     backslashes:
2530       double-quoted style: \r and \n
2531       constants: \x31, etc.
2532       deprecated backrefs: \1 (in substitution replacements)
2533       case and quoting: \U \Q \E
2534     stops on @ and $
2535
2536   scan_const does *not* construct ops to handle interpolated strings.
2537   It stops processing as soon as it finds an embedded $ or @ variable
2538   and leaves it to the caller to work out what's going on.
2539
2540   embedded arrays (whether in pattern or not) could be:
2541       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2542
2543   $ in double-quoted strings must be the symbol of an embedded scalar.
2544
2545   $ in pattern could be $foo or could be tail anchor.  Assumption:
2546   it's a tail anchor if $ is the last thing in the string, or if it's
2547   followed by one of "()| \r\n\t"
2548
2549   \1 (backreferences) are turned into $1
2550
2551   The structure of the code is
2552       while (there's a character to process) {
2553           handle transliteration ranges
2554           skip regexp comments /(?#comment)/ and codes /(?{code})/
2555           skip #-initiated comments in //x patterns
2556           check for embedded arrays
2557           check for embedded scalars
2558           if (backslash) {
2559               deprecate \1 in substitution replacements
2560               handle string-changing backslashes \l \U \Q \E, etc.
2561               switch (what was escaped) {
2562                   handle \- in a transliteration (becomes a literal -)
2563                   if a pattern and not \N{, go treat as regular character
2564                   handle \132 (octal characters)
2565                   handle \x15 and \x{1234} (hex characters)
2566                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2567                   handle \cV (control characters)
2568                   handle printf-style backslashes (\f, \r, \n, etc)
2569               } (end switch)
2570               continue
2571           } (end if backslash)
2572           handle regular character
2573     } (end while character to read)
2574
2575 */
2576
2577 STATIC char *
2578 S_scan_const(pTHX_ char *start)
2579 {
2580     dVAR;
2581     register char *send = PL_bufend;            /* end of the constant */
2582     SV *sv = newSV(send - start);               /* sv for the constant.  See
2583                                                    note below on sizing. */
2584     register char *s = start;                   /* start of the constant */
2585     register char *d = SvPVX(sv);               /* destination for copies */
2586     bool dorange = FALSE;                       /* are we in a translit range? */
2587     bool didrange = FALSE;                      /* did we just finish a range? */
2588     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2589     I32  this_utf8 = UTF;                       /* Is the source string assumed
2590                                                    to be UTF8?  But, this can
2591                                                    show as true when the source
2592                                                    isn't utf8, as for example
2593                                                    when it is entirely composed
2594                                                    of hex constants */
2595
2596     /* Note on sizing:  The scanned constant is placed into sv, which is
2597      * initialized by newSV() assuming one byte of output for every byte of
2598      * input.  This routine expects newSV() to allocate an extra byte for a
2599      * trailing NUL, which this routine will append if it gets to the end of
2600      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2601      * CAPITAL LETTER A}), or more output than input if the constant ends up
2602      * recoded to utf8, but each time a construct is found that might increase
2603      * the needed size, SvGROW() is called.  Its size parameter each time is
2604      * based on the best guess estimate at the time, namely the length used so
2605      * far, plus the length the current construct will occupy, plus room for
2606      * the trailing NUL, plus one byte for every input byte still unscanned */
2607
2608     UV uv;
2609 #ifdef EBCDIC
2610     UV literal_endpoint = 0;
2611     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2612 #endif
2613
2614     PERL_ARGS_ASSERT_SCAN_CONST;
2615
2616     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2617         /* If we are doing a trans and we know we want UTF8 set expectation */
2618         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2619         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2620     }
2621
2622
2623     while (s < send || dorange) {
2624
2625         /* get transliterations out of the way (they're most literal) */
2626         if (PL_lex_inwhat == OP_TRANS) {
2627             /* expand a range A-Z to the full set of characters.  AIE! */
2628             if (dorange) {
2629                 I32 i;                          /* current expanded character */
2630                 I32 min;                        /* first character in range */
2631                 I32 max;                        /* last character in range */
2632
2633 #ifdef EBCDIC
2634                 UV uvmax = 0;
2635 #endif
2636
2637                 if (has_utf8
2638 #ifdef EBCDIC
2639                     && !native_range
2640 #endif
2641                     ) {
2642                     char * const c = (char*)utf8_hop((U8*)d, -1);
2643                     char *e = d++;
2644                     while (e-- > c)
2645                         *(e + 1) = *e;
2646                     *c = (char)UTF_TO_NATIVE(0xff);
2647                     /* mark the range as done, and continue */
2648                     dorange = FALSE;
2649                     didrange = TRUE;
2650                     continue;
2651                 }
2652
2653                 i = d - SvPVX_const(sv);                /* remember current offset */
2654 #ifdef EBCDIC
2655                 SvGROW(sv,
2656                        SvLEN(sv) + (has_utf8 ?
2657                                     (512 - UTF_CONTINUATION_MARK +
2658                                      UNISKIP(0x100))
2659                                     : 256));
2660                 /* How many two-byte within 0..255: 128 in UTF-8,
2661                  * 96 in UTF-8-mod. */
2662 #else
2663                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2664 #endif
2665                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2666 #ifdef EBCDIC
2667                 if (has_utf8) {
2668                     int j;
2669                     for (j = 0; j <= 1; j++) {
2670                         char * const c = (char*)utf8_hop((U8*)d, -1);
2671                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2672                         if (j)
2673                             min = (U8)uv;
2674                         else if (uv < 256)
2675                             max = (U8)uv;
2676                         else {
2677                             max = (U8)0xff; /* only to \xff */
2678                             uvmax = uv; /* \x{100} to uvmax */
2679                         }
2680                         d = c; /* eat endpoint chars */
2681                      }
2682                 }
2683                else {
2684 #endif
2685                    d -= 2;              /* eat the first char and the - */
2686                    min = (U8)*d;        /* first char in range */
2687                    max = (U8)d[1];      /* last char in range  */
2688 #ifdef EBCDIC
2689                }
2690 #endif
2691
2692                 if (min > max) {
2693                     Perl_croak(aTHX_
2694                                "Invalid range \"%c-%c\" in transliteration operator",
2695                                (char)min, (char)max);
2696                 }
2697
2698 #ifdef EBCDIC
2699                 if (literal_endpoint == 2 &&
2700                     ((isLOWER(min) && isLOWER(max)) ||
2701                      (isUPPER(min) && isUPPER(max)))) {
2702                     if (isLOWER(min)) {
2703                         for (i = min; i <= max; i++)
2704                             if (isLOWER(i))
2705                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2706                     } else {
2707                         for (i = min; i <= max; i++)
2708                             if (isUPPER(i))
2709                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2710                     }
2711                 }
2712                 else
2713 #endif
2714                     for (i = min; i <= max; i++)
2715 #ifdef EBCDIC
2716                         if (has_utf8) {
2717                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2718                             if (UNI_IS_INVARIANT(ch))
2719                                 *d++ = (U8)i;
2720                             else {
2721                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2722                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2723                             }
2724                         }
2725                         else
2726 #endif
2727                             *d++ = (char)i;
2728
2729 #ifdef EBCDIC
2730                 if (uvmax) {
2731                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2732                     if (uvmax > 0x101)
2733                         *d++ = (char)UTF_TO_NATIVE(0xff);
2734                     if (uvmax > 0x100)
2735                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2736                 }
2737 #endif
2738
2739                 /* mark the range as done, and continue */
2740                 dorange = FALSE;
2741                 didrange = TRUE;
2742 #ifdef EBCDIC
2743                 literal_endpoint = 0;
2744 #endif
2745                 continue;
2746             }
2747
2748             /* range begins (ignore - as first or last char) */
2749             else if (*s == '-' && s+1 < send  && s != start) {
2750                 if (didrange) {
2751                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2752                 }
2753                 if (has_utf8
2754 #ifdef EBCDIC
2755                     && !native_range
2756 #endif
2757                     ) {
2758                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2759                     s++;
2760                     continue;
2761                 }
2762                 dorange = TRUE;
2763                 s++;
2764             }
2765             else {
2766                 didrange = FALSE;
2767 #ifdef EBCDIC
2768                 literal_endpoint = 0;
2769                 native_range = TRUE;
2770 #endif
2771             }
2772         }
2773
2774         /* if we get here, we're not doing a transliteration */
2775
2776         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2777            except for the last char, which will be done separately. */
2778         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2779             if (s[2] == '#') {
2780                 while (s+1 < send && *s != ')')
2781                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2782             }
2783             else if (s[2] == '{' /* This should match regcomp.c */
2784                     || (s[2] == '?' && s[3] == '{'))
2785             {
2786                 I32 count = 1;
2787                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2788                 char c;
2789
2790                 while (count && (c = *regparse)) {
2791                     if (c == '\\' && regparse[1])
2792                         regparse++;
2793                     else if (c == '{')
2794                         count++;
2795                     else if (c == '}')
2796                         count--;
2797                     regparse++;
2798                 }
2799                 if (*regparse != ')')
2800                     regparse--;         /* Leave one char for continuation. */
2801                 while (s < regparse)
2802                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2803             }
2804         }
2805
2806         /* likewise skip #-initiated comments in //x patterns */
2807         else if (*s == '#' && PL_lex_inpat &&
2808           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2809             while (s+1 < send && *s != '\n')
2810                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2811         }
2812
2813         /* check for embedded arrays
2814            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2815            */
2816         else if (*s == '@' && s[1]) {
2817             if (isALNUM_lazy_if(s+1,UTF))
2818                 break;
2819             if (strchr(":'{$", s[1]))
2820                 break;
2821             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2822                 break; /* in regexp, neither @+ nor @- are interpolated */
2823         }
2824
2825         /* check for embedded scalars.  only stop if we're sure it's a
2826            variable.
2827         */
2828         else if (*s == '$') {
2829             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2830                 break;
2831             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2832                 if (s[1] == '\\') {
2833                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2834                                    "Possible unintended interpolation of $\\ in regex");
2835                 }
2836                 break;          /* in regexp, $ might be tail anchor */
2837             }
2838         }
2839
2840         /* End of else if chain - OP_TRANS rejoin rest */
2841
2842         /* backslashes */
2843         if (*s == '\\' && s+1 < send) {
2844             char* e;    /* Can be used for ending '}', etc. */
2845
2846             s++;
2847
2848             /* warn on \1 - \9 in substitution replacements, but note that \11
2849              * is an octal; and \19 is \1 followed by '9' */
2850             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2851                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2852             {
2853                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2854                 *--s = '$';
2855                 break;
2856             }
2857
2858             /* string-change backslash escapes */
2859             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2860                 --s;
2861                 break;
2862             }
2863             /* In a pattern, process \N, but skip any other backslash escapes.
2864              * This is because we don't want to translate an escape sequence
2865              * into a meta symbol and have the regex compiler use the meta
2866              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2867              * in spite of this, we do have to process \N here while the proper
2868              * charnames handler is in scope.  See bugs #56444 and #62056.
2869              * There is a complication because \N in a pattern may also stand
2870              * for 'match a non-nl', and not mean a charname, in which case its
2871              * processing should be deferred to the regex compiler.  To be a
2872              * charname it must be followed immediately by a '{', and not look
2873              * like \N followed by a curly quantifier, i.e., not something like
2874              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2875              * quantifier */
2876             else if (PL_lex_inpat
2877                     && (*s != 'N'
2878                         || s[1] != '{'
2879                         || regcurly(s + 1)))
2880             {
2881                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2882                 goto default_action;
2883             }
2884
2885             switch (*s) {
2886
2887             /* quoted - in transliterations */
2888             case '-':
2889                 if (PL_lex_inwhat == OP_TRANS) {
2890                     *d++ = *s++;
2891                     continue;
2892                 }
2893                 /* FALL THROUGH */
2894             default:
2895                 {
2896                     if ((isALPHA(*s) || isDIGIT(*s)))
2897                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2898                                        "Unrecognized escape \\%c passed through",
2899                                        *s);
2900                     /* default action is to copy the quoted character */
2901                     goto default_action;
2902                 }
2903
2904             /* eg. \132 indicates the octal constant 0132 */
2905             case '0': case '1': case '2': case '3':
2906             case '4': case '5': case '6': case '7':
2907                 {
2908                     I32 flags = 0;
2909                     STRLEN len = 3;
2910                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2911                     s += len;
2912                 }
2913                 goto NUM_ESCAPE_INSERT;
2914
2915             /* eg. \o{24} indicates the octal constant \024 */
2916             case 'o':
2917                 {
2918                     STRLEN len;
2919                     const char* error;
2920
2921                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2922                     s += len;
2923                     if (! valid) {
2924                         yyerror(error);
2925                         continue;
2926                     }
2927                     goto NUM_ESCAPE_INSERT;
2928                 }
2929
2930             /* eg. \x24 indicates the hex constant 0x24 */
2931             case 'x':
2932                 ++s;
2933                 if (*s == '{') {
2934                     char* const e = strchr(s, '}');
2935                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2936                       PERL_SCAN_DISALLOW_PREFIX;
2937                     STRLEN len;
2938
2939                     ++s;
2940                     if (!e) {
2941                         yyerror("Missing right brace on \\x{}");
2942                         continue;
2943                     }
2944                     len = e - s;
2945                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2946                     s = e + 1;
2947                 }
2948                 else {
2949                     {
2950                         STRLEN len = 2;
2951                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2952                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2953                         s += len;
2954                     }
2955                 }
2956
2957               NUM_ESCAPE_INSERT:
2958                 /* Insert oct or hex escaped character.  There will always be
2959                  * enough room in sv since such escapes will be longer than any
2960                  * UTF-8 sequence they can end up as, except if they force us
2961                  * to recode the rest of the string into utf8 */
2962
2963                 /* Here uv is the ordinal of the next character being added in
2964                  * unicode (converted from native). */
2965                 if (!UNI_IS_INVARIANT(uv)) {
2966                     if (!has_utf8 && uv > 255) {
2967                         /* Might need to recode whatever we have accumulated so
2968                          * far if it contains any chars variant in utf8 or
2969                          * utf-ebcdic. */
2970
2971                         SvCUR_set(sv, d - SvPVX_const(sv));
2972                         SvPOK_on(sv);
2973                         *d = '\0';
2974                         /* See Note on sizing above.  */
2975                         sv_utf8_upgrade_flags_grow(sv,
2976                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2977                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2978                         d = SvPVX(sv) + SvCUR(sv);
2979                         has_utf8 = TRUE;
2980                     }
2981
2982                     if (has_utf8) {
2983                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2984                         if (PL_lex_inwhat == OP_TRANS &&
2985                             PL_sublex_info.sub_op) {
2986                             PL_sublex_info.sub_op->op_private |=
2987                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2988                                              : OPpTRANS_TO_UTF);
2989                         }
2990 #ifdef EBCDIC
2991                         if (uv > 255 && !dorange)
2992                             native_range = FALSE;
2993 #endif
2994                     }
2995                     else {
2996                         *d++ = (char)uv;
2997                     }
2998                 }
2999                 else {
3000                     *d++ = (char) uv;
3001                 }
3002                 continue;
3003
3004             case 'N':
3005                 /* In a non-pattern \N must be a named character, like \N{LATIN
3006                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3007                  * mean to match a non-newline.  For non-patterns, named
3008                  * characters are converted to their string equivalents. In
3009                  * patterns, named characters are not converted to their
3010                  * ultimate forms for the same reasons that other escapes
3011                  * aren't.  Instead, they are converted to the \N{U+...} form
3012                  * to get the value from the charnames that is in effect right
3013                  * now, while preserving the fact that it was a named character
3014                  * so that the regex compiler knows this */
3015
3016                 /* This section of code doesn't generally use the
3017                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3018                  * a close examination of this macro and determined it is a
3019                  * no-op except on utfebcdic variant characters.  Every
3020                  * character generated by this that would normally need to be
3021                  * enclosed by this macro is invariant, so the macro is not
3022                  * needed, and would complicate use of copy(). There are other
3023                  * parts of this file where the macro is used inconsistently,
3024                  * but are saved by it being a no-op */
3025
3026                 /* The structure of this section of code (besides checking for
3027                  * errors and upgrading to utf8) is:
3028                  *  Further disambiguate between the two meanings of \N, and if
3029                  *      not a charname, go process it elsewhere
3030                  *  If of form \N{U+...}, pass it through if a pattern;
3031                  *      otherwise convert to utf8
3032                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3033                  *  pattern; otherwise convert to utf8 */
3034
3035                 /* Here, s points to the 'N'; the test below is guaranteed to
3036                  * succeed if we are being called on a pattern as we already
3037                  * know from a test above that the next character is a '{'.
3038                  * On a non-pattern \N must mean 'named sequence, which
3039                  * requires braces */
3040                 s++;
3041                 if (*s != '{') {
3042                     yyerror("Missing braces on \\N{}");
3043                     continue;
3044                 }
3045                 s++;
3046
3047                 /* If there is no matching '}', it is an error. */
3048                 if (! (e = strchr(s, '}'))) {
3049                     if (! PL_lex_inpat) {
3050                         yyerror("Missing right brace on \\N{}");
3051                     } else {
3052                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3053                     }
3054                     continue;
3055                 }
3056
3057                 /* Here it looks like a named character */
3058
3059                 if (PL_lex_inpat) {
3060
3061                     /* XXX This block is temporary code.  \N{} implies that the
3062                      * pattern is to have Unicode semantics, and therefore
3063                      * currently has to be encoded in utf8.  By putting it in
3064                      * utf8 now, we save a whole pass in the regular expression
3065                      * compiler.  Once that code is changed so Unicode
3066                      * semantics doesn't necessarily have to be in utf8, this
3067                      * block should be removed */
3068                     if (!has_utf8) {
3069                         SvCUR_set(sv, d - SvPVX_const(sv));
3070                         SvPOK_on(sv);
3071                         *d = '\0';
3072                         /* See Note on sizing above.  */
3073                         sv_utf8_upgrade_flags_grow(sv,
3074                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3075                                         /* 5 = '\N{' + cur char + NUL */
3076                                         (STRLEN)(send - s) + 5);
3077                         d = SvPVX(sv) + SvCUR(sv);
3078                         has_utf8 = TRUE;
3079                     }
3080                 }
3081
3082                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3083                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3084                                 | PERL_SCAN_DISALLOW_PREFIX;
3085                     STRLEN len;
3086
3087                     /* For \N{U+...}, the '...' is a unicode value even on
3088                      * EBCDIC machines */
3089                     s += 2;         /* Skip to next char after the 'U+' */
3090                     len = e - s;
3091                     uv = grok_hex(s, &len, &flags, NULL);
3092                     if (len == 0 || len != (STRLEN)(e - s)) {
3093                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3094                         s = e + 1;
3095                         continue;
3096                     }
3097
3098                     if (PL_lex_inpat) {
3099
3100                         /* Pass through to the regex compiler unchanged.  The
3101                          * reason we evaluated the number above is to make sure
3102                          * there wasn't a syntax error. */
3103                         s -= 5;     /* Include the '\N{U+' */
3104                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3105                         d += e - s + 1;
3106                     }
3107                     else {  /* Not a pattern: convert the hex to string */
3108
3109                          /* If destination is not in utf8, unconditionally
3110                           * recode it to be so.  This is because \N{} implies
3111                           * Unicode semantics, and scalars have to be in utf8
3112                           * to guarantee those semantics */
3113                         if (! has_utf8) {
3114                             SvCUR_set(sv, d - SvPVX_const(sv));
3115                             SvPOK_on(sv);
3116                             *d = '\0';
3117                             /* See Note on sizing above.  */
3118                             sv_utf8_upgrade_flags_grow(
3119                                         sv,
3120                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3121                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3122                             d = SvPVX(sv) + SvCUR(sv);
3123                             has_utf8 = TRUE;
3124                         }
3125
3126                         /* Add the string to the output */
3127                         if (UNI_IS_INVARIANT(uv)) {
3128                             *d++ = (char) uv;
3129                         }
3130                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3131                     }
3132                 }
3133                 else { /* Here is \N{NAME} but not \N{U+...}. */
3134
3135                     SV *res;            /* result from charnames */
3136                     const char *str;    /* the string in 'res' */
3137                     STRLEN len;         /* its length */
3138
3139                     /* Get the value for NAME */
3140                     res = newSVpvn(s, e - s);
3141                     res = new_constant( NULL, 0, "charnames",
3142                                         /* includes all of: \N{...} */
3143                                         res, NULL, s - 3, e - s + 4 );
3144
3145                     /* Most likely res will be in utf8 already since the
3146                      * standard charnames uses pack U, but a custom translator
3147                      * can leave it otherwise, so make sure.  XXX This can be
3148                      * revisited to not have charnames use utf8 for characters
3149                      * that don't need it when regexes don't have to be in utf8
3150                      * for Unicode semantics.  If doing so, remember EBCDIC */
3151                     sv_utf8_upgrade(res);
3152                     str = SvPV_const(res, len);
3153
3154                     /* Don't accept malformed input */
3155                     if (! is_utf8_string((U8 *) str, len)) {
3156                         yyerror("Malformed UTF-8 returned by \\N");
3157                     }
3158                     else if (PL_lex_inpat) {
3159
3160                         if (! len) { /* The name resolved to an empty string */
3161                             Copy("\\N{}", d, 4, char);
3162                             d += 4;
3163                         }
3164                         else {
3165                             /* In order to not lose information for the regex
3166                             * compiler, pass the result in the specially made
3167                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3168                             * the code points in hex of each character
3169                             * returned by charnames */
3170
3171                             const char *str_end = str + len;
3172                             STRLEN char_length;     /* cur char's byte length */
3173                             STRLEN output_length;   /* and the number of bytes
3174                                                        after this is translated
3175                                                        into hex digits */
3176                             const STRLEN off = d - SvPVX_const(sv);
3177
3178                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3179                              * max('U+', '.'); and 1 for NUL */
3180                             char hex_string[2 * UTF8_MAXBYTES + 5];
3181
3182                             /* Get the first character of the result. */
3183                             U32 uv = utf8n_to_uvuni((U8 *) str,
3184                                                     len,
3185                                                     &char_length,
3186                                                     UTF8_ALLOW_ANYUV);
3187
3188                             /* The call to is_utf8_string() above hopefully
3189                              * guarantees that there won't be an error.  But
3190                              * it's easy here to make sure.  The function just
3191                              * above warns and returns 0 if invalid utf8, but
3192                              * it can also return 0 if the input is validly a
3193                              * NUL. Disambiguate */
3194                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3195                                 uv = UNICODE_REPLACEMENT;
3196                             }
3197
3198                             /* Convert first code point to hex, including the
3199                              * boiler plate before it */
3200                             sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3201                             output_length = strlen(hex_string);
3202
3203                             /* Make sure there is enough space to hold it */
3204                             d = off + SvGROW(sv, off
3205                                                  + output_length
3206                                                  + (STRLEN)(send - e)
3207                                                  + 2);  /* '}' + NUL */
3208                             /* And output it */
3209                             Copy(hex_string, d, output_length, char);
3210                             d += output_length;
3211
3212                             /* For each subsequent character, append dot and
3213                              * its ordinal in hex */
3214                             while ((str += char_length) < str_end) {
3215                                 const STRLEN off = d - SvPVX_const(sv);
3216                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3217                                                         str_end - str,
3218                                                         &char_length,
3219                                                         UTF8_ALLOW_ANYUV);
3220                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3221                                     uv = UNICODE_REPLACEMENT;
3222                                 }
3223
3224                                 sprintf(hex_string, ".%X", (unsigned int) uv);
3225                                 output_length = strlen(hex_string);
3226
3227                                 d = off + SvGROW(sv, off
3228                                                      + output_length
3229                                                      + (STRLEN)(send - e)
3230                                                      + 2);      /* '}' +  NUL */
3231                                 Copy(hex_string, d, output_length, char);
3232                                 d += output_length;
3233                             }
3234
3235                             *d++ = '}'; /* Done.  Add the trailing brace */
3236                         }
3237                     }
3238                     else { /* Here, not in a pattern.  Convert the name to a
3239                             * string. */
3240
3241                          /* If destination is not in utf8, unconditionally
3242                           * recode it to be so.  This is because \N{} implies
3243                           * Unicode semantics, and scalars have to be in utf8
3244                           * to guarantee those semantics */
3245                         if (! has_utf8) {
3246                             SvCUR_set(sv, d - SvPVX_const(sv));
3247                             SvPOK_on(sv);
3248                             *d = '\0';
3249                             /* See Note on sizing above.  */
3250                             sv_utf8_upgrade_flags_grow(sv,
3251                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3252                                                 len + (STRLEN)(send - s) + 1);
3253                             d = SvPVX(sv) + SvCUR(sv);
3254                             has_utf8 = TRUE;
3255                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3256
3257                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3258                              * set correctly here). */
3259                             const STRLEN off = d - SvPVX_const(sv);
3260                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3261                         }
3262                         Copy(str, d, len, char);
3263                         d += len;
3264                     }
3265                     SvREFCNT_dec(res);
3266
3267                     /* Deprecate non-approved name syntax */
3268                     if (ckWARN_d(WARN_DEPRECATED)) {
3269                         bool problematic = FALSE;
3270                         char* i = s;
3271
3272                         /* For non-ut8 input, look to see that the first
3273                          * character is an alpha, then loop through the rest
3274                          * checking that each is a continuation */
3275                         if (! this_utf8) {
3276                             if (! isALPHAU(*i)) problematic = TRUE;
3277                             else for (i = s + 1; i < e; i++) {
3278                                 if (isCHARNAME_CONT(*i)) continue;
3279                                 problematic = TRUE;
3280                                 break;
3281                             }
3282                         }
3283                         else {
3284                             /* Similarly for utf8.  For invariants can check
3285                              * directly.  We accept anything above the latin1
3286                              * range because it is immaterial to Perl if it is
3287                              * correct or not, and is expensive to check.  But
3288                              * it is fairly easy in the latin1 range to convert
3289                              * the variants into a single character and check
3290                              * those */
3291                             if (UTF8_IS_INVARIANT(*i)) {
3292                                 if (! isALPHAU(*i)) problematic = TRUE;
3293                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3294                                 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3295                                                                             *(i+1)))))
3296                                 {
3297                                     problematic = TRUE;
3298                                 }
3299                             }
3300                             if (! problematic) for (i = s + UTF8SKIP(s);
3301                                                     i < e;
3302                                                     i+= UTF8SKIP(i))
3303                             {
3304                                 if (UTF8_IS_INVARIANT(*i)) {
3305                                     if (isCHARNAME_CONT(*i)) continue;
3306                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3307                                     continue;
3308                                 } else if (isCHARNAME_CONT(
3309                                             UNI_TO_NATIVE(
3310                                             UTF8_ACCUMULATE(*i, *(i+1)))))
3311                                 {
3312                                     continue;
3313                                 }
3314                                 problematic = TRUE;
3315                                 break;
3316                             }
3317                         }
3318                         if (problematic) {
3319                             /* The e-i passed to the final %.*s makes sure that
3320                              * should the trailing NUL be missing that this
3321                              * print won't run off the end of the string */
3322                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3323                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3324                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3325                         }
3326                     }
3327                 } /* End \N{NAME} */
3328 #ifdef EBCDIC
3329                 if (!dorange)
3330                     native_range = FALSE; /* \N{} is defined to be Unicode */
3331 #endif
3332                 s = e + 1;  /* Point to just after the '}' */
3333                 continue;
3334
3335             /* \c is a control character */
3336             case 'c':
3337                 s++;
3338                 if (s < send) {
3339                     *d++ = grok_bslash_c(*s++, 1);
3340                 }
3341                 else {
3342                     yyerror("Missing control char name in \\c");
3343                 }
3344                 continue;
3345
3346             /* printf-style backslashes, formfeeds, newlines, etc */
3347             case 'b':
3348                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3349                 break;
3350             case 'n':
3351                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3352                 break;
3353             case 'r':
3354                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3355                 break;
3356             case 'f':
3357                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3358                 break;
3359             case 't':
3360                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3361                 break;
3362             case 'e':
3363                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3364                 break;
3365             case 'a':
3366                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3367                 break;
3368             } /* end switch */
3369
3370             s++;
3371             continue;
3372         } /* end if (backslash) */
3373 #ifdef EBCDIC
3374         else
3375             literal_endpoint++;
3376 #endif
3377
3378     default_action:
3379         /* If we started with encoded form, or already know we want it,
3380            then encode the next character */
3381         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3382             STRLEN len  = 1;
3383
3384
3385             /* One might think that it is wasted effort in the case of the
3386              * source being utf8 (this_utf8 == TRUE) to take the next character
3387              * in the source, convert it to an unsigned value, and then convert
3388              * it back again.  But the source has not been validated here.  The
3389              * routine that does the conversion checks for errors like
3390              * malformed utf8 */
3391
3392             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3393             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3394             if (!has_utf8) {
3395                 SvCUR_set(sv, d - SvPVX_const(sv));
3396                 SvPOK_on(sv);
3397                 *d = '\0';
3398                 /* See Note on sizing above.  */
3399                 sv_utf8_upgrade_flags_grow(sv,
3400                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3401                                         need + (STRLEN)(send - s) + 1);
3402                 d = SvPVX(sv) + SvCUR(sv);
3403                 has_utf8 = TRUE;
3404             } else if (need > len) {
3405                 /* encoded value larger than old, may need extra space (NOTE:
3406                  * SvCUR() is not set correctly here).   See Note on sizing
3407                  * above.  */
3408                 const STRLEN off = d - SvPVX_const(sv);
3409                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3410             }
3411             s += len;
3412
3413             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3414 #ifdef EBCDIC
3415             if (uv > 255 && !dorange)
3416                 native_range = FALSE;
3417 #endif
3418         }
3419         else {
3420             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3421         }
3422     } /* while loop to process each character */
3423
3424     /* terminate the string and set up the sv */
3425     *d = '\0';
3426     SvCUR_set(sv, d - SvPVX_const(sv));
3427     if (SvCUR(sv) >= SvLEN(sv))
3428         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3429
3430     SvPOK_on(sv);
3431     if (PL_encoding && !has_utf8) {
3432         sv_recode_to_utf8(sv, PL_encoding);
3433         if (SvUTF8(sv))
3434             has_utf8 = TRUE;
3435     }
3436     if (has_utf8) {
3437         SvUTF8_on(sv);
3438         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3439             PL_sublex_info.sub_op->op_private |=
3440                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3441         }
3442     }
3443
3444     /* shrink the sv if we allocated more than we used */
3445     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3446         SvPV_shrink_to_cur(sv);
3447     }
3448
3449     /* return the substring (via pl_yylval) only if we parsed anything */
3450     if (s > PL_bufptr) {
3451         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3452             const char *const key = PL_lex_inpat ? "qr" : "q";
3453             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3454             const char *type;
3455             STRLEN typelen;
3456
3457             if (PL_lex_inwhat == OP_TRANS) {
3458                 type = "tr";
3459                 typelen = 2;
3460             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3461                 type = "s";
3462                 typelen = 1;
3463             } else  {
3464                 type = "qq";
3465                 typelen = 2;
3466             }
3467
3468             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3469                                 type, typelen);
3470         }
3471         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3472     } else
3473         SvREFCNT_dec(sv);
3474     return s;
3475 }
3476
3477 /* S_intuit_more
3478  * Returns TRUE if there's more to the expression (e.g., a subscript),
3479  * FALSE otherwise.
3480  *
3481  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3482  *
3483  * ->[ and ->{ return TRUE
3484  * { and [ outside a pattern are always subscripts, so return TRUE
3485  * if we're outside a pattern and it's not { or [, then return FALSE
3486  * if we're in a pattern and the first char is a {
3487  *   {4,5} (any digits around the comma) returns FALSE
3488  * if we're in a pattern and the first char is a [
3489  *   [] returns FALSE
3490  *   [SOMETHING] has a funky algorithm to decide whether it's a
3491  *      character class or not.  It has to deal with things like
3492  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3493  * anything else returns TRUE
3494  */
3495
3496 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3497
3498 STATIC int
3499 S_intuit_more(pTHX_ register char *s)
3500 {
3501     dVAR;
3502
3503     PERL_ARGS_ASSERT_INTUIT_MORE;
3504
3505     if (PL_lex_brackets)
3506         return TRUE;
3507     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3508         return TRUE;
3509     if (*s != '{' && *s != '[')
3510         return FALSE;
3511     if (!PL_lex_inpat)
3512         return TRUE;
3513
3514     /* In a pattern, so maybe we have {n,m}. */
3515     if (*s == '{') {
3516         if (regcurly(s)) {
3517             return FALSE;
3518         }
3519         return TRUE;
3520     }
3521
3522     /* On the other hand, maybe we have a character class */
3523
3524     s++;
3525     if (*s == ']' || *s == '^')
3526         return FALSE;
3527     else {
3528         /* this is terrifying, and it works */
3529         int weight = 2;         /* let's weigh the evidence */
3530         char seen[256];
3531         unsigned char un_char = 255, last_un_char;
3532         const char * const send = strchr(s,']');
3533         char tmpbuf[sizeof PL_tokenbuf * 4];
3534
3535         if (!send)              /* has to be an expression */
3536             return TRUE;
3537
3538         Zero(seen,256,char);
3539         if (*s == '$')
3540             weight -= 3;
3541         else if (isDIGIT(*s)) {
3542             if (s[1] != ']') {
3543                 if (isDIGIT(s[1]) && s[2] == ']')
3544                     weight -= 10;
3545             }
3546             else
3547                 weight -= 100;
3548         }
3549         for (; s < send; s++) {
3550             last_un_char = un_char;
3551             un_char = (unsigned char)*s;
3552             switch (*s) {
3553             case '@':
3554             case '&':
3555             case '$':
3556                 weight -= seen[un_char] * 10;
3557                 if (isALNUM_lazy_if(s+1,UTF)) {
3558                     int len;
3559                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3560                     len = (int)strlen(tmpbuf);
3561                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3562                         weight -= 100;
3563                     else
3564                         weight -= 10;
3565                 }
3566                 else if (*s == '$' && s[1] &&
3567                   strchr("[#!%*<>()-=",s[1])) {
3568                     if (/*{*/ strchr("])} =",s[2]))
3569                         weight -= 10;
3570                     else
3571                         weight -= 1;
3572                 }
3573                 break;
3574             case '\\':
3575                 un_char = 254;
3576                 if (s[1]) {
3577                     if (strchr("wds]",s[1]))
3578                         weight += 100;
3579                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3580                         weight += 1;
3581                     else if (strchr("rnftbxcav",s[1]))
3582                         weight += 40;
3583                     else if (isDIGIT(s[1])) {
3584                         weight += 40;
3585                         while (s[1] && isDIGIT(s[1]))
3586                             s++;
3587                     }
3588                 }
3589                 else
3590                     weight += 100;
3591                 break;
3592             case '-':
3593                 if (s[1] == '\\')
3594                     weight += 50;
3595                 if (strchr("aA01! ",last_un_char))
3596                     weight += 30;
3597                 if (strchr("zZ79~",s[1]))
3598                     weight += 30;
3599                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3600                     weight -= 5;        /* cope with negative subscript */
3601                 break;
3602             default:
3603                 if (!isALNUM(last_un_char)
3604                     && !(last_un_char == '$' || last_un_char == '@'
3605                          || last_un_char == '&')
3606                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3607                     char *d = tmpbuf;
3608                     while (isALPHA(*s))
3609                         *d++ = *s++;
3610                     *d = '\0';
3611                     if (keyword(tmpbuf, d - tmpbuf, 0))
3612                         weight -= 150;
3613                 }
3614                 if (un_char == last_un_char + 1)
3615                     weight += 5;
3616                 weight -= seen[un_char];
3617                 break;
3618             }
3619             seen[un_char]++;
3620         }
3621         if (weight >= 0)        /* probably a character class */
3622             return FALSE;
3623     }
3624
3625     return TRUE;
3626 }
3627
3628 /*
3629  * S_intuit_method
3630  *
3631  * Does all the checking to disambiguate
3632  *   foo bar
3633  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3634  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3635  *
3636  * First argument is the stuff after the first token, e.g. "bar".
3637  *
3638  * Not a method if bar is a filehandle.
3639  * Not a method if foo is a subroutine prototyped to take a filehandle.
3640  * Not a method if it's really "Foo $bar"
3641  * Method if it's "foo $bar"
3642  * Not a method if it's really "print foo $bar"
3643  * Method if it's really "foo package::" (interpreted as package->foo)
3644  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3645  * Not a method if bar is a filehandle or package, but is quoted with
3646  *   =>
3647  */
3648
3649 STATIC int
3650 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3651 {
3652     dVAR;
3653     char *s = start + (*start == '$');
3654     char tmpbuf[sizeof PL_tokenbuf];
3655     STRLEN len;
3656     GV* indirgv;
3657 #ifdef PERL_MAD
3658     int soff;
3659 #endif
3660
3661     PERL_ARGS_ASSERT_INTUIT_METHOD;
3662
3663     if (gv) {
3664         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3665             return 0;
3666         if (cv) {
3667             if (SvPOK(cv)) {
3668                 const char *proto = SvPVX_const(cv);
3669                 if (proto) {
3670                     if (*proto == ';')
3671                         proto++;
3672                     if (*proto == '*')
3673                         return 0;
3674                 }
3675             }
3676         } else
3677             gv = NULL;
3678     }
3679     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3680     /* start is the beginning of the possible filehandle/object,
3681      * and s is the end of it
3682      * tmpbuf is a copy of it
3683      */
3684
3685     if (*start == '$') {
3686         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3687                 isUPPER(*PL_tokenbuf))
3688             return 0;
3689 #ifdef PERL_MAD
3690         len = start - SvPVX(PL_linestr);
3691 #endif
3692         s = PEEKSPACE(s);
3693 #ifdef PERL_MAD
3694         start = SvPVX(PL_linestr) + len;
3695 #endif
3696         PL_bufptr = start;
3697         PL_expect = XREF;
3698         return *s == '(' ? FUNCMETH : METHOD;
3699     }
3700     if (!keyword(tmpbuf, len, 0)) {
3701         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3702             len -= 2;
3703             tmpbuf[len] = '\0';
3704 #ifdef PERL_MAD
3705             soff = s - SvPVX(PL_linestr);
3706 #endif
3707             goto bare_package;
3708         }
3709         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3710         if (indirgv && GvCVu(indirgv))
3711             return 0;
3712         /* filehandle or package name makes it a method */
3713         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3714 #ifdef PERL_MAD
3715             soff = s - SvPVX(PL_linestr);
3716 #endif
3717             s = PEEKSPACE(s);
3718             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3719                 return 0;       /* no assumptions -- "=>" quotes bearword */
3720       bare_package:
3721             start_force(PL_curforce);
3722             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3723                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3724             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3725             if (PL_madskills)
3726                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3727             PL_expect = XTERM;
3728             force_next(WORD);
3729             PL_bufptr = s;
3730 #ifdef PERL_MAD
3731             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3732 #endif
3733             return *s == '(' ? FUNCMETH : METHOD;
3734         }
3735     }
3736     return 0;
3737 }
3738
3739 /* Encoded script support. filter_add() effectively inserts a
3740  * 'pre-processing' function into the current source input stream.
3741  * Note that the filter function only applies to the current source file
3742  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3743  *
3744  * The datasv parameter (which may be NULL) can be used to pass
3745  * private data to this instance of the filter. The filter function
3746  * can recover the SV using the FILTER_DATA macro and use it to
3747  * store private buffers and state information.
3748  *
3749  * The supplied datasv parameter is upgraded to a PVIO type
3750  * and the IoDIRP/IoANY field is used to store the function pointer,
3751  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3752  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3753  * private use must be set using malloc'd pointers.
3754  */
3755
3756 SV *
3757 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3758 {
3759     dVAR;
3760     if (!funcp)
3761         return NULL;
3762
3763     if (!PL_parser)
3764         return NULL;
3765
3766     if (!PL_rsfp_filters)
3767         PL_rsfp_filters = newAV();
3768     if (!datasv)
3769         datasv = newSV(0);
3770     SvUPGRADE(datasv, SVt_PVIO);
3771     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3772     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3773     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3774                           FPTR2DPTR(void *, IoANY(datasv)),
3775                           SvPV_nolen(datasv)));
3776     av_unshift(PL_rsfp_filters, 1);
3777     av_store(PL_rsfp_filters, 0, datasv) ;
3778     return(datasv);
3779 }
3780
3781
3782 /* Delete most recently added instance of this filter function. */
3783 void
3784 Perl_filter_del(pTHX_ filter_t funcp)
3785 {
3786     dVAR;
3787     SV *datasv;
3788
3789     PERL_ARGS_ASSERT_FILTER_DEL;
3790
3791 #ifdef DEBUGGING
3792     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3793                           FPTR2DPTR(void*, funcp)));
3794 #endif
3795     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3796         return;
3797     /* if filter is on top of stack (usual case) just pop it off */
3798     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3799     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3800         sv_free(av_pop(PL_rsfp_filters));
3801
3802         return;
3803     }
3804     /* we need to search for the correct entry and clear it     */
3805     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3806 }
3807
3808
3809 /* Invoke the idxth filter function for the current rsfp.        */
3810 /* maxlen 0 = read one text line */
3811 I32
3812 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3813 {
3814     dVAR;
3815     filter_t funcp;
3816     SV *datasv = NULL;
3817     /* This API is bad. It should have been using unsigned int for maxlen.
3818        Not sure if we want to change the API, but if not we should sanity
3819        check the value here.  */
3820     const unsigned int correct_length
3821         = maxlen < 0 ?
3822 #ifdef PERL_MICRO
3823         0x7FFFFFFF
3824 #else
3825         INT_MAX
3826 #endif
3827         : maxlen;
3828
3829     PERL_ARGS_ASSERT_FILTER_READ;
3830
3831     if (!PL_parser || !PL_rsfp_filters)
3832         return -1;
3833     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3834         /* Provide a default input filter to make life easy.    */
3835         /* Note that we append to the line. This is handy.      */
3836         DEBUG_P(PerlIO_printf(Perl_debug_log,
3837                               "filter_read %d: from rsfp\n", idx));
3838         if (correct_length) {
3839             /* Want a block */
3840             int len ;
3841             const int old_len = SvCUR(buf_sv);
3842
3843             /* ensure buf_sv is large enough */
3844             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3845             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3846                                    correct_length)) <= 0) {
3847                 if (PerlIO_error(PL_rsfp))
3848                     return -1;          /* error */
3849                 else
3850                     return 0 ;          /* end of file */
3851             }
3852             SvCUR_set(buf_sv, old_len + len) ;
3853             SvPVX(buf_sv)[old_len + len] = '\0';
3854         } else {
3855             /* Want a line */
3856             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3857                 if (PerlIO_error(PL_rsfp))
3858                     return -1;          /* error */
3859                 else
3860                     return 0 ;          /* end of file */
3861             }
3862         }
3863         return SvCUR(buf_sv);
3864     }
3865     /* Skip this filter slot if filter has been deleted */
3866     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3867         DEBUG_P(PerlIO_printf(Perl_debug_log,
3868                               "filter_read %d: skipped (filter deleted)\n",
3869                               idx));
3870         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3871     }
3872     /* Get function pointer hidden within datasv        */
3873     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3874     DEBUG_P(PerlIO_printf(Perl_debug_log,
3875                           "filter_read %d: via function %p (%s)\n",
3876                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3877     /* Call function. The function is expected to       */
3878     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3879     /* Return: <0:error, =0:eof, >0:not eof             */
3880     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3881 }
3882
3883 STATIC char *
3884 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3885 {
3886     dVAR;
3887
3888     PERL_ARGS_ASSERT_FILTER_GETS;
3889
3890 #ifdef PERL_CR_FILTER
3891     if (!PL_rsfp_filters) {
3892         filter_add(S_cr_textfilter,NULL);
3893     }
3894 #endif
3895     if (PL_rsfp_filters) {
3896         if (!append)
3897             SvCUR_set(sv, 0);   /* start with empty line        */
3898         if (FILTER_READ(0, sv, 0) > 0)
3899             return ( SvPVX(sv) ) ;
3900         else
3901             return NULL ;
3902     }
3903     else
3904         return (sv_gets(sv, PL_rsfp, append));
3905 }
3906
3907 STATIC HV *
3908 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3909 {
3910     dVAR;
3911     GV *gv;
3912
3913     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3914
3915     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3916         return PL_curstash;
3917
3918     if (len > 2 &&
3919         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3920         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3921     {
3922         return GvHV(gv);                        /* Foo:: */
3923     }
3924
3925     /* use constant CLASS => 'MyClass' */
3926     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3927     if (gv && GvCV(gv)) {
3928         SV * const sv = cv_const_sv(GvCV(gv));
3929         if (sv)
3930             pkgname = SvPV_const(sv, len);
3931     }
3932
3933     return gv_stashpvn(pkgname, len, 0);
3934 }
3935
3936 /*
3937  * S_readpipe_override
3938  * Check whether readpipe() is overriden, and generates the appropriate
3939  * optree, provided sublex_start() is called afterwards.
3940  */
3941 STATIC void
3942 S_readpipe_override(pTHX)
3943 {
3944     GV **gvp;
3945     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3946     pl_yylval.ival = OP_BACKTICK;
3947     if ((gv_readpipe
3948                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3949             ||
3950             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3951              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3952              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3953     {
3954         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3955             op_append_elem(OP_LIST,
3956                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3957                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3958     }
3959 }
3960
3961 #ifdef PERL_MAD
3962  /*
3963  * Perl_madlex
3964  * The intent of this yylex wrapper is to minimize the changes to the
3965  * tokener when we aren't interested in collecting madprops.  It remains
3966  * to be seen how successful this strategy will be...
3967  */
3968
3969 int
3970 Perl_madlex(pTHX)
3971 {
3972     int optype;
3973     char *s = PL_bufptr;
3974
3975     /* make sure PL_thiswhite is initialized */
3976     PL_thiswhite = 0;
3977     PL_thismad = 0;
3978
3979     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3980     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
3981         return S_pending_ident(aTHX);
3982
3983     /* previous token ate up our whitespace? */
3984     if (!PL_lasttoke && PL_nextwhite) {
3985         PL_thiswhite = PL_nextwhite;
3986         PL_nextwhite = 0;
3987     }
3988
3989     /* isolate the token, and figure out where it is without whitespace */
3990     PL_realtokenstart = -1;
3991     PL_thistoken = 0;
3992     optype = yylex();
3993     s = PL_bufptr;
3994     assert(PL_curforce < 0);
3995
3996     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3997         if (!PL_thistoken) {
3998             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3999                 PL_thistoken = newSVpvs("");
4000             else {
4001                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4002                 PL_thistoken = newSVpvn(tstart, s - tstart);
4003             }
4004         }
4005         if (PL_thismad) /* install head */
4006             CURMAD('X', PL_thistoken);
4007     }
4008
4009     /* last whitespace of a sublex? */
4010     if (optype == ')' && PL_endwhite) {
4011         CURMAD('X', PL_endwhite);
4012     }
4013
4014     if (!PL_thismad) {
4015
4016         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4017         if (!PL_thiswhite && !PL_endwhite && !optype) {
4018             sv_free(PL_thistoken);
4019             PL_thistoken = 0;
4020             return 0;
4021         }
4022
4023         /* put off final whitespace till peg */
4024         if (optype == ';' && !PL_rsfp) {
4025             PL_nextwhite = PL_thiswhite;
4026             PL_thiswhite = 0;
4027         }
4028         else if (PL_thisopen) {
4029             CURMAD('q', PL_thisopen);
4030             if (PL_thistoken)
4031                 sv_free(PL_thistoken);
4032             PL_thistoken = 0;
4033         }
4034         else {
4035             /* Store actual token text as madprop X */
4036             CURMAD('X', PL_thistoken);
4037         }
4038
4039         if (PL_thiswhite) {
4040             /* add preceding whitespace as madprop _ */
4041             CURMAD('_', PL_thiswhite);
4042         }
4043
4044         if (PL_thisstuff) {
4045             /* add quoted material as madprop = */
4046             CURMAD('=', PL_thisstuff);
4047         }
4048
4049         if (PL_thisclose) {
4050             /* add terminating quote as madprop Q */
4051             CURMAD('Q', PL_thisclose);
4052         }
4053     }
4054
4055     /* special processing based on optype */
4056
4057     switch (optype) {
4058
4059     /* opval doesn't need a TOKEN since it can already store mp */
4060     case WORD:
4061     case METHOD:
4062     case FUNCMETH:
4063     case THING:
4064     case PMFUNC:
4065     case PRIVATEREF:
4066     case FUNC0SUB:
4067     case UNIOPSUB:
4068     case LSTOPSUB:
4069         if (pl_yylval.opval)
4070             append_madprops(PL_thismad, pl_yylval.opval, 0);
4071         PL_thismad = 0;
4072         return optype;
4073
4074     /* fake EOF */
4075     case 0:
4076         optype = PEG;
4077         if (PL_endwhite) {
4078             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4079             PL_endwhite = 0;
4080         }
4081         break;
4082
4083     case ']':
4084     case '}':
4085         if (PL_faketokens)
4086             break;
4087         /* remember any fake bracket that lexer is about to discard */
4088         if (PL_lex_brackets == 1 &&
4089             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4090         {
4091             s = PL_bufptr;
4092             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4093                 s++;
4094             if (*s == '}') {
4095                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4096                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4097                 PL_thiswhite = 0;
4098                 PL_bufptr = s - 1;
4099                 break;  /* don't bother looking for trailing comment */
4100             }
4101             else
4102                 s = PL_bufptr;
4103         }
4104         if (optype == ']')
4105             break;
4106         /* FALLTHROUGH */
4107
4108     /* attach a trailing comment to its statement instead of next token */
4109     case ';':
4110         if (PL_faketokens)
4111             break;
4112         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4113             s = PL_bufptr;
4114             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4115                 s++;
4116             if (*s == '\n' || *s == '#') {
4117                 while (s < PL_bufend && *s != '\n')
4118                     s++;
4119                 if (s < PL_bufend)
4120                     s++;
4121                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4122                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4123                 PL_thiswhite = 0;
4124                 PL_bufptr = s;
4125             }
4126         }
4127         break;
4128
4129     /* pval */
4130     case LABEL:
4131         break;
4132
4133     /* ival */
4134     default:
4135         break;
4136
4137     }
4138
4139     /* Create new token struct.  Note: opvals return early above. */
4140     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4141     PL_thismad = 0;
4142     return optype;
4143 }
4144 #endif
4145
4146 STATIC char *
4147 S_tokenize_use(pTHX_ int is_use, char *s) {
4148     dVAR;
4149
4150     PERL_ARGS_ASSERT_TOKENIZE_USE;
4151
4152     if (PL_expect != XSTATE)
4153         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4154                     is_use ? "use" : "no"));
4155     s = SKIPSPACE1(s);
4156     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4157         s = force_version(s, TRUE);
4158         if (*s == ';' || *s == '}'
4159                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4160             start_force(PL_curforce);
4161             NEXTVAL_NEXTTOKE.opval = NULL;
4162             force_next(WORD);
4163         }
4164         else if (*s == 'v') {
4165             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4166             s = force_version(s, FALSE);
4167         }
4168     }
4169     else {
4170         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4171         s = force_version(s, FALSE);
4172     }
4173     pl_yylval.ival = is_use;
4174     return s;
4175 }
4176 #ifdef DEBUGGING
4177     static const char* const exp_name[] =
4178         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4179           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4180         };
4181 #endif
4182
4183 /*
4184   yylex
4185
4186   Works out what to call the token just pulled out of the input
4187   stream.  The yacc parser takes care of taking the ops we return and
4188   stitching them into a tree.
4189
4190   Returns:
4191     PRIVATEREF
4192
4193   Structure:
4194       if read an identifier
4195           if we're in a my declaration
4196               croak if they tried to say my($foo::bar)
4197               build the ops for a my() declaration
4198           if it's an access to a my() variable
4199               are we in a sort block?
4200                   croak if my($a); $a <=> $b
4201               build ops for access to a my() variable
4202           if in a dq string, and they've said @foo and we can't find @foo
4203               croak
4204           build ops for a bareword
4205       if we already built the token before, use it.
4206 */
4207
4208
4209 #ifdef __SC__
4210 #pragma segment Perl_yylex
4211 #endif
4212 int
4213 Perl_yylex(pTHX)
4214 {
4215     dVAR;
4216     register char *s = PL_bufptr;
4217     register char *d;
4218     STRLEN len;
4219     bool bof = FALSE;
4220     U32 fake_eof = 0;
4221
4222     /* orig_keyword, gvp, and gv are initialized here because
4223      * jump to the label just_a_word_zero can bypass their
4224      * initialization later. */
4225     I32 orig_keyword = 0;
4226     GV *gv = NULL;
4227     GV **gvp = NULL;
4228
4229     DEBUG_T( {
4230         SV* tmp = newSVpvs("");
4231         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4232             (IV)CopLINE(PL_curcop),
4233             lex_state_names[PL_lex_state],
4234             exp_name[PL_expect],
4235             pv_display(tmp, s, strlen(s), 0, 60));
4236         SvREFCNT_dec(tmp);
4237     } );
4238     /* check if there's an identifier for us to look at */
4239     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4240         return REPORT(S_pending_ident(aTHX));
4241
4242     /* no identifier pending identification */
4243
4244     switch (PL_lex_state) {
4245 #ifdef COMMENTARY
4246     case LEX_NORMAL:            /* Some compilers will produce faster */
4247     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4248         break;
4249 #endif
4250
4251     /* when we've already built the next token, just pull it out of the queue */
4252     case LEX_KNOWNEXT:
4253 #ifdef PERL_MAD
4254         PL_lasttoke--;
4255         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4256         if (PL_madskills) {
4257             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4258             PL_nexttoke[PL_lasttoke].next_mad = 0;
4259             if (PL_thismad && PL_thismad->mad_key == '_') {
4260                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4261                 PL_thismad->mad_val = 0;
4262                 mad_free(PL_thismad);
4263                 PL_thismad = 0;
4264             }
4265         }
4266         if (!PL_lasttoke) {
4267             PL_lex_state = PL_lex_defer;
4268             PL_expect = PL_lex_expect;
4269             PL_lex_defer = LEX_NORMAL;
4270             if (!PL_nexttoke[PL_lasttoke].next_type)
4271                 return yylex();
4272         }
4273 #else
4274         PL_nexttoke--;
4275         pl_yylval = PL_nextval[PL_nexttoke];
4276         if (!PL_nexttoke) {
4277             PL_lex_state = PL_lex_defer;
4278             PL_expect = PL_lex_expect;
4279             PL_lex_defer = LEX_NORMAL;
4280         }
4281 #endif
4282 #ifdef PERL_MAD
4283         /* FIXME - can these be merged?  */
4284         return(PL_nexttoke[PL_lasttoke].next_type);
4285 #else
4286         return REPORT(PL_nexttype[PL_nexttoke]);
4287 #endif
4288
4289     /* interpolated case modifiers like \L \U, including \Q and \E.
4290        when we get here, PL_bufptr is at the \
4291     */
4292     case LEX_INTERPCASEMOD:
4293 #ifdef DEBUGGING
4294         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4295             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4296 #endif
4297         /* handle \E or end of string */
4298         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4299             /* if at a \E */
4300             if (PL_lex_casemods) {
4301                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4302                 PL_lex_casestack[PL_lex_casemods] = '\0';
4303
4304                 if (PL_bufptr != PL_bufend
4305                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4306                     PL_bufptr += 2;
4307                     PL_lex_state = LEX_INTERPCONCAT;
4308 #ifdef PERL_MAD
4309                     if (PL_madskills)
4310                         PL_thistoken = newSVpvs("\\E");
4311 #endif
4312                 }
4313                 return REPORT(')');
4314             }
4315 #ifdef PERL_MAD
4316             while (PL_bufptr != PL_bufend &&
4317               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4318                 if (!PL_thiswhite)
4319                     PL_thiswhite = newSVpvs("");
4320                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4321                 PL_bufptr += 2;
4322             }
4323 #else
4324             if (PL_bufptr != PL_bufend)
4325                 PL_bufptr += 2;
4326 #endif
4327             PL_lex_state = LEX_INTERPCONCAT;
4328             return yylex();
4329         }
4330         else {
4331             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4332               "### Saw case modifier\n"); });
4333             s = PL_bufptr + 1;
4334             if (s[1] == '\\' && s[2] == 'E') {
4335 #ifdef PERL_MAD
4336                 if (!PL_thiswhite)
4337                     PL_thiswhite = newSVpvs("");
4338                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4339 #endif
4340                 PL_bufptr = s + 3;
4341                 PL_lex_state = LEX_INTERPCONCAT;
4342                 return yylex();
4343             }
4344             else {
4345                 I32 tmp;
4346                 if (!PL_madskills) /* when just compiling don't need correct */
4347                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4348                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4349                 if ((*s == 'L' || *s == 'U') &&
4350                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4351                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4352                     return REPORT(')');
4353                 }
4354                 if (PL_lex_casemods > 10)
4355                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4356                 PL_lex_casestack[PL_lex_casemods++] = *s;
4357                 PL_lex_casestack[PL_lex_casemods] = '\0';
4358                 PL_lex_state = LEX_INTERPCONCAT;
4359                 start_force(PL_curforce);
4360                 NEXTVAL_NEXTTOKE.ival = 0;
4361                 force_next('(');
4362                 start_force(PL_curforce);
4363                 if (*s == 'l')
4364                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4365                 else if (*s == 'u')
4366                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4367                 else if (*s == 'L')
4368                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4369                 else if (*s == 'U')
4370                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4371                 else if (*s == 'Q')
4372                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4373                 else
4374                     Perl_croak(aTHX_ "panic: yylex");
4375                 if (PL_madskills) {
4376                     SV* const tmpsv = newSVpvs("\\ ");
4377                     /* replace the space with the character we want to escape
4378                      */
4379                     SvPVX(tmpsv)[1] = *s;
4380                     curmad('_', tmpsv);
4381                 }
4382                 PL_bufptr = s + 1;
4383             }
4384             force_next(FUNC);
4385             if (PL_lex_starts) {
4386                 s = PL_bufptr;
4387                 PL_lex_starts = 0;
4388 #ifdef PERL_MAD
4389                 if (PL_madskills) {
4390                     if (PL_thistoken)
4391                         sv_free(PL_thistoken);
4392                     PL_thistoken = newSVpvs("");
4393                 }
4394 #endif
4395                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4396                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4397                     OPERATOR(',');
4398                 else
4399                     Aop(OP_CONCAT);
4400             }
4401             else
4402                 return yylex();
4403         }
4404
4405     case LEX_INTERPPUSH:
4406         return REPORT(sublex_push());
4407
4408     case LEX_INTERPSTART:
4409         if (PL_bufptr == PL_bufend)
4410             return REPORT(sublex_done());
4411         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4412               "### Interpolated variable\n"); });
4413         PL_expect = XTERM;
4414         PL_lex_dojoin = (*PL_bufptr == '@');
4415         PL_lex_state = LEX_INTERPNORMAL;
4416         if (PL_lex_dojoin) {
4417             start_force(PL_curforce);
4418             NEXTVAL_NEXTTOKE.ival = 0;
4419             force_next(',');
4420             start_force(PL_curforce);
4421             force_ident("\"", '$');
4422             start_force(PL_curforce);
4423             NEXTVAL_NEXTTOKE.ival = 0;
4424             force_next('$');
4425             start_force(PL_curforce);
4426             NEXTVAL_NEXTTOKE.ival = 0;
4427             force_next('(');
4428             start_force(PL_curforce);
4429             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4430             force_next(FUNC);
4431         }
4432         if (PL_lex_starts++) {
4433             s = PL_bufptr;
4434 #ifdef PERL_MAD
4435             if (PL_madskills) {
4436                 if (PL_thistoken)
4437                     sv_free(PL_thistoken);
4438                 PL_thistoken = newSVpvs("");
4439             }
4440 #endif
4441             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4442             if (!PL_lex_casemods && PL_lex_inpat)
4443                 OPERATOR(',');
4444             else
4445                 Aop(OP_CONCAT);
4446         }
4447         return yylex();
4448
4449     case LEX_INTERPENDMAYBE:
4450         if (intuit_more(PL_bufptr)) {
4451             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4452             break;
4453         }
4454         /* FALL THROUGH */
4455
4456     case LEX_INTERPEND:
4457         if (PL_lex_dojoin) {
4458             PL_lex_dojoin = FALSE;
4459             PL_lex_state = LEX_INTERPCONCAT;
4460 #ifdef PERL_MAD
4461             if (PL_madskills) {
4462                 if (PL_thistoken)
4463                     sv_free(PL_thistoken);
4464                 PL_thistoken = newSVpvs("");
4465             }
4466 #endif
4467             return REPORT(')');
4468         }
4469         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4470             && SvEVALED(PL_lex_repl))
4471         {
4472             if (PL_bufptr != PL_bufend)
4473                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4474             PL_lex_repl = NULL;
4475         }
4476         /* FALLTHROUGH */
4477     case LEX_INTERPCONCAT:
4478 #ifdef DEBUGGING
4479         if (PL_lex_brackets)
4480             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4481 #endif
4482         if (PL_bufptr == PL_bufend)
4483             return REPORT(sublex_done());
4484
4485         if (SvIVX(PL_linestr) == '\'') {
4486             SV *sv = newSVsv(PL_linestr);
4487             if (!PL_lex_inpat)
4488                 sv = tokeq(sv);
4489             else if ( PL_hints & HINT_NEW_RE )
4490                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4491             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4492             s = PL_bufend;
4493         }
4494         else {
4495             s = scan_const(PL_bufptr);
4496             if (*s == '\\')
4497                 PL_lex_state = LEX_INTERPCASEMOD;
4498             else
4499                 PL_lex_state = LEX_INTERPSTART;
4500         }
4501
4502         if (s != PL_bufptr) {
4503             start_force(PL_curforce);
4504             if (PL_madskills) {
4505                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4506             }
4507             NEXTVAL_NEXTTOKE = pl_yylval;
4508             PL_expect = XTERM;
4509             force_next(THING);
4510             if (PL_lex_starts++) {
4511 #ifdef PERL_MAD
4512                 if (PL_madskills) {
4513                     if (PL_thistoken)
4514                         sv_free(PL_thistoken);
4515                     PL_thistoken = newSVpvs("");
4516                 }
4517 #endif
4518                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4519                 if (!PL_lex_casemods && PL_lex_inpat)
4520                     OPERATOR(',');
4521                 else
4522                     Aop(OP_CONCAT);
4523             }
4524             else {
4525                 PL_bufptr = s;
4526                 return yylex();
4527             }
4528         }
4529
4530         return yylex();
4531     case LEX_FORMLINE:
4532         PL_lex_state = LEX_NORMAL;
4533         s = scan_formline(PL_bufptr);
4534         if (!PL_lex_formbrack)
4535             goto rightbracket;
4536         OPERATOR(';');
4537     }
4538
4539     s = PL_bufptr;
4540     PL_oldoldbufptr = PL_oldbufptr;
4541     PL_oldbufptr = s;
4542
4543   retry:
4544 #ifdef PERL_MAD
4545     if (PL_thistoken) {
4546         sv_free(PL_thistoken);
4547         PL_thistoken = 0;
4548     }
4549     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4550 #endif
4551     switch (*s) {
4552     default:
4553         if (isIDFIRST_lazy_if(s,UTF))
4554             goto keylookup;
4555         {
4556         unsigned char c = *s;
4557         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4558         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4559             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4560         } else {
4561             d = PL_linestart;
4562         }
4563         *s = '\0';
4564         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4565     }
4566     case 4:
4567     case 26:
4568         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4569     case 0:
4570 #ifdef PERL_MAD
4571         if (PL_madskills)
4572             PL_faketokens = 0;
4573 #endif
4574         if (!PL_rsfp) {
4575             PL_last_uni = 0;
4576             PL_last_lop = 0;
4577             if (PL_lex_brackets) {
4578                 yyerror((const char *)
4579                         (PL_lex_formbrack
4580                          ? "Format not terminated"
4581                          : "Missing right curly or square bracket"));
4582             }
4583             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4584                         "### Tokener got EOF\n");
4585             } );
4586             TOKEN(0);
4587         }
4588         if (s++ < PL_bufend)
4589             goto retry;                 /* ignore stray nulls */
4590         PL_last_uni = 0;
4591         PL_last_lop = 0;
4592         if (!PL_in_eval && !PL_preambled) {
4593             PL_preambled = TRUE;
4594 #ifdef PERL_MAD
4595             if (PL_madskills)
4596                 PL_faketokens = 1;
4597 #endif
4598             if (PL_perldb) {
4599                 /* Generate a string of Perl code to load the debugger.
4600                  * If PERL5DB is set, it will return the contents of that,
4601                  * otherwise a compile-time require of perl5db.pl.  */
4602
4603                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4604
4605                 if (pdb) {
4606                     sv_setpv(PL_linestr, pdb);
4607                     sv_catpvs(PL_linestr,";");
4608                 } else {
4609                     SETERRNO(0,SS_NORMAL);
4610                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4611                 }
4612             } else
4613                 sv_setpvs(PL_linestr,"");
4614             if (PL_preambleav) {
4615                 SV **svp = AvARRAY(PL_preambleav);
4616                 SV **const end = svp + AvFILLp(PL_preambleav);
4617                 while(svp <= end) {
4618                     sv_catsv(PL_linestr, *svp);
4619                     ++svp;
4620                     sv_catpvs(PL_linestr, ";");
4621                 }
4622                 sv_free(MUTABLE_SV(PL_preambleav));
4623                 PL_preambleav = NULL;
4624             }
4625             if (PL_minus_E)
4626                 sv_catpvs(PL_linestr,
4627                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4628             if (PL_minus_n || PL_minus_p) {
4629                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4630                 if (PL_minus_l)
4631                     sv_catpvs(PL_linestr,"chomp;");
4632                 if (PL_minus_a) {
4633                     if (PL_minus_F) {
4634                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4635                              || *PL_splitstr == '"')
4636                               && strchr(PL_splitstr + 1, *PL_splitstr))
4637                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4638                         else {
4639                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4640                                bytes can be used as quoting characters.  :-) */
4641                             const char *splits = PL_splitstr;
4642                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4643                             do {
4644                                 /* Need to \ \s  */
4645                                 if (*splits == '\\')
4646                                     sv_catpvn(PL_linestr, splits, 1);
4647                                 sv_catpvn(PL_linestr, splits, 1);
4648                             } while (*splits++);
4649                             /* This loop will embed the trailing NUL of
4650                                PL_linestr as the last thing it does before
4651                                terminating.  */
4652                             sv_catpvs(PL_linestr, ");");
4653                         }
4654                     }
4655                     else
4656                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4657                 }
4658             }
4659             sv_catpvs(PL_linestr, "\n");
4660             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4661             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4662             PL_last_lop = PL_last_uni = NULL;
4663             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4664                 update_debugger_info(PL_linestr, NULL, 0);
4665             goto retry;
4666         }
4667         do {
4668             fake_eof = 0;
4669             bof = PL_rsfp ? TRUE : FALSE;
4670             if (0) {
4671               fake_eof:
4672                 fake_eof = LEX_FAKE_EOF;
4673             }
4674             PL_bufptr = PL_bufend;
4675             CopLINE_inc(PL_curcop);
4676             if (!lex_next_chunk(fake_eof)) {
4677                 CopLINE_dec(PL_curcop);
4678                 s = PL_bufptr;
4679                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4680             }
4681             CopLINE_dec(PL_curcop);
4682 #ifdef PERL_MAD
4683             if (!PL_rsfp)
4684                 PL_realtokenstart = -1;
4685 #endif
4686             s = PL_bufptr;
4687             /* If it looks like the start of a BOM or raw UTF-16,
4688              * check if it in fact is. */
4689             if (bof && PL_rsfp &&
4690                      (*s == 0 ||
4691                       *(U8*)s == 0xEF ||
4692                       *(U8*)s >= 0xFE ||
4693                       s[1] == 0)) {
4694                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4695                 if (bof) {
4696                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4697                     s = swallow_bom((U8*)s);
4698                 }
4699             }
4700             if (PL_doextract) {
4701                 /* Incest with pod. */
4702 #ifdef PERL_MAD
4703                 if (PL_madskills)
4704                     sv_catsv(PL_thiswhite, PL_linestr);
4705 #endif
4706                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4707                     sv_setpvs(PL_linestr, "");
4708                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4709                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4710                     PL_last_lop = PL_last_uni = NULL;
4711                     PL_doextract = FALSE;
4712                 }
4713             }
4714             if (PL_rsfp)
4715                 incline(s);
4716         } while (PL_doextract);
4717         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4718         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4719         PL_last_lop = PL_last_uni = NULL;
4720         if (CopLINE(PL_curcop) == 1) {
4721             while (s < PL_bufend && isSPACE(*s))
4722                 s++;
4723             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4724                 s++;
4725 #ifdef PERL_MAD
4726             if (PL_madskills)
4727                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4728 #endif
4729             d = NULL;
4730             if (!PL_in_eval) {
4731                 if (*s == '#' && *(s+1) == '!')
4732                     d = s + 2;
4733 #ifdef ALTERNATE_SHEBANG
4734                 else {
4735                     static char const as[] = ALTERNATE_SHEBANG;
4736                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4737                         d = s + (sizeof(as) - 1);
4738                 }
4739 #endif /* ALTERNATE_SHEBANG */
4740             }
4741             if (d) {
4742                 char *ipath;
4743                 char *ipathend;
4744
4745                 while (isSPACE(*d))
4746                     d++;
4747                 ipath = d;
4748                 while (*d && !isSPACE(*d))
4749                     d++;
4750                 ipathend = d;
4751
4752 #ifdef ARG_ZERO_IS_SCRIPT
4753                 if (ipathend > ipath) {
4754                     /*
4755                      * HP-UX (at least) sets argv[0] to the script name,
4756                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4757                      * at least, set argv[0] to the basename of the Perl
4758                      * interpreter. So, having found "#!", we'll set it right.
4759                      */
4760                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4761                                                     SVt_PV)); /* $^X */
4762                     assert(SvPOK(x) || SvGMAGICAL(x));
4763                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4764                         sv_setpvn(x, ipath, ipathend - ipath);
4765                         SvSETMAGIC(x);
4766                     }
4767                     else {
4768                         STRLEN blen;
4769                         STRLEN llen;
4770                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4771                         const char * const lstart = SvPV_const(x,llen);
4772                         if (llen < blen) {
4773                             bstart += blen - llen;
4774                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4775                                 sv_setpvn(x, ipath, ipathend - ipath);
4776                                 SvSETMAGIC(x);
4777                             }
4778                         }
4779                     }
4780                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4781                 }
4782 #endif /* ARG_ZERO_IS_SCRIPT */
4783
4784                 /*
4785                  * Look for options.
4786                  */
4787                 d = instr(s,"perl -");
4788                 if (!d) {
4789                     d = instr(s,"perl");
4790 #if defined(DOSISH)
4791                     /* avoid getting into infinite loops when shebang
4792                      * line contains "Perl" rather than "perl" */
4793                     if (!d) {
4794                         for (d = ipathend-4; d >= ipath; --d) {
4795                             if ((*d == 'p' || *d == 'P')
4796                                 && !ibcmp(d, "perl", 4))
4797                             {
4798                                 break;
4799                             }
4800                         }
4801                         if (d < ipath)
4802                             d = NULL;
4803                     }
4804 #endif
4805                 }
4806 #ifdef ALTERNATE_SHEBANG
4807                 /*
4808                  * If the ALTERNATE_SHEBANG on this system starts with a
4809                  * character that can be part of a Perl expression, then if
4810                  * we see it but not "perl", we're probably looking at the
4811                  * start of Perl code, not a request to hand off to some
4812                  * other interpreter.  Similarly, if "perl" is there, but
4813                  * not in the first 'word' of the line, we assume the line
4814                  * contains the start of the Perl program.
4815                  */
4816                 if (d && *s != '#') {
4817                     const char *c = ipath;
4818                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4819                         c++;
4820                     if (c < d)
4821                         d = NULL;       /* "perl" not in first word; ignore */
4822                     else
4823                         *s = '#';       /* Don't try to parse shebang line */
4824                 }
4825 #endif /* ALTERNATE_SHEBANG */
4826                 if (!d &&
4827                     *s == '#' &&
4828                     ipathend > ipath &&
4829                     !PL_minus_c &&
4830                     !instr(s,"indir") &&
4831                     instr(PL_origargv[0],"perl"))
4832                 {
4833                     dVAR;
4834                     char **newargv;
4835
4836                     *ipathend = '\0';
4837                     s = ipathend + 1;
4838                     while (s < PL_bufend && isSPACE(*s))
4839                         s++;
4840                     if (s < PL_bufend) {
4841                         Newx(newargv,PL_origargc+3,char*);
4842                         newargv[1] = s;
4843                         while (s < PL_bufend && !isSPACE(*s))
4844                             s++;
4845                         *s = '\0';
4846                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4847                     }
4848                     else
4849                         newargv = PL_origargv;
4850                     newargv[0] = ipath;
4851                     PERL_FPU_PRE_EXEC
4852                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4853                     PERL_FPU_POST_EXEC
4854                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4855                 }
4856                 if (d) {
4857                     while (*d && !isSPACE(*d))
4858                         d++;
4859                     while (SPACE_OR_TAB(*d))
4860                         d++;
4861
4862                     if (*d++ == '-') {
4863                         const bool switches_done = PL_doswitches;
4864                         const U32 oldpdb = PL_perldb;
4865                         const bool oldn = PL_minus_n;
4866                         const bool oldp = PL_minus_p;
4867                         const char *d1 = d;
4868
4869                         do {
4870                             bool baduni = FALSE;
4871                             if (*d1 == 'C') {
4872                                 const char *d2 = d1 + 1;
4873                                 if (parse_unicode_opts((const char **)&d2)
4874                                     != PL_unicode)
4875                                     baduni = TRUE;
4876                             }
4877                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4878                                 const char * const m = d1;
4879                                 while (*d1 && !isSPACE(*d1))
4880                                     d1++;
4881                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4882                                       (int)(d1 - m), m);
4883                             }
4884                             d1 = moreswitches(d1);
4885                         } while (d1);
4886                         if (PL_doswitches && !switches_done) {
4887                             int argc = PL_origargc;
4888                             char **argv = PL_origargv;
4889                             do {
4890                                 argc--,argv++;
4891                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4892                             init_argv_symbols(argc,argv);
4893                         }
4894                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4895                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4896                               /* if we have already added "LINE: while (<>) {",
4897                                  we must not do it again */
4898                         {
4899                             sv_setpvs(PL_linestr, "");
4900                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4901                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4902                             PL_last_lop = PL_last_uni = NULL;
4903                             PL_preambled = FALSE;
4904                             if (PERLDB_LINE || PERLDB_SAVESRC)
4905                                 (void)gv_fetchfile(PL_origfilename);
4906                             goto retry;
4907                         }
4908                     }
4909                 }
4910             }
4911         }
4912         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4913             PL_bufptr = s;
4914             PL_lex_state = LEX_FORMLINE;
4915             return yylex();
4916         }
4917         goto retry;
4918     case '\r':
4919 #ifdef PERL_STRICT_CR
4920         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4921         Perl_croak(aTHX_
4922       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4923 #endif
4924     case ' ': case '\t': case '\f': case 013:
4925 #ifdef PERL_MAD
4926         PL_realtokenstart = -1;
4927         if (!PL_thiswhite)
4928             PL_thiswhite = newSVpvs("");
4929         sv_catpvn(PL_thiswhite, s, 1);
4930 #endif
4931         s++;
4932         goto retry;
4933     case '#':
4934     case '\n':
4935 #ifdef PERL_MAD
4936         PL_realtokenstart = -1;
4937         if (PL_madskills)
4938             PL_faketokens = 0;
4939 #endif
4940         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4941             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4942                 /* handle eval qq[#line 1 "foo"\n ...] */
4943                 CopLINE_dec(PL_curcop);
4944                 incline(s);
4945             }
4946             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4947                 s = SKIPSPACE0(s);
4948                 if (!PL_in_eval || PL_rsfp)
4949                     incline(s);
4950             }
4951             else {
4952                 d = s;
4953                 while (d < PL_bufend && *d != '\n')
4954                     d++;
4955                 if (d < PL_bufend)
4956                     d++;
4957                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4958                   Perl_croak(aTHX_ "panic: input overflow");
4959 #ifdef PERL_MAD
4960                 if (PL_madskills)
4961                     PL_thiswhite = newSVpvn(s, d - s);
4962 #endif
4963                 s = d;
4964                 incline(s);
4965             }
4966             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4967                 PL_bufptr = s;
4968                 PL_lex_state = LEX_FORMLINE;
4969                 return yylex();
4970             }
4971         }
4972         else {
4973 #ifdef PERL_MAD
4974             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4975                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4976                     PL_faketokens = 0;
4977                     s = SKIPSPACE0(s);
4978                     TOKEN(PEG); /* make sure any #! line is accessible */
4979                 }
4980                 s = SKIPSPACE0(s);
4981             }
4982             else {
4983 /*              if (PL_madskills && PL_lex_formbrack) { */
4984                     d = s;
4985                     while (d < PL_bufend && *d != '\n')
4986                         d++;
4987                     if (d < PL_bufend)
4988                         d++;
4989                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4990                       Perl_croak(aTHX_ "panic: input overflow");
4991                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4992                         if (!PL_thiswhite)
4993                             PL_thiswhite = newSVpvs("");
4994                         if (CopLINE(PL_curcop) == 1) {
4995                             sv_setpvs(PL_thiswhite, "");
4996                             PL_faketokens = 0;
4997                         }
4998                         sv_catpvn(PL_thiswhite, s, d - s);
4999                     }
5000                     s = d;
5001 /*              }
5002                 *s = '\0';
5003                 PL_bufend = s; */
5004             }
5005 #else
5006             *s = '\0';
5007             PL_bufend = s;
5008 #endif
5009         }
5010         goto retry;
5011     case '-':
5012         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5013             I32 ftst = 0;
5014             char tmp;
5015
5016             s++;
5017             PL_bufptr = s;
5018             tmp = *s++;
5019
5020             while (s < PL_bufend && SPACE_OR_TAB(*s))
5021                 s++;
5022
5023             if (strnEQ(s,"=>",2)) {
5024                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5025                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5026                 OPERATOR('-');          /* unary minus */
5027             }
5028             PL_last_uni = PL_oldbufptr;
5029             switch (tmp) {
5030             case 'r': ftst = OP_FTEREAD;        break;
5031             case 'w': ftst = OP_FTEWRITE;       break;
5032             case 'x': ftst = OP_FTEEXEC;        break;
5033             case 'o': ftst = OP_FTEOWNED;       break;
5034             case 'R': ftst = OP_FTRREAD;        break;
5035             case 'W': ftst = OP_FTRWRITE;       break;
5036             case 'X': ftst = OP_FTREXEC;        break;
5037             case 'O': ftst = OP_FTROWNED;       break;
5038             case 'e': ftst = OP_FTIS;           break;
5039             case 'z': ftst = OP_FTZERO;         break;
5040             case 's': ftst = OP_FTSIZE;         break;
5041             case 'f': ftst = OP_FTFILE;         break;
5042             case 'd': ftst = OP_FTDIR;          break;
5043             case 'l': ftst = OP_FTLINK;         break;
5044             case 'p': ftst = OP_FTPIPE;         break;
5045             case 'S': ftst = OP_FTSOCK;         break;
5046             case 'u': ftst = OP_FTSUID;         break;
5047             case 'g': ftst = OP_FTSGID;         break;
5048             case 'k': ftst = OP_FTSVTX;         break;
5049             case 'b': ftst = OP_FTBLK;          break;
5050             case 'c': ftst = OP_FTCHR;          break;
5051             case 't': ftst = OP_FTTTY;          break;
5052             case 'T': ftst = OP_FTTEXT;         break;
5053             case 'B': ftst = OP_FTBINARY;       break;
5054             case 'M': case 'A': case 'C':
5055                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5056                 switch (tmp) {
5057                 case 'M': ftst = OP_FTMTIME;    break;
5058                 case 'A': ftst = OP_FTATIME;    break;
5059                 case 'C': ftst = OP_FTCTIME;    break;
5060                 default:                        break;
5061                 }
5062                 break;
5063             default:
5064                 break;
5065             }
5066             if (ftst) {
5067                 PL_last_lop_op = (OPCODE)ftst;
5068                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5069                         "### Saw file test %c\n", (int)tmp);
5070                 } );
5071                 FTST(ftst);
5072             }
5073             else {
5074                 /* Assume it was a minus followed by a one-letter named
5075                  * subroutine call (or a -bareword), then. */
5076                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5077                         "### '-%c' looked like a file test but was not\n",
5078                         (int) tmp);
5079                 } );
5080                 s = --PL_bufptr;
5081             }
5082         }
5083         {
5084             const char tmp = *s++;
5085             if (*s == tmp) {
5086                 s++;
5087                 if (PL_expect == XOPERATOR)
5088                     TERM(POSTDEC);
5089                 else
5090                     OPERATOR(PREDEC);
5091             }
5092             else if (*s == '>') {
5093                 s++;
5094                 s = SKIPSPACE1(s);
5095                 if (isIDFIRST_lazy_if(s,UTF)) {
5096                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5097                     TOKEN(ARROW);
5098                 }
5099                 else if (*s == '$')
5100                     OPERATOR(ARROW);
5101                 else
5102                     TERM(ARROW);
5103             }
5104             if (PL_expect == XOPERATOR)
5105                 Aop(OP_SUBTRACT);
5106             else {
5107                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5108                     check_uni();
5109                 OPERATOR('-');          /* unary minus */
5110             }
5111         }
5112
5113     case '+':
5114         {
5115             const char tmp = *s++;
5116             if (*s == tmp) {
5117                 s++;
5118                 if (PL_expect == XOPERATOR)
5119                     TERM(POSTINC);
5120                 else
5121                     OPERATOR(PREINC);
5122             }
5123             if (PL_expect == XOPERATOR)
5124                 Aop(OP_ADD);
5125             else {
5126                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5127                     check_uni();
5128                 OPERATOR('+');
5129             }
5130         }
5131
5132     case '*':
5133         if (PL_expect != XOPERATOR) {
5134             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5135             PL_expect = XOPERATOR;
5136             force_ident(PL_tokenbuf, '*');
5137             if (!*PL_tokenbuf)
5138                 PREREF('*');
5139             TERM('*');
5140         }
5141         s++;
5142         if (*s == '*') {
5143             s++;
5144             PWop(OP_POW);
5145         }
5146         Mop(OP_MULTIPLY);
5147
5148     case '%':
5149         if (PL_expect == XOPERATOR) {
5150             ++s;
5151             Mop(OP_MODULO);
5152         }
5153         PL_tokenbuf[0] = '%';
5154         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5155                 sizeof PL_tokenbuf - 1, FALSE);
5156         if (!PL_tokenbuf[1]) {
5157             PREREF('%');
5158         }
5159         PL_pending_ident = '%';
5160         TERM('%');
5161
5162     case '^':
5163         s++;
5164         BOop(OP_BIT_XOR);
5165     case '[':
5166         PL_lex_brackets++;
5167         {
5168             const char tmp = *s++;
5169             OPERATOR(tmp);
5170         }
5171     case '~':
5172         if (s[1] == '~'
5173             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5174         {
5175             s += 2;
5176             Eop(OP_SMARTMATCH);
5177         }
5178     case ',':
5179         {
5180             const char tmp = *s++;
5181             OPERATOR(tmp);
5182         }
5183     case ':':
5184         if (s[1] == ':') {
5185             len = 0;
5186             goto just_a_word_zero_gv;
5187         }
5188         s++;
5189         switch (PL_expect) {
5190             OP *attrs;
5191 #ifdef PERL_MAD
5192             I32 stuffstart;
5193 #endif
5194         case XOPERATOR:
5195             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5196                 break;
5197             PL_bufptr = s;      /* update in case we back off */
5198             if (*s == '=') {
5199                 deprecate(":= for an empty attribute list");
5200             }
5201             goto grabattrs;
5202         case XATTRBLOCK:
5203             PL_expect = XBLOCK;
5204             goto grabattrs;
5205         case XATTRTERM:
5206             PL_expect = XTERMBLOCK;
5207          grabattrs:
5208 #ifdef PERL_MAD
5209             stuffstart = s - SvPVX(PL_linestr) - 1;
5210 #endif
5211             s = PEEKSPACE(s);
5212             attrs = NULL;
5213             while (isIDFIRST_lazy_if(s,UTF)) {
5214                 I32 tmp;
5215                 SV *sv;
5216                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5217                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5218                     if (tmp < 0) tmp = -tmp;
5219                     switch (tmp) {
5220                     case KEY_or:
5221                     case KEY_and:
5222                     case KEY_for:
5223                     case KEY_foreach:
5224                     case KEY_unless:
5225                     case KEY_if:
5226                     case KEY_while:
5227                     case KEY_until:
5228                         goto got_attrs;
5229                     default:
5230                         break;
5231                     }
5232                 }
5233                 sv = newSVpvn(s, len);
5234                 if (*d == '(') {
5235                     d = scan_str(d,TRUE,TRUE);
5236                     if (!d) {
5237                         /* MUST advance bufptr here to avoid bogus
5238                            "at end of line" context messages from yyerror().
5239                          */
5240                         PL_bufptr = s + len;
5241                         yyerror("Unterminated attribute parameter in attribute list");
5242                         if (attrs)
5243                             op_free(attrs);
5244                         sv_free(sv);
5245                         return REPORT(0);       /* EOF indicator */
5246                     }
5247                 }
5248                 if (PL_lex_stuff) {
5249                     sv_catsv(sv, PL_lex_stuff);
5250                     attrs = op_append_elem(OP_LIST, attrs,
5251                                         newSVOP(OP_CONST, 0, sv));
5252                     SvREFCNT_dec(PL_lex_stuff);
5253                     PL_lex_stuff = NULL;
5254                 }
5255                 else {
5256                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5257                         sv_free(sv);
5258                         if (PL_in_my == KEY_our) {
5259                             deprecate(":unique");
5260                         }
5261                         else
5262                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5263                     }
5264
5265                     /* NOTE: any CV attrs applied here need to be part of
5266                        the CVf_BUILTIN_ATTRS define in cv.h! */
5267                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5268                         sv_free(sv);
5269                         CvLVALUE_on(PL_compcv);
5270                     }
5271                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5272                         sv_free(sv);
5273                         deprecate(":locked");
5274                     }
5275                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5276                         sv_free(sv);
5277                         CvMETHOD_on(PL_compcv);
5278                     }
5279                     /* After we've set the flags, it could be argued that
5280                        we don't need to do the attributes.pm-based setting
5281                        process, and shouldn't bother appending recognized
5282                        flags.  To experiment with that, uncomment the
5283                        following "else".  (Note that's already been
5284                        uncommented.  That keeps the above-applied built-in
5285                        attributes from being intercepted (and possibly
5286                        rejected) by a package's attribute routines, but is
5287                        justified by the performance win for the common case
5288                        of applying only built-in attributes.) */
5289                     else
5290                         attrs = op_append_elem(OP_LIST, attrs,
5291                                             newSVOP(OP_CONST, 0,
5292                                                     sv));
5293                 }
5294                 s = PEEKSPACE(d);
5295                 if (*s == ':' && s[1] != ':')
5296                     s = PEEKSPACE(s+1);
5297                 else if (s == d)
5298                     break;      /* require real whitespace or :'s */
5299                 /* XXX losing whitespace on sequential attributes here */
5300             }
5301             {
5302                 const char tmp
5303                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5304                 if (*s != ';' && *s != '}' && *s != tmp
5305                     && (tmp != '=' || *s != ')')) {
5306                     const char q = ((*s == '\'') ? '"' : '\'');
5307                     /* If here for an expression, and parsed no attrs, back
5308                        off. */
5309                     if (tmp == '=' && !attrs) {
5310                         s = PL_bufptr;
5311                         break;
5312                     }
5313                     /* MUST advance bufptr here to avoid bogus "at end of line"
5314                        context messages from yyerror().
5315                     */
5316                     PL_bufptr = s;
5317                     yyerror( (const char *)
5318                              (*s
5319                               ? Perl_form(aTHX_ "Invalid separator character "
5320                                           "%c%c%c in attribute list", q, *s, q)
5321                               : "Unterminated attribute list" ) );
5322                     if (attrs)
5323                         op_free(attrs);
5324                     OPERATOR(':');
5325                 }
5326             }
5327         got_attrs:
5328             if (attrs) {
5329                 start_force(PL_curforce);
5330                 NEXTVAL_NEXTTOKE.opval = attrs;
5331                 CURMAD('_', PL_nextwhite);
5332                 force_next(THING);
5333             }
5334 #ifdef PERL_MAD
5335             if (PL_madskills) {
5336                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5337                                      (s - SvPVX(PL_linestr)) - stuffstart);
5338             }
5339 #endif
5340             TOKEN(COLONATTR);
5341         }
5342         OPERATOR(':');
5343     case '(':
5344         s++;
5345         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5346             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5347         else
5348             PL_expect = XTERM;
5349         s = SKIPSPACE1(s);
5350         TOKEN('(');
5351     case ';':
5352         CLINE;
5353         {
5354             const char tmp = *s++;
5355             OPERATOR(tmp);
5356         }
5357     case ')':
5358         {
5359             const char tmp = *s++;
5360             s = SKIPSPACE1(s);
5361             if (*s == '{')
5362                 PREBLOCK(tmp);
5363             TERM(tmp);
5364         }
5365     case ']':
5366         s++;
5367         if (PL_lex_brackets <= 0)
5368             yyerror("Unmatched right square bracket");
5369         else
5370             --PL_lex_brackets;
5371         if (PL_lex_state == LEX_INTERPNORMAL) {
5372             if (PL_lex_brackets == 0) {
5373                 if (*s == '-' && s[1] == '>')
5374                     PL_lex_state = LEX_INTERPENDMAYBE;
5375                 else if (*s != '[' && *s != '{')
5376                     PL_lex_state = LEX_INTERPEND;
5377             }
5378         }
5379         TERM(']');
5380     case '{':
5381       leftbracket:
5382         s++;
5383         if (PL_lex_brackets > 100) {
5384             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5385         }
5386         switch (PL_expect) {
5387         case XTERM:
5388             if (PL_lex_formbrack) {
5389                 s--;
5390                 PRETERMBLOCK(DO);
5391             }
5392             if (PL_oldoldbufptr == PL_last_lop)
5393                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5394             else
5395                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5396             OPERATOR(HASHBRACK);
5397         case XOPERATOR:
5398             while (s < PL_bufend && SPACE_OR_TAB(*s))
5399                 s++;
5400             d = s;
5401             PL_tokenbuf[0] = '\0';
5402             if (d < PL_bufend && *d == '-') {
5403                 PL_tokenbuf[0] = '-';
5404                 d++;
5405                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5406                     d++;
5407             }
5408             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5409                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5410                               FALSE, &len);
5411                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5412                     d++;
5413                 if (*d == '}') {
5414                     const char minus = (PL_tokenbuf[0] == '-');
5415                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5416                     if (minus)
5417                         force_next('-');
5418                 }
5419             }
5420             /* FALL THROUGH */
5421         case XATTRBLOCK:
5422         case XBLOCK:
5423             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5424             PL_expect = XSTATE;
5425             break;
5426         case XATTRTERM:
5427         case XTERMBLOCK:
5428             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5429             PL_expect = XSTATE;
5430             break;
5431         default: {
5432                 const char *t;
5433                 if (PL_oldoldbufptr == PL_last_lop)
5434                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5435                 else
5436                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5437                 s = SKIPSPACE1(s);
5438                 if (*s == '}') {
5439                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5440                         PL_expect = XTERM;
5441                         /* This hack is to get the ${} in the message. */
5442                         PL_bufptr = s+1;
5443                         yyerror("syntax error");
5444                         break;
5445                     }
5446                     OPERATOR(HASHBRACK);
5447                 }
5448                 /* This hack serves to disambiguate a pair of curlies
5449                  * as being a block or an anon hash.  Normally, expectation
5450                  * determines that, but in cases where we're not in a
5451                  * position to expect anything in particular (like inside
5452                  * eval"") we have to resolve the ambiguity.  This code
5453                  * covers the case where the first term in the curlies is a
5454                  * quoted string.  Most other cases need to be explicitly
5455                  * disambiguated by prepending a "+" before the opening
5456                  * curly in order to force resolution as an anon hash.
5457                  *
5458                  * XXX should probably propagate the outer expectation
5459                  * into eval"" to rely less on this hack, but that could
5460                  * potentially break current behavior of eval"".
5461                  * GSAR 97-07-21
5462                  */
5463                 t = s;
5464                 if (*s == '\'' || *s == '"' || *s == '`') {
5465                     /* common case: get past first string, handling escapes */
5466                     for (t++; t < PL_bufend && *t != *s;)
5467                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5468                             t++;
5469                     t++;
5470                 }
5471                 else if (*s == 'q') {
5472                     if (++t < PL_bufend
5473                         && (!isALNUM(*t)
5474                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5475                                 && !isALNUM(*t))))
5476                     {
5477                         /* skip q//-like construct */
5478                         const char *tmps;
5479                         char open, close, term;
5480                         I32 brackets = 1;
5481
5482                         while (t < PL_bufend && isSPACE(*t))
5483                             t++;
5484                         /* check for q => */
5485                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5486                             OPERATOR(HASHBRACK);
5487                         }
5488                         term = *t;
5489                         open = term;
5490                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5491                             term = tmps[5];
5492                         close = term;
5493                         if (open == close)
5494                             for (t++; t < PL_bufend; t++) {
5495                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5496                                     t++;
5497                                 else if (*t == open)
5498                                     break;
5499                             }
5500                         else {
5501                             for (t++; t < PL_bufend; t++) {
5502                                 if (*t == '\\' && t+1 < PL_bufend)
5503                                     t++;
5504                                 else if (*t == close && --brackets <= 0)
5505                                     break;
5506                                 else if (*t == open)
5507                                     brackets++;
5508                             }
5509                         }
5510                         t++;
5511                     }
5512                     else
5513                         /* skip plain q word */
5514                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5515                              t += UTF8SKIP(t);
5516                 }
5517                 else if (isALNUM_lazy_if(t,UTF)) {
5518                     t += UTF8SKIP(t);
5519                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5520                          t += UTF8SKIP(t);
5521                 }
5522                 while (t < PL_bufend && isSPACE(*t))
5523                     t++;
5524                 /* if comma follows first term, call it an anon hash */
5525                 /* XXX it could be a comma expression with loop modifiers */
5526                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5527                                    || (*t == '=' && t[1] == '>')))
5528                     OPERATOR(HASHBRACK);
5529                 if (PL_expect == XREF)
5530                     PL_expect = XTERM;
5531                 else {
5532                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5533                     PL_expect = XSTATE;
5534                 }
5535             }
5536             break;
5537         }
5538         pl_yylval.ival = CopLINE(PL_curcop);
5539         if (isSPACE(*s) || *s == '#')
5540             PL_copline = NOLINE;   /* invalidate current command line number */
5541         TOKEN('{');
5542     case '}':
5543       rightbracket:
5544         s++;
5545         if (PL_lex_brackets <= 0)
5546             yyerror("Unmatched right curly bracket");
5547         else
5548             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5549         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5550             PL_lex_formbrack = 0;
5551         if (PL_lex_state == LEX_INTERPNORMAL) {
5552             if (PL_lex_brackets == 0) {
5553                 if (PL_expect & XFAKEBRACK) {
5554                     PL_expect &= XENUMMASK;
5555                     PL_lex_state = LEX_INTERPEND;
5556                     PL_bufptr = s;
5557 #if 0
5558                     if (PL_madskills) {
5559                         if (!PL_thiswhite)
5560                             PL_thiswhite = newSVpvs("");
5561                         sv_catpvs(PL_thiswhite,"}");
5562                     }
5563 #endif
5564                     return yylex();     /* ignore fake brackets */
5565                 }
5566                 if (*s == '-' && s[1] == '>')
5567                     PL_lex_state = LEX_INTERPENDMAYBE;
5568                 else if (*s != '[' && *s != '{')
5569                     PL_lex_state = LEX_INTERPEND;
5570             }
5571         }
5572         if (PL_expect & XFAKEBRACK) {
5573             PL_expect &= XENUMMASK;
5574             PL_bufptr = s;
5575             return yylex();             /* ignore fake brackets */
5576         }
5577         start_force(PL_curforce);
5578         if (PL_madskills) {
5579             curmad('X', newSVpvn(s-1,1));
5580             CURMAD('_', PL_thiswhite);
5581         }
5582         force_next('}');
5583 #ifdef PERL_MAD
5584         if (!PL_thistoken)
5585             PL_thistoken = newSVpvs("");
5586 #endif
5587         TOKEN(';');
5588     case '&':
5589         s++;
5590         if (*s++ == '&')
5591             AOPERATOR(ANDAND);
5592         s--;
5593         if (PL_expect == XOPERATOR) {
5594             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5595                 && isIDFIRST_lazy_if(s,UTF))
5596             {
5597                 CopLINE_dec(PL_curcop);
5598                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5599                 CopLINE_inc(PL_curcop);
5600             }
5601             BAop(OP_BIT_AND);
5602         }
5603
5604         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5605         if (*PL_tokenbuf) {
5606             PL_expect = XOPERATOR;
5607             force_ident(PL_tokenbuf, '&');
5608         }
5609         else
5610             PREREF('&');
5611         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5612         TERM('&');
5613
5614     case '|':
5615         s++;
5616         if (*s++ == '|')
5617             AOPERATOR(OROR);
5618         s--;
5619         BOop(OP_BIT_OR);
5620     case '=':
5621         s++;
5622         {
5623             const char tmp = *s++;
5624             if (tmp == '=')
5625                 Eop(OP_EQ);
5626             if (tmp == '>')
5627                 OPERATOR(',');
5628             if (tmp == '~')
5629                 PMop(OP_MATCH);
5630             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5631                 && strchr("+-*/%.^&|<",tmp))
5632                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5633                             "Reversed %c= operator",(int)tmp);
5634             s--;
5635             if (PL_expect == XSTATE && isALPHA(tmp) &&
5636                 (s == PL_linestart+1 || s[-2] == '\n') )
5637                 {
5638                     if (PL_in_eval && !PL_rsfp) {
5639                         d = PL_bufend;
5640                         while (s < d) {
5641                             if (*s++ == '\n') {
5642                                 incline(s);
5643                                 if (strnEQ(s,"=cut",4)) {
5644                                     s = strchr(s,'\n');
5645                                     if (s)
5646                                         s++;
5647                                     else
5648                                         s = d;
5649                                     incline(s);
5650                                     goto retry;
5651                                 }
5652                             }
5653                         }
5654                         goto retry;
5655                     }
5656 #ifdef PERL_MAD
5657                     if (PL_madskills) {
5658                         if (!PL_thiswhite)
5659                             PL_thiswhite = newSVpvs("");
5660                         sv_catpvn(PL_thiswhite, PL_linestart,
5661                                   PL_bufend - PL_linestart);
5662                     }
5663 #endif
5664                     s = PL_bufend;
5665                     PL_doextract = TRUE;
5666                     goto retry;
5667                 }
5668         }
5669         if (PL_lex_brackets < PL_lex_formbrack) {
5670             const char *t = s;
5671 #ifdef PERL_STRICT_CR
5672             while (SPACE_OR_TAB(*t))
5673 #else
5674             while (SPACE_OR_TAB(*t) || *t == '\r')
5675 #endif
5676                 t++;
5677             if (*t == '\n' || *t == '#') {
5678                 s--;
5679                 PL_expect = XBLOCK;
5680                 goto leftbracket;
5681             }
5682         }
5683         pl_yylval.ival = 0;
5684         OPERATOR(ASSIGNOP);
5685     case '!':
5686         s++;
5687         {
5688             const char tmp = *s++;
5689             if (tmp == '=') {
5690                 /* was this !=~ where !~ was meant?
5691                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5692
5693                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5694                     const char *t = s+1;
5695
5696                     while (t < PL_bufend && isSPACE(*t))
5697                         ++t;
5698
5699                     if (*t == '/' || *t == '?' ||
5700                         ((*t == 'm' || *t == 's' || *t == 'y')
5701                          && !isALNUM(t[1])) ||
5702                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5703                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5704                                     "!=~ should be !~");
5705                 }
5706                 Eop(OP_NE);
5707             }
5708             if (tmp == '~')
5709                 PMop(OP_NOT);
5710         }
5711         s--;
5712         OPERATOR('!');
5713     case '<':
5714         if (PL_expect != XOPERATOR) {
5715             if (s[1] != '<' && !strchr(s,'>'))
5716                 check_uni();
5717             if (s[1] == '<')
5718                 s = scan_heredoc(s);
5719             else
5720                 s = scan_inputsymbol(s);
5721             TERM(sublex_start());
5722         }
5723         s++;
5724         {
5725             char tmp = *s++;
5726             if (tmp == '<')
5727                 SHop(OP_LEFT_SHIFT);
5728             if (tmp == '=') {
5729                 tmp = *s++;
5730                 if (tmp == '>')
5731                     Eop(OP_NCMP);
5732                 s--;
5733                 Rop(OP_LE);
5734             }
5735         }
5736         s--;
5737         Rop(OP_LT);
5738     case '>':
5739         s++;
5740         {
5741             const char tmp = *s++;
5742             if (tmp == '>')
5743                 SHop(OP_RIGHT_SHIFT);
5744             else if (tmp == '=')
5745                 Rop(OP_GE);
5746         }
5747         s--;
5748         Rop(OP_GT);
5749
5750     case '$':
5751         CLINE;
5752
5753         if (PL_expect == XOPERATOR) {
5754             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5755                 return deprecate_commaless_var_list();
5756             }
5757         }
5758
5759         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5760             PL_tokenbuf[0] = '@';
5761             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5762                            sizeof PL_tokenbuf - 1, FALSE);
5763             if (PL_expect == XOPERATOR)
5764                 no_op("Array length", s);
5765             if (!PL_tokenbuf[1])
5766                 PREREF(DOLSHARP);
5767             PL_expect = XOPERATOR;
5768             PL_pending_ident = '#';
5769             TOKEN(DOLSHARP);
5770         }
5771
5772         PL_tokenbuf[0] = '$';
5773         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5774                        sizeof PL_tokenbuf - 1, FALSE);
5775         if (PL_expect == XOPERATOR)
5776             no_op("Scalar", s);
5777         if (!PL_tokenbuf[1]) {
5778             if (s == PL_bufend)
5779                 yyerror("Final $ should be \\$ or $name");
5780             PREREF('$');
5781         }
5782
5783         /* This kludge not intended to be bulletproof. */
5784         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5785             pl_yylval.opval = newSVOP(OP_CONST, 0,
5786                                    newSViv(CopARYBASE_get(&PL_compiling)));
5787             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5788             TERM(THING);
5789         }
5790
5791         d = s;
5792         {
5793             const char tmp = *s;
5794             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5795                 s = SKIPSPACE1(s);
5796
5797             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5798                 && intuit_more(s)) {
5799                 if (*s == '[') {
5800                     PL_tokenbuf[0] = '@';
5801                     if (ckWARN(WARN_SYNTAX)) {
5802                         char *t = s+1;
5803
5804                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5805                             t++;
5806                         if (*t++ == ',') {
5807                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5808                             while (t < PL_bufend && *t != ']')
5809                                 t++;
5810                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5811                                         "Multidimensional syntax %.*s not supported",
5812                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5813                         }
5814                     }
5815                 }
5816                 else if (*s == '{') {
5817                     char *t;
5818                     PL_tokenbuf[0] = '%';
5819                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5820                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5821                         {
5822                             char tmpbuf[sizeof PL_tokenbuf];
5823                             do {
5824                                 t++;
5825                             } while (isSPACE(*t));
5826                             if (isIDFIRST_lazy_if(t,UTF)) {
5827                                 STRLEN len;
5828                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5829                                               &len);
5830                                 while (isSPACE(*t))
5831                                     t++;
5832                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5833                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5834                                                 "You need to quote \"%s\"",
5835                                                 tmpbuf);
5836                             }
5837                         }
5838                 }
5839             }
5840
5841             PL_expect = XOPERATOR;
5842             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5843                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5844                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5845                     PL_expect = XOPERATOR;
5846                 else if (strchr("$@\"'`q", *s))
5847                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5848                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5849                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5850                 else if (isIDFIRST_lazy_if(s,UTF)) {
5851                     char tmpbuf[sizeof PL_tokenbuf];
5852                     int t2;
5853                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5854                     if ((t2 = keyword(tmpbuf, len, 0))) {
5855                         /* binary operators exclude handle interpretations */
5856                         switch (t2) {
5857                         case -KEY_x:
5858                         case -KEY_eq:
5859                         case -KEY_ne:
5860                         case -KEY_gt:
5861                         case -KEY_lt:
5862                         case -KEY_ge:
5863                         case -KEY_le:
5864                         case -KEY_cmp:
5865                             break;
5866                         default:
5867                             PL_expect = XTERM;  /* e.g. print $fh length() */
5868                             break;
5869                         }
5870                     }
5871                     else {
5872                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5873                     }
5874                 }
5875                 else if (isDIGIT(*s))
5876                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5877                 else if (*s == '.' && isDIGIT(s[1]))
5878                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5879                 else if ((*s == '?' || *s == '-' || *s == '+')
5880                          && !isSPACE(s[1]) && s[1] != '=')
5881                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5882                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5883                          && s[1] != '/')
5884                     PL_expect = XTERM;          /* e.g. print $fh /.../
5885                                                    XXX except DORDOR operator
5886                                                 */
5887                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5888                          && s[2] != '=')
5889                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5890             }
5891         }
5892         PL_pending_ident = '$';
5893         TOKEN('$');
5894
5895     case '@':
5896         if (PL_expect == XOPERATOR)
5897             no_op("Array", s);
5898         PL_tokenbuf[0] = '@';
5899         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5900         if (!PL_tokenbuf[1]) {
5901             PREREF('@');
5902         }
5903         if (PL_lex_state == LEX_NORMAL)
5904             s = SKIPSPACE1(s);
5905         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5906             if (*s == '{')
5907                 PL_tokenbuf[0] = '%';
5908
5909             /* Warn about @ where they meant $. */
5910             if (*s == '[' || *s == '{') {
5911                 if (ckWARN(WARN_SYNTAX)) {
5912                     const char *t = s + 1;
5913                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5914                         t++;
5915                     if (*t == '}' || *t == ']') {
5916                         t++;
5917                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5918                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5919                             "Scalar value %.*s better written as $%.*s",
5920                             (int)(t-PL_bufptr), PL_bufptr,
5921                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5922                     }
5923                 }
5924             }
5925         }
5926         PL_pending_ident = '@';
5927         TERM('@');
5928
5929      case '/':                  /* may be division, defined-or, or pattern */
5930         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5931             s += 2;
5932             AOPERATOR(DORDOR);
5933         }
5934      case '?':                  /* may either be conditional or pattern */
5935         if (PL_expect == XOPERATOR) {
5936              char tmp = *s++;
5937              if(tmp == '?') {
5938                 OPERATOR('?');
5939              }
5940              else {
5941                  tmp = *s++;
5942                  if(tmp == '/') {
5943                      /* A // operator. */
5944                     AOPERATOR(DORDOR);
5945                  }
5946                  else {
5947                      s--;
5948                      Mop(OP_DIVIDE);
5949                  }
5950              }
5951          }
5952          else {
5953              /* Disable warning on "study /blah/" */
5954              if (PL_oldoldbufptr == PL_last_uni
5955               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5956                   || memNE(PL_last_uni, "study", 5)
5957                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5958               ))
5959                  check_uni();
5960              s = scan_pat(s,OP_MATCH);
5961              TERM(sublex_start());
5962          }
5963
5964     case '.':
5965         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5966 #ifdef PERL_STRICT_CR
5967             && s[1] == '\n'
5968 #else
5969             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5970 #endif
5971             && (s == PL_linestart || s[-1] == '\n') )
5972         {
5973             PL_lex_formbrack = 0;
5974             PL_expect = XSTATE;
5975             goto rightbracket;
5976         }
5977         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5978             s += 3;
5979             OPERATOR(YADAYADA);
5980         }
5981         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5982             char tmp = *s++;
5983             if (*s == tmp) {
5984                 s++;
5985                 if (*s == tmp) {
5986                     s++;
5987                     pl_yylval.ival = OPf_SPECIAL;
5988                 }
5989                 else
5990                     pl_yylval.ival = 0;
5991                 OPERATOR(DOTDOT);
5992             }
5993             Aop(OP_CONCAT);
5994         }
5995         /* FALL THROUGH */
5996     case '0': case '1': case '2': case '3': case '4':
5997     case '5': case '6': case '7': case '8': case '9':
5998         s = scan_num(s, &pl_yylval);
5999         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6000         if (PL_expect == XOPERATOR)
6001             no_op("Number",s);
6002         TERM(THING);
6003
6004     case '\'':
6005         s = scan_str(s,!!PL_madskills,FALSE);
6006         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6007         if (PL_expect == XOPERATOR) {
6008             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6009                 return deprecate_commaless_var_list();
6010             }
6011             else
6012                 no_op("String",s);
6013         }
6014         if (!s)
6015             missingterm(NULL);
6016         pl_yylval.ival = OP_CONST;
6017         TERM(sublex_start());
6018
6019     case '"':
6020         s = scan_str(s,!!PL_madskills,FALSE);
6021         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6022         if (PL_expect == XOPERATOR) {
6023             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6024                 return deprecate_commaless_var_list();
6025             }
6026             else
6027                 no_op("String",s);
6028         }
6029         if (!s)
6030             missingterm(NULL);
6031         pl_yylval.ival = OP_CONST;
6032         /* FIXME. I think that this can be const if char *d is replaced by
6033            more localised variables.  */
6034         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6035             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6036                 pl_yylval.ival = OP_STRINGIFY;
6037                 break;
6038             }
6039         }
6040         TERM(sublex_start());
6041
6042     case '`':
6043         s = scan_str(s,!!PL_madskills,FALSE);
6044         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6045         if (PL_expect == XOPERATOR)
6046             no_op("Backticks",s);
6047         if (!s)
6048             missingterm(NULL);
6049         readpipe_override();
6050         TERM(sublex_start());
6051
6052     case '\\':
6053         s++;
6054         if (PL_lex_inwhat && isDIGIT(*s))
6055             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6056                            *s, *s);
6057         if (PL_expect == XOPERATOR)
6058             no_op("Backslash",s);
6059         OPERATOR(REFGEN);
6060
6061     case 'v':
6062         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6063             char *start = s + 2;
6064             while (isDIGIT(*start) || *start == '_')
6065                 start++;
6066             if (*start == '.' && isDIGIT(start[1])) {
6067                 s = scan_num(s, &pl_yylval);
6068                 TERM(THING);
6069             }
6070             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6071             else if (!isALPHA(*start) && (PL_expect == XTERM
6072                         || PL_expect == XREF || PL_expect == XSTATE
6073                         || PL_expect == XTERMORDORDOR)) {
6074                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6075                 if (!gv) {
6076                     s = scan_num(s, &pl_yylval);
6077                     TERM(THING);
6078                 }
6079             }
6080         }
6081         goto keylookup;
6082     case 'x':
6083         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6084             s++;
6085             Mop(OP_REPEAT);
6086         }
6087         goto keylookup;
6088
6089     case '_':
6090     case 'a': case 'A':
6091     case 'b': case 'B':
6092     case 'c': case 'C':
6093     case 'd': case 'D':
6094     case 'e': case 'E':
6095     case 'f': case 'F':
6096     case 'g': case 'G':
6097     case 'h': case 'H':
6098     case 'i': case 'I':
6099     case 'j': case 'J':
6100     case 'k': case 'K':
6101     case 'l': case 'L':
6102     case 'm': case 'M':
6103     case 'n': case 'N':
6104     case 'o': case 'O':
6105     case 'p': case 'P':
6106     case 'q': case 'Q':
6107     case 'r': case 'R':
6108     case 's': case 'S':
6109     case 't': case 'T':
6110     case 'u': case 'U':
6111               case 'V':
6112     case 'w': case 'W':
6113               case 'X':
6114     case 'y': case 'Y':
6115     case 'z': case 'Z':
6116
6117       keylookup: {
6118         bool anydelim;
6119         I32 tmp;
6120
6121         orig_keyword = 0;
6122         gv = NULL;
6123         gvp = NULL;
6124
6125         PL_bufptr = s;
6126         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6127
6128         /* Some keywords can be followed by any delimiter, including ':' */
6129         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6130                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6131                              (PL_tokenbuf[0] == 'q' &&
6132                               strchr("qwxr", PL_tokenbuf[1])))));
6133
6134         /* x::* is just a word, unless x is "CORE" */
6135         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6136             goto just_a_word;
6137
6138         d = s;
6139         while (d < PL_bufend && isSPACE(*d))
6140                 d++;    /* no comments skipped here, or s### is misparsed */
6141
6142         /* Is this a word before a => operator? */
6143         if (*d == '=' && d[1] == '>') {
6144             CLINE;
6145             pl_yylval.opval
6146                 = (OP*)newSVOP(OP_CONST, 0,
6147                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6148             pl_yylval.opval->op_private = OPpCONST_BARE;
6149             TERM(WORD);
6150         }
6151
6152         /* Check for plugged-in keyword */
6153         {
6154             OP *o;
6155             int result;
6156             char *saved_bufptr = PL_bufptr;
6157             PL_bufptr = s;
6158             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6159             s = PL_bufptr;
6160             if (result == KEYWORD_PLUGIN_DECLINE) {
6161                 /* not a plugged-in keyword */
6162                 PL_bufptr = saved_bufptr;
6163             } else if (result == KEYWORD_PLUGIN_STMT) {
6164                 pl_yylval.opval = o;
6165                 CLINE;
6166                 PL_expect = XSTATE;
6167                 return REPORT(PLUGSTMT);
6168             } else if (result == KEYWORD_PLUGIN_EXPR) {
6169                 pl_yylval.opval = o;
6170                 CLINE;
6171                 PL_expect = XOPERATOR;
6172                 return REPORT(PLUGEXPR);
6173             } else {
6174                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6175                                         PL_tokenbuf);
6176             }
6177         }
6178
6179         /* Check for built-in keyword */
6180         tmp = keyword(PL_tokenbuf, len, 0);
6181
6182         /* Is this a label? */
6183         if (!anydelim && PL_expect == XSTATE
6184               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6185             s = d + 1;
6186             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6187             CLINE;
6188             TOKEN(LABEL);
6189         }
6190
6191         if (tmp < 0) {                  /* second-class keyword? */
6192             GV *ogv = NULL;     /* override (winner) */
6193             GV *hgv = NULL;     /* hidden (loser) */
6194             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6195                 CV *cv;
6196                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6197                     (cv = GvCVu(gv)))
6198                 {
6199                     if (GvIMPORTED_CV(gv))
6200                         ogv = gv;
6201                     else if (! CvMETHOD(cv))
6202                         hgv = gv;
6203                 }
6204                 if (!ogv &&
6205                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6206                     (gv = *gvp) && isGV_with_GP(gv) &&
6207                     GvCVu(gv) && GvIMPORTED_CV(gv))
6208                 {
6209                     ogv = gv;
6210                 }
6211             }
6212             if (ogv) {
6213                 orig_keyword = tmp;
6214                 tmp = 0;                /* overridden by import or by GLOBAL */
6215             }
6216             else if (gv && !gvp
6217                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6218                      && GvCVu(gv))
6219             {
6220                 tmp = 0;                /* any sub overrides "weak" keyword */
6221             }
6222             else {                      /* no override */
6223                 tmp = -tmp;
6224                 if (tmp == KEY_dump) {
6225                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6226                                    "dump() better written as CORE::dump()");
6227                 }
6228                 gv = NULL;
6229                 gvp = 0;
6230                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6231                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6232                                    "Ambiguous call resolved as CORE::%s(), "
6233                                    "qualify as such or use &",
6234                                    GvENAME(hgv));
6235             }
6236         }
6237
6238       reserved_word:
6239         switch (tmp) {
6240
6241         default:                        /* not a keyword */
6242             /* Trade off - by using this evil construction we can pull the
6243                variable gv into the block labelled keylookup. If not, then
6244                we have to give it function scope so that the goto from the
6245                earlier ':' case doesn't bypass the initialisation.  */
6246             if (0) {
6247             just_a_word_zero_gv:
6248                 gv = NULL;
6249                 gvp = NULL;
6250                 orig_keyword = 0;
6251             }
6252           just_a_word: {
6253                 SV *sv;
6254                 int pkgname = 0;
6255                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6256                 OP *rv2cv_op;
6257                 CV *cv;
6258 #ifdef PERL_MAD
6259                 SV *nextPL_nextwhite = 0;
6260 #endif
6261
6262
6263                 /* Get the rest if it looks like a package qualifier */
6264
6265                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6266                     STRLEN morelen;
6267                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6268                                   TRUE, &morelen);
6269                     if (!morelen)
6270                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6271                                 *s == '\'' ? "'" : "::");
6272                     len += morelen;
6273                     pkgname = 1;
6274                 }
6275
6276                 if (PL_expect == XOPERATOR) {
6277                     if (PL_bufptr == PL_linestart) {
6278                         CopLINE_dec(PL_curcop);
6279                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6280                         CopLINE_inc(PL_curcop);
6281                     }
6282                     else
6283                         no_op("Bareword",s);
6284                 }
6285
6286                 /* Look for a subroutine with this name in current package,
6287                    unless name is "Foo::", in which case Foo is a bearword
6288                    (and a package name). */
6289
6290                 if (len > 2 && !PL_madskills &&
6291                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6292                 {
6293                     if (ckWARN(WARN_BAREWORD)
6294                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6295                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6296                             "Bareword \"%s\" refers to nonexistent package",
6297                              PL_tokenbuf);
6298                     len -= 2;
6299                     PL_tokenbuf[len] = '\0';
6300                     gv = NULL;
6301                     gvp = 0;
6302                 }
6303                 else {
6304                     if (!gv) {
6305                         /* Mustn't actually add anything to a symbol table.
6306                            But also don't want to "initialise" any placeholder
6307                            constants that might already be there into full
6308                            blown PVGVs with attached PVCV.  */
6309                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6310                                                GV_NOADD_NOINIT, SVt_PVCV);
6311                     }
6312                     len = 0;
6313                 }
6314
6315                 /* if we saw a global override before, get the right name */
6316
6317                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6318                     len ? len : strlen(PL_tokenbuf));
6319                 if (gvp) {
6320                     SV * const tmp_sv = sv;
6321                     sv = newSVpvs("CORE::GLOBAL::");
6322                     sv_catsv(sv, tmp_sv);
6323                     SvREFCNT_dec(tmp_sv);
6324                 }
6325
6326 #ifdef PERL_MAD
6327                 if (PL_madskills && !PL_thistoken) {
6328                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6329                     PL_thistoken = newSVpvn(start,s - start);
6330                     PL_realtokenstart = s - SvPVX(PL_linestr);
6331                 }
6332 #endif
6333
6334                 /* Presume this is going to be a bareword of some sort. */
6335                 CLINE;
6336                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6337                 pl_yylval.opval->op_private = OPpCONST_BARE;
6338
6339                 /* And if "Foo::", then that's what it certainly is. */
6340                 if (len)
6341                     goto safe_bareword;
6342
6343                 {
6344                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6345                     const_op->op_private = OPpCONST_BARE;
6346                     rv2cv_op = newCVREF(0, const_op);
6347                 }
6348                 cv = rv2cv_op_cv(rv2cv_op, 0);
6349
6350                 /* See if it's the indirect object for a list operator. */
6351
6352                 if (PL_oldoldbufptr &&
6353                     PL_oldoldbufptr < PL_bufptr &&
6354                     (PL_oldoldbufptr == PL_last_lop
6355                      || PL_oldoldbufptr == PL_last_uni) &&
6356                     /* NO SKIPSPACE BEFORE HERE! */
6357                     (PL_expect == XREF ||
6358                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6359                 {
6360                     bool immediate_paren = *s == '(';
6361
6362                     /* (Now we can afford to cross potential line boundary.) */
6363                     s = SKIPSPACE2(s,nextPL_nextwhite);
6364 #ifdef PERL_MAD
6365                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6366 #endif
6367
6368                     /* Two barewords in a row may indicate method call. */
6369
6370                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6371                         (tmp = intuit_method(s, gv, cv))) {
6372                         op_free(rv2cv_op);
6373                         return REPORT(tmp);
6374                     }
6375
6376                     /* If not a declared subroutine, it's an indirect object. */
6377                     /* (But it's an indir obj regardless for sort.) */
6378                     /* Also, if "_" follows a filetest operator, it's a bareword */
6379
6380                     if (
6381                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6382                          (!cv &&
6383                         (PL_last_lop_op != OP_MAPSTART &&
6384                          PL_last_lop_op != OP_GREPSTART))))
6385                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6386                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6387                        )
6388                     {
6389                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6390                         goto bareword;
6391                     }
6392                 }
6393
6394                 PL_expect = XOPERATOR;
6395 #ifdef PERL_MAD
6396                 if (isSPACE(*s))
6397                     s = SKIPSPACE2(s,nextPL_nextwhite);
6398                 PL_nextwhite = nextPL_nextwhite;
6399 #else
6400                 s = skipspace(s);
6401 #endif
6402
6403                 /* Is this a word before a => operator? */
6404                 if (*s == '=' && s[1] == '>' && !pkgname) {
6405                     op_free(rv2cv_op);
6406                     CLINE;
6407                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6408                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6409                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6410                     TERM(WORD);
6411                 }
6412
6413                 /* If followed by a paren, it's certainly a subroutine. */
6414                 if (*s == '(') {
6415                     CLINE;
6416                     if (cv) {
6417                         d = s + 1;
6418                         while (SPACE_OR_TAB(*d))
6419                             d++;
6420                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6421                             s = d + 1;
6422                             goto its_constant;
6423                         }
6424                     }
6425 #ifdef PERL_MAD
6426                     if (PL_madskills) {
6427                         PL_nextwhite = PL_thiswhite;
6428                         PL_thiswhite = 0;
6429                     }
6430                     start_force(PL_curforce);
6431 #endif
6432                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6433                     PL_expect = XOPERATOR;
6434 #ifdef PERL_MAD
6435                     if (PL_madskills) {
6436                         PL_nextwhite = nextPL_nextwhite;
6437                         curmad('X', PL_thistoken);
6438                         PL_thistoken = newSVpvs("");
6439                     }
6440 #endif
6441                     op_free(rv2cv_op);
6442                     force_next(WORD);
6443                     pl_yylval.ival = 0;
6444                     TOKEN('&');
6445                 }
6446
6447                 /* If followed by var or block, call it a method (unless sub) */
6448
6449                 if ((*s == '$' || *s == '{') && !cv) {
6450                     op_free(rv2cv_op);
6451                     PL_last_lop = PL_oldbufptr;
6452                     PL_last_lop_op = OP_METHOD;
6453                     PREBLOCK(METHOD);
6454                 }
6455
6456                 /* If followed by a bareword, see if it looks like indir obj. */
6457
6458                 if (!orig_keyword
6459                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6460                         && (tmp = intuit_method(s, gv, cv))) {
6461                     op_free(rv2cv_op);
6462                     return REPORT(tmp);
6463                 }
6464
6465                 /* Not a method, so call it a subroutine (if defined) */
6466
6467                 if (cv) {
6468                     if (lastchar == '-')
6469                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6470                                          "Ambiguous use of -%s resolved as -&%s()",
6471                                          PL_tokenbuf, PL_tokenbuf);
6472                     /* Check for a constant sub */
6473                     if ((sv = cv_const_sv(cv))) {
6474                   its_constant:
6475                         op_free(rv2cv_op);
6476                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6477                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6478                         pl_yylval.opval->op_private = 0;
6479                         TOKEN(WORD);
6480                     }
6481
6482                     op_free(pl_yylval.opval);
6483                     pl_yylval.opval = rv2cv_op;
6484                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6485                     PL_last_lop = PL_oldbufptr;
6486                     PL_last_lop_op = OP_ENTERSUB;
6487                     /* Is there a prototype? */
6488                     if (
6489 #ifdef PERL_MAD
6490                         cv &&
6491 #endif
6492                         SvPOK(cv))
6493                     {
6494                         STRLEN protolen;
6495                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6496                         if (!protolen)
6497                             TERM(FUNC0SUB);
6498                         while (*proto == ';')
6499                             proto++;
6500                         if (
6501                             (
6502                                 (
6503                                     *proto == '$' || *proto == '_'
6504                                  || *proto == '*'
6505                                 )
6506                              && proto[1] == '\0'
6507                             )
6508                          || (
6509                              *proto == '\\' && proto[1] && proto[2] == '\0'
6510                             )
6511                         )
6512                             OPERATOR(UNIOPSUB);
6513                         if (*proto == '\\' && proto[1] == '[') {
6514                             const char *p = proto + 2;
6515                             while(*p && *p != ']')
6516                                 ++p;
6517                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6518                         }
6519                         if (*proto == '&' && *s == '{') {
6520                             if (PL_curstash)
6521                                 sv_setpvs(PL_subname, "__ANON__");
6522                             else
6523                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6524                             PREBLOCK(LSTOPSUB);
6525                         }
6526                     }
6527 #ifdef PERL_MAD
6528                     {
6529                         if (PL_madskills) {
6530                             PL_nextwhite = PL_thiswhite;
6531                             PL_thiswhite = 0;
6532                         }
6533                         start_force(PL_curforce);
6534                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6535                         PL_expect = XTERM;
6536                         if (PL_madskills) {
6537                             PL_nextwhite = nextPL_nextwhite;
6538                             curmad('X', PL_thistoken);
6539                             PL_thistoken = newSVpvs("");
6540                         }
6541                         force_next(WORD);
6542                         TOKEN(NOAMP);
6543                     }
6544                 }
6545
6546                 /* Guess harder when madskills require "best effort". */
6547                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6548                     int probable_sub = 0;
6549                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6550                         probable_sub = 1;
6551                     else if (isALPHA(*s)) {
6552                         char tmpbuf[1024];
6553                         STRLEN tmplen;
6554                         d = s;
6555                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6556                         if (!keyword(tmpbuf, tmplen, 0))
6557                             probable_sub = 1;
6558                         else {
6559                             while (d < PL_bufend && isSPACE(*d))
6560                                 d++;
6561                             if (*d == '=' && d[1] == '>')
6562                                 probable_sub = 1;
6563                         }
6564                     }
6565                     if (probable_sub) {
6566                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6567                         op_free(pl_yylval.opval);
6568                         pl_yylval.opval = rv2cv_op;
6569                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6570                         PL_last_lop = PL_oldbufptr;
6571                         PL_last_lop_op = OP_ENTERSUB;
6572                         PL_nextwhite = PL_thiswhite;
6573                         PL_thiswhite = 0;
6574                         start_force(PL_curforce);
6575                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6576                         PL_expect = XTERM;
6577                         PL_nextwhite = nextPL_nextwhite;
6578                         curmad('X', PL_thistoken);
6579                         PL_thistoken = newSVpvs("");
6580                         force_next(WORD);
6581                         TOKEN(NOAMP);
6582                     }
6583 #else
6584                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6585                     PL_expect = XTERM;
6586                     force_next(WORD);
6587                     TOKEN(NOAMP);
6588 #endif
6589                 }
6590
6591                 /* Call it a bare word */
6592
6593                 if (PL_hints & HINT_STRICT_SUBS)
6594                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6595                 else {
6596                 bareword:
6597                     /* after "print" and similar functions (corresponding to
6598                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6599                      * a filehandle should be subject to "strict subs".
6600                      * Likewise for the optional indirect-object argument to system
6601                      * or exec, which can't be a bareword */
6602                     if ((PL_last_lop_op == OP_PRINT
6603                             || PL_last_lop_op == OP_PRTF
6604                             || PL_last_lop_op == OP_SAY
6605                             || PL_last_lop_op == OP_SYSTEM
6606                             || PL_last_lop_op == OP_EXEC)
6607                             && (PL_hints & HINT_STRICT_SUBS))
6608                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6609                     if (lastchar != '-') {
6610                         if (ckWARN(WARN_RESERVED)) {
6611                             d = PL_tokenbuf;
6612                             while (isLOWER(*d))
6613                                 d++;
6614                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6615                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6616                                        PL_tokenbuf);
6617                         }
6618                     }
6619                 }
6620                 op_free(rv2cv_op);
6621
6622             safe_bareword:
6623                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6624                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6625                                      "Operator or semicolon missing before %c%s",
6626                                      lastchar, PL_tokenbuf);
6627                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6628                                      "Ambiguous use of %c resolved as operator %c",
6629                                      lastchar, lastchar);
6630                 }
6631                 TOKEN(WORD);
6632             }
6633
6634         case KEY___FILE__:
6635             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6636                                         newSVpv(CopFILE(PL_curcop),0));
6637             TERM(THING);
6638
6639         case KEY___LINE__:
6640             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6641                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6642             TERM(THING);
6643
6644         case KEY___PACKAGE__:
6645             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6646                                         (PL_curstash
6647                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6648                                          : &PL_sv_undef));
6649             TERM(THING);
6650
6651         case KEY___DATA__:
6652         case KEY___END__: {
6653             GV *gv;
6654             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6655                 const char *pname = "main";
6656                 if (PL_tokenbuf[2] == 'D')
6657                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6658                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6659                                 SVt_PVIO);
6660                 GvMULTI_on(gv);
6661                 if (!GvIO(gv))
6662                     GvIOp(gv) = newIO();
6663                 IoIFP(GvIOp(gv)) = PL_rsfp;
6664 #if defined(HAS_FCNTL) && defined(F_SETFD)
6665                 {
6666                     const int fd = PerlIO_fileno(PL_rsfp);
6667                     fcntl(fd,F_SETFD,fd >= 3);
6668                 }
6669 #endif
6670                 /* Mark this internal pseudo-handle as clean */
6671                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6672                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6673                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6674                 else
6675                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6676 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6677                 /* if the script was opened in binmode, we need to revert
6678                  * it to text mode for compatibility; but only iff it has CRs
6679                  * XXX this is a questionable hack at best. */
6680                 if (PL_bufend-PL_bufptr > 2
6681                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6682                 {
6683                     Off_t loc = 0;
6684                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6685                         loc = PerlIO_tell(PL_rsfp);
6686                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6687                     }
6688 #ifdef NETWARE
6689                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6690 #else
6691                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6692 #endif  /* NETWARE */
6693 #ifdef PERLIO_IS_STDIO /* really? */
6694 #  if defined(__BORLANDC__)
6695                         /* XXX see note in do_binmode() */
6696                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6697 #  endif
6698 #endif
6699                         if (loc > 0)
6700                             PerlIO_seek(PL_rsfp, loc, 0);
6701                     }
6702                 }
6703 #endif
6704 #ifdef PERLIO_LAYERS
6705                 if (!IN_BYTES) {
6706                     if (UTF)
6707                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6708                     else if (PL_encoding) {
6709                         SV *name;
6710                         dSP;
6711                         ENTER;
6712                         SAVETMPS;
6713                         PUSHMARK(sp);
6714                         EXTEND(SP, 1);
6715                         XPUSHs(PL_encoding);
6716                         PUTBACK;
6717                         call_method("name", G_SCALAR);
6718                         SPAGAIN;
6719                         name = POPs;
6720                         PUTBACK;
6721                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6722                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6723                                                       SVfARG(name)));
6724                         FREETMPS;
6725                         LEAVE;
6726                     }
6727                 }
6728 #endif
6729 #ifdef PERL_MAD
6730                 if (PL_madskills) {
6731                     if (PL_realtokenstart >= 0) {
6732                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6733                         if (!PL_endwhite)
6734                             PL_endwhite = newSVpvs("");
6735                         sv_catsv(PL_endwhite, PL_thiswhite);
6736                         PL_thiswhite = 0;
6737                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6738                         PL_realtokenstart = -1;
6739                     }
6740                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6741                            != NULL) ;
6742                 }
6743 #endif
6744                 PL_rsfp = NULL;
6745             }
6746             goto fake_eof;
6747         }
6748
6749         case KEY_AUTOLOAD:
6750         case KEY_DESTROY:
6751         case KEY_BEGIN:
6752         case KEY_UNITCHECK:
6753         case KEY_CHECK:
6754         case KEY_INIT:
6755         case KEY_END:
6756             if (PL_expect == XSTATE) {
6757                 s = PL_bufptr;
6758                 goto really_sub;
6759             }
6760             goto just_a_word;
6761
6762         case KEY_CORE:
6763             if (*s == ':' && s[1] == ':') {
6764                 s += 2;
6765                 d = s;
6766                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6767                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6768                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6769                 if (tmp < 0)
6770                     tmp = -tmp;
6771                 else if (tmp == KEY_require || tmp == KEY_do)
6772                     /* that's a way to remember we saw "CORE::" */
6773                     orig_keyword = tmp;
6774                 goto reserved_word;
6775             }
6776             goto just_a_word;
6777
6778         case KEY_abs:
6779             UNI(OP_ABS);
6780
6781         case KEY_alarm:
6782             UNI(OP_ALARM);
6783
6784         case KEY_accept:
6785             LOP(OP_ACCEPT,XTERM);
6786
6787         case KEY_and:
6788             OPERATOR(ANDOP);
6789
6790         case KEY_atan2:
6791             LOP(OP_ATAN2,XTERM);
6792
6793         case KEY_bind:
6794             LOP(OP_BIND,XTERM);
6795
6796         case KEY_binmode:
6797             LOP(OP_BINMODE,XTERM);
6798
6799         case KEY_bless:
6800             LOP(OP_BLESS,XTERM);
6801
6802         case KEY_break:
6803             FUN0(OP_BREAK);
6804
6805         case KEY_chop:
6806             UNI(OP_CHOP);
6807
6808         case KEY_continue:
6809             /* When 'use switch' is in effect, continue has a dual
6810                life as a control operator. */
6811             {
6812                 if (!FEATURE_IS_ENABLED("switch"))
6813                     PREBLOCK(CONTINUE);
6814                 else {
6815                     /* We have to disambiguate the two senses of
6816                       "continue". If the next token is a '{' then
6817                       treat it as the start of a continue block;
6818                       otherwise treat it as a control operator.
6819                      */
6820                     s = skipspace(s);
6821                     if (*s == '{')
6822             PREBLOCK(CONTINUE);
6823                     else
6824                         FUN0(OP_CONTINUE);
6825                 }
6826             }
6827
6828         case KEY_chdir:
6829             /* may use HOME */
6830             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6831             UNI(OP_CHDIR);
6832
6833         case KEY_close:
6834             UNI(OP_CLOSE);
6835
6836         case KEY_closedir:
6837             UNI(OP_CLOSEDIR);
6838
6839         case KEY_cmp:
6840             Eop(OP_SCMP);
6841
6842         case KEY_caller:
6843             UNI(OP_CALLER);
6844
6845         case KEY_crypt:
6846 #ifdef FCRYPT
6847             if (!PL_cryptseen) {
6848                 PL_cryptseen = TRUE;
6849                 init_des();
6850             }
6851 #endif
6852             LOP(OP_CRYPT,XTERM);
6853
6854         case KEY_chmod:
6855             LOP(OP_CHMOD,XTERM);
6856
6857         case KEY_chown:
6858             LOP(OP_CHOWN,XTERM);
6859
6860         case KEY_connect:
6861             LOP(OP_CONNECT,XTERM);
6862
6863         case KEY_chr:
6864             UNI(OP_CHR);
6865
6866         case KEY_cos:
6867             UNI(OP_COS);
6868
6869         case KEY_chroot:
6870             UNI(OP_CHROOT);
6871
6872         case KEY_default:
6873             PREBLOCK(DEFAULT);
6874
6875         case KEY_do:
6876             s = SKIPSPACE1(s);
6877             if (*s == '{')
6878                 PRETERMBLOCK(DO);
6879             if (*s != '\'')
6880                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6881             if (orig_keyword == KEY_do) {
6882                 orig_keyword = 0;
6883                 pl_yylval.ival = 1;
6884             }
6885             else
6886                 pl_yylval.ival = 0;
6887             OPERATOR(DO);
6888
6889         case KEY_die:
6890             PL_hints |= HINT_BLOCK_SCOPE;
6891             LOP(OP_DIE,XTERM);
6892
6893         case KEY_defined:
6894             UNI(OP_DEFINED);
6895
6896         case KEY_delete:
6897             UNI(OP_DELETE);
6898
6899         case KEY_dbmopen:
6900             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
6901                               STR_WITH_LEN("NDBM_File::"),
6902                               STR_WITH_LEN("DB_File::"),
6903                               STR_WITH_LEN("GDBM_File::"),
6904                               STR_WITH_LEN("SDBM_File::"),
6905                               STR_WITH_LEN("ODBM_File::"),
6906                               NULL);
6907             LOP(OP_DBMOPEN,XTERM);
6908
6909         case KEY_dbmclose:
6910             UNI(OP_DBMCLOSE);
6911
6912         case KEY_dump:
6913             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6914             LOOPX(OP_DUMP);
6915
6916         case KEY_else:
6917             PREBLOCK(ELSE);
6918
6919         case KEY_elsif:
6920             pl_yylval.ival = CopLINE(PL_curcop);
6921             OPERATOR(ELSIF);
6922
6923         case KEY_eq:
6924             Eop(OP_SEQ);
6925
6926         case KEY_exists:
6927             UNI(OP_EXISTS);
6928
6929         case KEY_exit:
6930             if (PL_madskills)
6931                 UNI(OP_INT);
6932             UNI(OP_EXIT);
6933
6934         case KEY_eval:
6935             s = SKIPSPACE1(s);
6936             if (*s == '{') { /* block eval */
6937                 PL_expect = XTERMBLOCK;
6938                 UNIBRACK(OP_ENTERTRY);
6939             }
6940             else { /* string eval */
6941                 PL_expect = XTERM;
6942                 UNIBRACK(OP_ENTEREVAL);
6943             }
6944
6945         case KEY_eof:
6946             UNI(OP_EOF);
6947
6948         case KEY_exp:
6949             UNI(OP_EXP);
6950
6951         case KEY_each:
6952             UNI(OP_EACH);
6953
6954         case KEY_exec:
6955             LOP(OP_EXEC,XREF);
6956
6957         case KEY_endhostent:
6958             FUN0(OP_EHOSTENT);
6959
6960         case KEY_endnetent:
6961             FUN0(OP_ENETENT);
6962
6963         case KEY_endservent:
6964             FUN0(OP_ESERVENT);
6965
6966         case KEY_endprotoent:
6967             FUN0(OP_EPROTOENT);
6968
6969         case KEY_endpwent:
6970             FUN0(OP_EPWENT);
6971
6972         case KEY_endgrent:
6973             FUN0(OP_EGRENT);
6974
6975         case KEY_for:
6976         case KEY_foreach:
6977             pl_yylval.ival = CopLINE(PL_curcop);
6978             s = SKIPSPACE1(s);
6979             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6980                 char *p = s;
6981 #ifdef PERL_MAD
6982                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6983 #endif
6984
6985                 if ((PL_bufend - p) >= 3 &&
6986                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6987                     p += 2;
6988                 else if ((PL_bufend - p) >= 4 &&
6989                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6990                     p += 3;
6991                 p = PEEKSPACE(p);
6992                 if (isIDFIRST_lazy_if(p,UTF)) {
6993                     p = scan_ident(p, PL_bufend,
6994                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6995                     p = PEEKSPACE(p);
6996                 }
6997                 if (*p != '$')
6998                     Perl_croak(aTHX_ "Missing $ on loop variable");
6999 #ifdef PERL_MAD
7000                 s = SvPVX(PL_linestr) + soff;
7001 #endif
7002             }
7003             OPERATOR(FOR);
7004
7005         case KEY_formline:
7006             LOP(OP_FORMLINE,XTERM);
7007
7008         case KEY_fork:
7009             FUN0(OP_FORK);
7010
7011         case KEY_fcntl:
7012             LOP(OP_FCNTL,XTERM);
7013
7014         case KEY_fileno:
7015             UNI(OP_FILENO);
7016
7017         case KEY_flock:
7018             LOP(OP_FLOCK,XTERM);
7019
7020         case KEY_gt:
7021             Rop(OP_SGT);
7022
7023         case KEY_ge:
7024             Rop(OP_SGE);
7025
7026         case KEY_grep:
7027             LOP(OP_GREPSTART, XREF);
7028
7029         case KEY_goto:
7030             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7031             LOOPX(OP_GOTO);
7032
7033         case KEY_gmtime:
7034             UNI(OP_GMTIME);
7035
7036         case KEY_getc:
7037             UNIDOR(OP_GETC);
7038
7039         case KEY_getppid:
7040             FUN0(OP_GETPPID);
7041
7042         case KEY_getpgrp:
7043             UNI(OP_GETPGRP);
7044
7045         case KEY_getpriority:
7046             LOP(OP_GETPRIORITY,XTERM);
7047
7048         case KEY_getprotobyname:
7049             UNI(OP_GPBYNAME);
7050
7051         case KEY_getprotobynumber:
7052             LOP(OP_GPBYNUMBER,XTERM);
7053
7054         case KEY_getprotoent:
7055             FUN0(OP_GPROTOENT);
7056
7057         case KEY_getpwent:
7058             FUN0(OP_GPWENT);
7059
7060         case KEY_getpwnam:
7061             UNI(OP_GPWNAM);
7062
7063         case KEY_getpwuid:
7064             UNI(OP_GPWUID);
7065
7066         case KEY_getpeername:
7067             UNI(OP_GETPEERNAME);
7068
7069         case KEY_gethostbyname:
7070             UNI(OP_GHBYNAME);
7071
7072         case KEY_gethostbyaddr:
7073             LOP(OP_GHBYADDR,XTERM);
7074
7075         case KEY_gethostent:
7076             FUN0(OP_GHOSTENT);
7077
7078         case KEY_getnetbyname:
7079             UNI(OP_GNBYNAME);
7080
7081         case KEY_getnetbyaddr:
7082             LOP(OP_GNBYADDR,XTERM);
7083
7084         case KEY_getnetent:
7085             FUN0(OP_GNETENT);
7086
7087         case KEY_getservbyname:
7088             LOP(OP_GSBYNAME,XTERM);
7089
7090         case KEY_getservbyport:
7091             LOP(OP_GSBYPORT,XTERM);
7092
7093         case KEY_getservent:
7094             FUN0(OP_GSERVENT);
7095
7096         case KEY_getsockname:
7097             UNI(OP_GETSOCKNAME);
7098
7099         case KEY_getsockopt:
7100             LOP(OP_GSOCKOPT,XTERM);
7101
7102         case KEY_getgrent:
7103             FUN0(OP_GGRENT);
7104
7105         case KEY_getgrnam:
7106             UNI(OP_GGRNAM);
7107
7108         case KEY_getgrgid:
7109             UNI(OP_GGRGID);
7110
7111         case KEY_getlogin:
7112             FUN0(OP_GETLOGIN);
7113
7114         case KEY_given:
7115             pl_yylval.ival = CopLINE(PL_curcop);
7116             OPERATOR(GIVEN);
7117
7118         case KEY_glob:
7119             LOP(OP_GLOB,XTERM);
7120
7121         case KEY_hex:
7122             UNI(OP_HEX);
7123
7124         case KEY_if:
7125             pl_yylval.ival = CopLINE(PL_curcop);
7126             OPERATOR(IF);
7127
7128         case KEY_index:
7129             LOP(OP_INDEX,XTERM);
7130
7131         case KEY_int:
7132             UNI(OP_INT);
7133
7134         case KEY_ioctl:
7135             LOP(OP_IOCTL,XTERM);
7136
7137         case KEY_join:
7138             LOP(OP_JOIN,XTERM);
7139
7140         case KEY_keys:
7141             UNI(OP_KEYS);
7142
7143         case KEY_kill:
7144             LOP(OP_KILL,XTERM);
7145
7146         case KEY_last:
7147             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7148             LOOPX(OP_LAST);
7149
7150         case KEY_lc:
7151             UNI(OP_LC);
7152
7153         case KEY_lcfirst:
7154             UNI(OP_LCFIRST);
7155
7156         case KEY_local:
7157             pl_yylval.ival = 0;
7158             OPERATOR(LOCAL);
7159
7160         case KEY_length:
7161             UNI(OP_LENGTH);
7162
7163         case KEY_lt:
7164             Rop(OP_SLT);
7165
7166         case KEY_le:
7167             Rop(OP_SLE);
7168
7169         case KEY_localtime:
7170             UNI(OP_LOCALTIME);
7171
7172         case KEY_log:
7173             UNI(OP_LOG);
7174
7175         case KEY_link:
7176             LOP(OP_LINK,XTERM);
7177
7178         case KEY_listen:
7179             LOP(OP_LISTEN,XTERM);
7180
7181         case KEY_lock:
7182             UNI(OP_LOCK);
7183
7184         case KEY_lstat:
7185             UNI(OP_LSTAT);
7186
7187         case KEY_m:
7188             s = scan_pat(s,OP_MATCH);
7189             TERM(sublex_start());
7190
7191         case KEY_map:
7192             LOP(OP_MAPSTART, XREF);
7193
7194         case KEY_mkdir:
7195             LOP(OP_MKDIR,XTERM);
7196
7197         case KEY_msgctl:
7198             LOP(OP_MSGCTL,XTERM);
7199
7200         case KEY_msgget:
7201             LOP(OP_MSGGET,XTERM);
7202
7203         case KEY_msgrcv:
7204             LOP(OP_MSGRCV,XTERM);
7205
7206         case KEY_msgsnd:
7207             LOP(OP_MSGSND,XTERM);
7208
7209         case KEY_our:
7210         case KEY_my:
7211         case KEY_state:
7212             PL_in_my = (U16)tmp;
7213             s = SKIPSPACE1(s);
7214             if (isIDFIRST_lazy_if(s,UTF)) {
7215 #ifdef PERL_MAD
7216                 char* start = s;
7217 #endif
7218                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7219                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7220                     goto really_sub;
7221                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7222                 if (!PL_in_my_stash) {
7223                     char tmpbuf[1024];
7224                     PL_bufptr = s;
7225                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7226                     yyerror(tmpbuf);
7227                 }
7228 #ifdef PERL_MAD
7229                 if (PL_madskills) {     /* just add type to declarator token */
7230                     sv_catsv(PL_thistoken, PL_nextwhite);
7231                     PL_nextwhite = 0;
7232                     sv_catpvn(PL_thistoken, start, s - start);
7233                 }
7234 #endif
7235             }
7236             pl_yylval.ival = 1;
7237             OPERATOR(MY);
7238
7239         case KEY_next:
7240             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7241             LOOPX(OP_NEXT);
7242
7243         case KEY_ne:
7244             Eop(OP_SNE);
7245
7246         case KEY_no:
7247             s = tokenize_use(0, s);
7248             OPERATOR(USE);
7249
7250         case KEY_not:
7251             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7252                 FUN1(OP_NOT);
7253             else
7254                 OPERATOR(NOTOP);
7255
7256         case KEY_open:
7257             s = SKIPSPACE1(s);
7258             if (isIDFIRST_lazy_if(s,UTF)) {
7259                 const char *t;
7260                 for (d = s; isALNUM_lazy_if(d,UTF);)
7261                     d++;
7262                 for (t=d; isSPACE(*t);)
7263                     t++;
7264                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7265                     /* [perl #16184] */
7266                     && !(t[0] == '=' && t[1] == '>')
7267                 ) {
7268                     int parms_len = (int)(d-s);
7269                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7270                            "Precedence problem: open %.*s should be open(%.*s)",
7271                             parms_len, s, parms_len, s);
7272                 }
7273             }
7274             LOP(OP_OPEN,XTERM);
7275
7276         case KEY_or:
7277             pl_yylval.ival = OP_OR;
7278             OPERATOR(OROP);
7279
7280         case KEY_ord:
7281             UNI(OP_ORD);
7282
7283         case KEY_oct:
7284             UNI(OP_OCT);
7285
7286         case KEY_opendir:
7287             LOP(OP_OPEN_DIR,XTERM);
7288
7289         case KEY_print:
7290             checkcomma(s,PL_tokenbuf,"filehandle");
7291             LOP(OP_PRINT,XREF);
7292
7293         case KEY_printf:
7294             checkcomma(s,PL_tokenbuf,"filehandle");
7295             LOP(OP_PRTF,XREF);
7296
7297         case KEY_prototype:
7298             UNI(OP_PROTOTYPE);
7299
7300         case KEY_push:
7301             LOP(OP_PUSH,XTERM);
7302
7303         case KEY_pop:
7304             UNIDOR(OP_POP);
7305
7306         case KEY_pos:
7307             UNIDOR(OP_POS);
7308
7309         case KEY_pack:
7310             LOP(OP_PACK,XTERM);
7311
7312         case KEY_package:
7313             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7314             s = SKIPSPACE1(s);
7315             s = force_strict_version(s);
7316             PL_lex_expect = XBLOCK;
7317             OPERATOR(PACKAGE);
7318
7319         case KEY_pipe:
7320             LOP(OP_PIPE_OP,XTERM);
7321
7322         case KEY_q:
7323             s = scan_str(s,!!PL_madskills,FALSE);
7324             if (!s)
7325                 missingterm(NULL);
7326             pl_yylval.ival = OP_CONST;
7327             TERM(sublex_start());
7328
7329         case KEY_quotemeta:
7330             UNI(OP_QUOTEMETA);
7331
7332         case KEY_qw: {
7333             OP *words = NULL;
7334             s = scan_str(s,!!PL_madskills,FALSE);
7335             if (!s)
7336                 missingterm(NULL);
7337             PL_expect = XOPERATOR;
7338             if (SvCUR(PL_lex_stuff)) {
7339                 int warned = 0;
7340                 d = SvPV_force(PL_lex_stuff, len);
7341                 while (len) {
7342                     for (; isSPACE(*d) && len; --len, ++d)
7343                         /**/;
7344                     if (len) {
7345                         SV *sv;
7346                         const char *b = d;
7347                         if (!warned && ckWARN(WARN_QW)) {
7348                             for (; !isSPACE(*d) && len; --len, ++d) {
7349                                 if (*d == ',') {
7350                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7351                                         "Possible attempt to separate words with commas");
7352                                     ++warned;
7353                                 }
7354                                 else if (*d == '#') {
7355                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7356                                         "Possible attempt to put comments in qw() list");
7357                                     ++warned;
7358                                 }
7359                             }
7360                         }
7361                         else {
7362                             for (; !isSPACE(*d) && len; --len, ++d)
7363                                 /**/;
7364                         }
7365                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7366                         words = op_append_elem(OP_LIST, words,
7367                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7368                     }
7369                 }
7370             }
7371             if (!words)
7372                 words = newNULLLIST();
7373             if (PL_lex_stuff) {
7374                 SvREFCNT_dec(PL_lex_stuff);
7375                 PL_lex_stuff = NULL;
7376             }
7377             PL_expect = XOPERATOR;
7378             pl_yylval.opval = sawparens(words);
7379             TOKEN(QWLIST);
7380         }
7381
7382         case KEY_qq:
7383             s = scan_str(s,!!PL_madskills,FALSE);
7384             if (!s)
7385                 missingterm(NULL);
7386             pl_yylval.ival = OP_STRINGIFY;
7387             if (SvIVX(PL_lex_stuff) == '\'')
7388                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7389             TERM(sublex_start());
7390
7391         case KEY_qr:
7392             s = scan_pat(s,OP_QR);
7393             TERM(sublex_start());
7394
7395         case KEY_qx:
7396             s = scan_str(s,!!PL_madskills,FALSE);
7397             if (!s)
7398                 missingterm(NULL);
7399             readpipe_override();
7400             TERM(sublex_start());
7401
7402         case KEY_return:
7403             OLDLOP(OP_RETURN);
7404
7405         case KEY_require:
7406             s = SKIPSPACE1(s);
7407             if (isDIGIT(*s)) {
7408                 s = force_version(s, FALSE);
7409             }
7410             else if (*s != 'v' || !isDIGIT(s[1])
7411                     || (s = force_version(s, TRUE), *s == 'v'))
7412             {
7413                 *PL_tokenbuf = '\0';
7414                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7415                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7416                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7417                 else if (*s == '<')
7418                     yyerror("<> should be quotes");
7419             }
7420             if (orig_keyword == KEY_require) {
7421                 orig_keyword = 0;
7422                 pl_yylval.ival = 1;
7423             }
7424             else
7425                 pl_yylval.ival = 0;
7426             PL_expect = XTERM;
7427             PL_bufptr = s;
7428             PL_last_uni = PL_oldbufptr;
7429             PL_last_lop_op = OP_REQUIRE;
7430             s = skipspace(s);
7431             return REPORT( (int)REQUIRE );
7432
7433         case KEY_reset:
7434             UNI(OP_RESET);
7435
7436         case KEY_redo:
7437             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7438             LOOPX(OP_REDO);
7439
7440         case KEY_rename:
7441             LOP(OP_RENAME,XTERM);
7442
7443         case KEY_rand:
7444             UNI(OP_RAND);
7445
7446         case KEY_rmdir:
7447             UNI(OP_RMDIR);
7448
7449         case KEY_rindex:
7450             LOP(OP_RINDEX,XTERM);
7451
7452         case KEY_read:
7453             LOP(OP_READ,XTERM);
7454
7455         case KEY_readdir:
7456             UNI(OP_READDIR);
7457
7458         case KEY_readline:
7459             UNIDOR(OP_READLINE);
7460
7461         case KEY_readpipe:
7462             UNIDOR(OP_BACKTICK);
7463
7464         case KEY_rewinddir:
7465             UNI(OP_REWINDDIR);
7466
7467         case KEY_recv:
7468             LOP(OP_RECV,XTERM);
7469
7470         case KEY_reverse:
7471             LOP(OP_REVERSE,XTERM);
7472
7473         case KEY_readlink:
7474             UNIDOR(OP_READLINK);
7475
7476         case KEY_ref:
7477             UNI(OP_REF);
7478
7479         case KEY_s:
7480             s = scan_subst(s);
7481             if (pl_yylval.opval)
7482                 TERM(sublex_start());
7483             else
7484                 TOKEN(1);       /* force error */
7485
7486         case KEY_say:
7487             checkcomma(s,PL_tokenbuf,"filehandle");
7488             LOP(OP_SAY,XREF);
7489
7490         case KEY_chomp:
7491             UNI(OP_CHOMP);
7492
7493         case KEY_scalar:
7494             UNI(OP_SCALAR);
7495
7496         case KEY_select:
7497             LOP(OP_SELECT,XTERM);
7498
7499         case KEY_seek:
7500             LOP(OP_SEEK,XTERM);
7501
7502         case KEY_semctl:
7503             LOP(OP_SEMCTL,XTERM);
7504
7505         case KEY_semget:
7506             LOP(OP_SEMGET,XTERM);
7507
7508         case KEY_semop:
7509             LOP(OP_SEMOP,XTERM);
7510
7511         case KEY_send:
7512             LOP(OP_SEND,XTERM);
7513
7514         case KEY_setpgrp:
7515             LOP(OP_SETPGRP,XTERM);
7516
7517         case KEY_setpriority:
7518             LOP(OP_SETPRIORITY,XTERM);
7519
7520         case KEY_sethostent:
7521             UNI(OP_SHOSTENT);
7522
7523         case KEY_setnetent:
7524             UNI(OP_SNETENT);
7525
7526         case KEY_setservent:
7527             UNI(OP_SSERVENT);
7528
7529         case KEY_setprotoent:
7530             UNI(OP_SPROTOENT);
7531
7532         case KEY_setpwent:
7533             FUN0(OP_SPWENT);
7534
7535         case KEY_setgrent:
7536             FUN0(OP_SGRENT);
7537
7538         case KEY_seekdir:
7539             LOP(OP_SEEKDIR,XTERM);
7540
7541         case KEY_setsockopt:
7542             LOP(OP_SSOCKOPT,XTERM);
7543
7544         case KEY_shift:
7545             UNIDOR(OP_SHIFT);
7546
7547         case KEY_shmctl:
7548             LOP(OP_SHMCTL,XTERM);
7549
7550         case KEY_shmget:
7551             LOP(OP_SHMGET,XTERM);
7552
7553         case KEY_shmread:
7554             LOP(OP_SHMREAD,XTERM);
7555
7556         case KEY_shmwrite:
7557             LOP(OP_SHMWRITE,XTERM);
7558
7559         case KEY_shutdown:
7560             LOP(OP_SHUTDOWN,XTERM);
7561
7562         case KEY_sin:
7563             UNI(OP_SIN);
7564
7565         case KEY_sleep:
7566             UNI(OP_SLEEP);
7567
7568         case KEY_socket:
7569             LOP(OP_SOCKET,XTERM);
7570
7571         case KEY_socketpair:
7572             LOP(OP_SOCKPAIR,XTERM);
7573
7574         case KEY_sort:
7575             checkcomma(s,PL_tokenbuf,"subroutine name");
7576             s = SKIPSPACE1(s);
7577             if (*s == ';' || *s == ')')         /* probably a close */
7578                 Perl_croak(aTHX_ "sort is now a reserved word");
7579             PL_expect = XTERM;
7580             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7581             LOP(OP_SORT,XREF);
7582
7583         case KEY_split:
7584             LOP(OP_SPLIT,XTERM);
7585
7586         case KEY_sprintf:
7587             LOP(OP_SPRINTF,XTERM);
7588
7589         case KEY_splice:
7590             LOP(OP_SPLICE,XTERM);
7591
7592         case KEY_sqrt:
7593             UNI(OP_SQRT);
7594
7595         case KEY_srand:
7596             UNI(OP_SRAND);
7597
7598         case KEY_stat:
7599             UNI(OP_STAT);
7600
7601         case KEY_study:
7602             UNI(OP_STUDY);
7603
7604         case KEY_substr:
7605             LOP(OP_SUBSTR,XTERM);
7606
7607         case KEY_format:
7608         case KEY_sub:
7609           really_sub:
7610             {
7611                 char tmpbuf[sizeof PL_tokenbuf];
7612                 SSize_t tboffset = 0;
7613                 expectation attrful;
7614                 bool have_name, have_proto;
7615                 const int key = tmp;
7616
7617 #ifdef PERL_MAD
7618                 SV *tmpwhite = 0;
7619
7620                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7621                 SV *subtoken = newSVpvn(tstart, s - tstart);
7622                 PL_thistoken = 0;
7623
7624                 d = s;
7625                 s = SKIPSPACE2(s,tmpwhite);
7626 #else
7627                 s = skipspace(s);
7628 #endif
7629
7630                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7631                     (*s == ':' && s[1] == ':'))
7632                 {
7633 #ifdef PERL_MAD
7634                     SV *nametoke = NULL;
7635 #endif
7636
7637                     PL_expect = XBLOCK;
7638                     attrful = XATTRBLOCK;
7639                     /* remember buffer pos'n for later force_word */
7640                     tboffset = s - PL_oldbufptr;
7641                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7642 #ifdef PERL_MAD
7643                     if (PL_madskills)
7644                         nametoke = newSVpvn(s, d - s);
7645 #endif
7646                     if (memchr(tmpbuf, ':', len))
7647                         sv_setpvn(PL_subname, tmpbuf, len);
7648                     else {
7649                         sv_setsv(PL_subname,PL_curstname);
7650                         sv_catpvs(PL_subname,"::");
7651                         sv_catpvn(PL_subname,tmpbuf,len);
7652                     }
7653                     have_name = TRUE;
7654
7655 #ifdef PERL_MAD
7656
7657                     start_force(0);
7658                     CURMAD('X', nametoke);
7659                     CURMAD('_', tmpwhite);
7660                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7661                                       FALSE, TRUE, TRUE);
7662
7663                     s = SKIPSPACE2(d,tmpwhite);
7664 #else
7665                     s = skipspace(d);
7666 #endif
7667                 }
7668                 else {
7669                     if (key == KEY_my)
7670                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7671                     PL_expect = XTERMBLOCK;
7672                     attrful = XATTRTERM;
7673                     sv_setpvs(PL_subname,"?");
7674                     have_name = FALSE;
7675                 }
7676
7677                 if (key == KEY_format) {
7678                     if (*s == '=')
7679                         PL_lex_formbrack = PL_lex_brackets + 1;
7680 #ifdef PERL_MAD
7681                     PL_thistoken = subtoken;
7682                     s = d;
7683 #else
7684                     if (have_name)
7685                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7686                                           FALSE, TRUE, TRUE);
7687 #endif
7688                     OPERATOR(FORMAT);
7689                 }
7690
7691                 /* Look for a prototype */
7692                 if (*s == '(') {
7693                     char *p;
7694                     bool bad_proto = FALSE;
7695                     bool in_brackets = FALSE;
7696                     char greedy_proto = ' ';
7697                     bool proto_after_greedy_proto = FALSE;
7698                     bool must_be_last = FALSE;
7699                     bool underscore = FALSE;
7700                     bool seen_underscore = FALSE;
7701                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7702
7703                     s = scan_str(s,!!PL_madskills,FALSE);
7704                     if (!s)
7705                         Perl_croak(aTHX_ "Prototype not terminated");
7706                     /* strip spaces and check for bad characters */
7707                     d = SvPVX(PL_lex_stuff);
7708                     tmp = 0;
7709                     for (p = d; *p; ++p) {
7710                         if (!isSPACE(*p)) {
7711                             d[tmp++] = *p;
7712
7713                             if (warnillegalproto) {
7714                                 if (must_be_last)
7715                                     proto_after_greedy_proto = TRUE;
7716                                 if (!strchr("$@%*;[]&\\_", *p)) {
7717                                     bad_proto = TRUE;
7718                                 }
7719                                 else {
7720                                     if ( underscore ) {
7721                                         if ( *p != ';' )
7722                                             bad_proto = TRUE;
7723                                         underscore = FALSE;
7724                                     }
7725                                     if ( *p == '[' ) {
7726                                         in_brackets = TRUE;
7727                                     }
7728                                     else if ( *p == ']' ) {
7729                                         in_brackets = FALSE;
7730                                     }
7731                                     else if ( (*p == '@' || *p == '%') &&
7732                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7733                                          !in_brackets ) {
7734                                         must_be_last = TRUE;
7735                                         greedy_proto = *p;
7736                                     }
7737                                     else if ( *p == '_' ) {
7738                                         underscore = seen_underscore = TRUE;
7739                                     }
7740                                 }
7741                             }
7742                         }
7743                     }
7744                     d[tmp] = '\0';
7745                     if (proto_after_greedy_proto)
7746                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7747                                     "Prototype after '%c' for %"SVf" : %s",
7748                                     greedy_proto, SVfARG(PL_subname), d);
7749                     if (bad_proto)
7750                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7751                                     "Illegal character %sin prototype for %"SVf" : %s",
7752                                     seen_underscore ? "after '_' " : "",
7753                                     SVfARG(PL_subname), d);
7754                     SvCUR_set(PL_lex_stuff, tmp);
7755                     have_proto = TRUE;
7756
7757 #ifdef PERL_MAD
7758                     start_force(0);
7759                     CURMAD('q', PL_thisopen);
7760                     CURMAD('_', tmpwhite);
7761                     CURMAD('=', PL_thisstuff);
7762                     CURMAD('Q', PL_thisclose);
7763                     NEXTVAL_NEXTTOKE.opval =
7764                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7765                     PL_lex_stuff = NULL;
7766                     force_next(THING);
7767
7768                     s = SKIPSPACE2(s,tmpwhite);
7769 #else
7770                     s = skipspace(s);
7771 #endif
7772                 }
7773                 else
7774                     have_proto = FALSE;
7775
7776                 if (*s == ':' && s[1] != ':')
7777                     PL_expect = attrful;
7778                 else if (*s != '{' && key == KEY_sub) {
7779                     if (!have_name)
7780                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7781                     else if (*s != ';' && *s != '}')
7782                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7783                 }
7784
7785 #ifdef PERL_MAD
7786                 start_force(0);
7787                 if (tmpwhite) {
7788                     if (PL_madskills)
7789                         curmad('^', newSVpvs(""));
7790                     CURMAD('_', tmpwhite);
7791                 }
7792                 force_next(0);
7793
7794                 PL_thistoken = subtoken;
7795 #else
7796                 if (have_proto) {
7797                     NEXTVAL_NEXTTOKE.opval =
7798                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7799                     PL_lex_stuff = NULL;
7800                     force_next(THING);
7801                 }
7802 #endif
7803                 if (!have_name) {
7804                     if (PL_curstash)
7805                         sv_setpvs(PL_subname, "__ANON__");
7806                     else
7807                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7808                     TOKEN(ANONSUB);
7809                 }
7810 #ifndef PERL_MAD
7811                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7812                                   FALSE, TRUE, TRUE);
7813 #endif
7814                 if (key == KEY_my)
7815                     TOKEN(MYSUB);
7816                 TOKEN(SUB);
7817             }
7818
7819         case KEY_system:
7820             LOP(OP_SYSTEM,XREF);
7821
7822         case KEY_symlink:
7823             LOP(OP_SYMLINK,XTERM);
7824
7825         case KEY_syscall:
7826             LOP(OP_SYSCALL,XTERM);
7827
7828         case KEY_sysopen:
7829             LOP(OP_SYSOPEN,XTERM);
7830
7831         case KEY_sysseek:
7832             LOP(OP_SYSSEEK,XTERM);
7833
7834         case KEY_sysread:
7835             LOP(OP_SYSREAD,XTERM);
7836
7837         case KEY_syswrite:
7838             LOP(OP_SYSWRITE,XTERM);
7839
7840         case KEY_tr:
7841             s = scan_trans(s);
7842             TERM(sublex_start());
7843
7844         case KEY_tell:
7845             UNI(OP_TELL);
7846
7847         case KEY_telldir:
7848             UNI(OP_TELLDIR);
7849
7850         case KEY_tie:
7851             LOP(OP_TIE,XTERM);
7852
7853         case KEY_tied:
7854             UNI(OP_TIED);
7855
7856         case KEY_time:
7857             FUN0(OP_TIME);
7858
7859         case KEY_times:
7860             FUN0(OP_TMS);
7861
7862         case KEY_truncate:
7863             LOP(OP_TRUNCATE,XTERM);
7864
7865         case KEY_uc:
7866             UNI(OP_UC);
7867
7868         case KEY_ucfirst:
7869             UNI(OP_UCFIRST);
7870
7871         case KEY_untie:
7872             UNI(OP_UNTIE);
7873
7874         case KEY_until:
7875             pl_yylval.ival = CopLINE(PL_curcop);
7876             OPERATOR(UNTIL);
7877
7878         case KEY_unless:
7879             pl_yylval.ival = CopLINE(PL_curcop);
7880             OPERATOR(UNLESS);
7881
7882         case KEY_unlink:
7883             LOP(OP_UNLINK,XTERM);
7884
7885         case KEY_undef:
7886             UNIDOR(OP_UNDEF);
7887
7888         case KEY_unpack:
7889             LOP(OP_UNPACK,XTERM);
7890
7891         case KEY_utime:
7892             LOP(OP_UTIME,XTERM);
7893
7894         case KEY_umask:
7895             UNIDOR(OP_UMASK);
7896
7897         case KEY_unshift:
7898             LOP(OP_UNSHIFT,XTERM);
7899
7900         case KEY_use:
7901             s = tokenize_use(1, s);
7902             OPERATOR(USE);
7903
7904         case KEY_values:
7905             UNI(OP_VALUES);
7906
7907         case KEY_vec:
7908             LOP(OP_VEC,XTERM);
7909
7910         case KEY_when:
7911             pl_yylval.ival = CopLINE(PL_curcop);
7912             OPERATOR(WHEN);
7913
7914         case KEY_while:
7915             pl_yylval.ival = CopLINE(PL_curcop);
7916             OPERATOR(WHILE);
7917
7918         case KEY_warn:
7919             PL_hints |= HINT_BLOCK_SCOPE;
7920             LOP(OP_WARN,XTERM);
7921
7922         case KEY_wait:
7923             FUN0(OP_WAIT);
7924
7925         case KEY_waitpid:
7926             LOP(OP_WAITPID,XTERM);
7927
7928         case KEY_wantarray:
7929             FUN0(OP_WANTARRAY);
7930
7931         case KEY_write:
7932 #ifdef EBCDIC
7933         {
7934             char ctl_l[2];
7935             ctl_l[0] = toCTRL('L');
7936             ctl_l[1] = '\0';
7937             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7938         }
7939 #else
7940             /* Make sure $^L is defined */
7941             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7942 #endif
7943             UNI(OP_ENTERWRITE);
7944
7945         case KEY_x:
7946             if (PL_expect == XOPERATOR)
7947                 Mop(OP_REPEAT);
7948             check_uni();
7949             goto just_a_word;
7950
7951         case KEY_xor:
7952             pl_yylval.ival = OP_XOR;
7953             OPERATOR(OROP);
7954
7955         case KEY_y:
7956             s = scan_trans(s);
7957             TERM(sublex_start());
7958         }
7959     }}
7960 }
7961 #ifdef __SC__
7962 #pragma segment Main
7963 #endif
7964
7965 static int
7966 S_pending_ident(pTHX)
7967 {
7968     dVAR;
7969     register char *d;
7970     PADOFFSET tmp = 0;
7971     /* pit holds the identifier we read and pending_ident is reset */
7972     char pit = PL_pending_ident;
7973     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7974     /* All routes through this function want to know if there is a colon.  */
7975     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7976     PL_pending_ident = 0;
7977
7978     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7979     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7980           "### Pending identifier '%s'\n", PL_tokenbuf); });
7981
7982     /* if we're in a my(), we can't allow dynamics here.
7983        $foo'bar has already been turned into $foo::bar, so
7984        just check for colons.
7985
7986        if it's a legal name, the OP is a PADANY.
7987     */
7988     if (PL_in_my) {
7989         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7990             if (has_colon)
7991                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7992                                   "variable %s in \"our\"",
7993                                   PL_tokenbuf));
7994             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7995         }
7996         else {
7997             if (has_colon)
7998                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7999                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8000
8001             pl_yylval.opval = newOP(OP_PADANY, 0);
8002             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8003             return PRIVATEREF;
8004         }
8005     }
8006
8007     /*
8008        build the ops for accesses to a my() variable.
8009
8010        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8011        then used in a comparison.  This catches most, but not
8012        all cases.  For instance, it catches
8013            sort { my($a); $a <=> $b }
8014        but not
8015            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8016        (although why you'd do that is anyone's guess).
8017     */
8018
8019     if (!has_colon) {
8020         if (!PL_in_my)
8021             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8022         if (tmp != NOT_IN_PAD) {
8023             /* might be an "our" variable" */
8024             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8025                 /* build ops for a bareword */
8026                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8027                 HEK * const stashname = HvNAME_HEK(stash);
8028                 SV *  const sym = newSVhek(stashname);
8029                 sv_catpvs(sym, "::");
8030                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8031                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8032                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8033                 gv_fetchsv(sym,
8034                     (PL_in_eval
8035                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8036                         : GV_ADDMULTI
8037                     ),
8038                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8039                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8040                      : SVt_PVHV));
8041                 return WORD;
8042             }
8043
8044             /* if it's a sort block and they're naming $a or $b */
8045             if (PL_last_lop_op == OP_SORT &&
8046                 PL_tokenbuf[0] == '$' &&
8047                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8048                 && !PL_tokenbuf[2])
8049             {
8050                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8051                      d < PL_bufend && *d != '\n';
8052                      d++)
8053                 {
8054                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8055                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8056                               PL_tokenbuf);
8057                     }
8058                 }
8059             }
8060
8061             pl_yylval.opval = newOP(OP_PADANY, 0);
8062             pl_yylval.opval->op_targ = tmp;
8063             return PRIVATEREF;
8064         }
8065     }
8066
8067     /*
8068        Whine if they've said @foo in a doublequoted string,
8069        and @foo isn't a variable we can find in the symbol
8070        table.
8071     */
8072     if (ckWARN(WARN_AMBIGUOUS) &&
8073         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8074         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8075                                          SVt_PVAV);
8076         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8077                 /* DO NOT warn for @- and @+ */
8078                 && !( PL_tokenbuf[2] == '\0' &&
8079                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8080            )
8081         {
8082             /* Downgraded from fatal to warning 20000522 mjd */
8083             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8084                         "Possible unintended interpolation of %s in string",
8085                         PL_tokenbuf);
8086         }
8087     }
8088
8089     /* build ops for a bareword */
8090     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8091                                                       tokenbuf_len - 1));
8092     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8093     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8094                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8095                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8096                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8097                       : SVt_PVHV));
8098     return WORD;
8099 }
8100
8101 /*
8102  *  The following code was generated by perl_keyword.pl.
8103  */
8104
8105 I32
8106 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8107 {
8108     dVAR;
8109
8110     PERL_ARGS_ASSERT_KEYWORD;
8111
8112   switch (len)
8113   {
8114     case 1: /* 5 tokens of length 1 */
8115       switch (name[0])
8116       {
8117         case 'm':
8118           {                                       /* m          */
8119             return KEY_m;
8120           }
8121
8122         case 'q':
8123           {                                       /* q          */
8124             return KEY_q;
8125           }
8126
8127         case 's':
8128           {                                       /* s          */
8129             return KEY_s;
8130           }
8131
8132         case 'x':
8133           {                                       /* x          */
8134             return -KEY_x;
8135           }
8136
8137         case 'y':
8138           {                                       /* y          */
8139             return KEY_y;
8140           }
8141
8142         default:
8143           goto unknown;
8144       }
8145
8146     case 2: /* 18 tokens of length 2 */
8147       switch (name[0])
8148       {
8149         case 'd':
8150           if (name[1] == 'o')
8151           {                                       /* do         */
8152             return KEY_do;
8153           }
8154
8155           goto unknown;
8156
8157         case 'e':
8158           if (name[1] == 'q')
8159           {                                       /* eq         */
8160             return -KEY_eq;
8161           }
8162
8163           goto unknown;
8164
8165         case 'g':
8166           switch (name[1])
8167           {
8168             case 'e':
8169               {                                   /* ge         */
8170                 return -KEY_ge;
8171               }
8172
8173             case 't':
8174               {                                   /* gt         */
8175                 return -KEY_gt;
8176               }
8177
8178             default:
8179               goto unknown;
8180           }
8181
8182         case 'i':
8183           if (name[1] == 'f')
8184           {                                       /* if         */
8185             return KEY_if;
8186           }
8187
8188           goto unknown;
8189
8190         case 'l':
8191           switch (name[1])
8192           {
8193             case 'c':
8194               {                                   /* lc         */
8195                 return -KEY_lc;
8196               }
8197
8198             case 'e':
8199               {                                   /* le         */
8200                 return -KEY_le;
8201               }
8202
8203             case 't':
8204               {                                   /* lt         */
8205                 return -KEY_lt;
8206               }
8207
8208             default:
8209               goto unknown;
8210           }
8211
8212         case 'm':
8213           if (name[1] == 'y')
8214           {                                       /* my         */
8215             return KEY_my;
8216           }
8217
8218           goto unknown;
8219
8220         case 'n':
8221           switch (name[1])
8222           {
8223             case 'e':
8224               {                                   /* ne         */
8225                 return -KEY_ne;
8226               }
8227
8228             case 'o':
8229               {                                   /* no         */
8230                 return KEY_no;
8231               }
8232
8233             default:
8234               goto unknown;
8235           }
8236
8237         case 'o':
8238           if (name[1] == 'r')
8239           {                                       /* or         */
8240             return -KEY_or;
8241           }
8242
8243           goto unknown;
8244
8245         case 'q':
8246           switch (name[1])
8247           {
8248             case 'q':
8249               {                                   /* qq         */
8250                 return KEY_qq;
8251               }
8252
8253             case 'r':
8254               {                                   /* qr         */
8255                 return KEY_qr;
8256               }
8257
8258             case 'w':
8259               {                                   /* qw         */
8260                 return KEY_qw;
8261               }
8262
8263             case 'x':
8264               {                                   /* qx         */
8265                 return KEY_qx;
8266               }
8267
8268             default:
8269               goto unknown;
8270           }
8271
8272         case 't':
8273           if (name[1] == 'r')
8274           {                                       /* tr         */
8275             return KEY_tr;
8276           }
8277
8278           goto unknown;
8279
8280         case 'u':
8281           if (name[1] == 'c')
8282           {                                       /* uc         */
8283             return -KEY_uc;
8284           }
8285
8286           goto unknown;
8287
8288         default:
8289           goto unknown;
8290       }
8291
8292     case 3: /* 29 tokens of length 3 */
8293       switch (name[0])
8294       {
8295         case 'E':
8296           if (name[1] == 'N' &&
8297               name[2] == 'D')
8298           {                                       /* END        */
8299             return KEY_END;
8300           }
8301
8302           goto unknown;
8303
8304         case 'a':
8305           switch (name[1])
8306           {
8307             case 'b':
8308               if (name[2] == 's')
8309               {                                   /* abs        */
8310                 return -KEY_abs;
8311               }
8312
8313               goto unknown;
8314
8315             case 'n':
8316               if (name[2] == 'd')
8317               {                                   /* and        */
8318                 return -KEY_and;
8319               }
8320
8321               goto unknown;
8322
8323             default:
8324               goto unknown;
8325           }
8326
8327         case 'c':
8328           switch (name[1])
8329           {
8330             case 'h':
8331               if (name[2] == 'r')
8332               {                                   /* chr        */
8333                 return -KEY_chr;
8334               }
8335
8336               goto unknown;
8337
8338             case 'm':
8339               if (name[2] == 'p')
8340               {                                   /* cmp        */
8341                 return -KEY_cmp;
8342               }
8343
8344               goto unknown;
8345
8346             case 'o':
8347               if (name[2] == 's')
8348               {                                   /* cos        */
8349                 return -KEY_cos;
8350               }
8351
8352               goto unknown;
8353
8354             default:
8355               goto unknown;
8356           }
8357
8358         case 'd':
8359           if (name[1] == 'i' &&
8360               name[2] == 'e')
8361           {                                       /* die        */
8362             return -KEY_die;
8363           }
8364
8365           goto unknown;
8366
8367         case 'e':
8368           switch (name[1])
8369           {
8370             case 'o':
8371               if (name[2] == 'f')
8372               {                                   /* eof        */
8373                 return -KEY_eof;
8374               }
8375
8376               goto unknown;
8377
8378             case 'x':
8379               if (name[2] == 'p')
8380               {                                   /* exp        */
8381                 return -KEY_exp;
8382               }
8383
8384               goto unknown;
8385
8386             default:
8387               goto unknown;
8388           }
8389
8390         case 'f':
8391           if (name[1] == 'o' &&
8392               name[2] == 'r')
8393           {                                       /* for        */
8394             return KEY_for;
8395           }
8396
8397           goto unknown;
8398
8399         case 'h':
8400           if (name[1] == 'e' &&
8401               name[2] == 'x')
8402           {                                       /* hex        */
8403             return -KEY_hex;
8404           }
8405
8406           goto unknown;
8407
8408         case 'i':
8409           if (name[1] == 'n' &&
8410               name[2] == 't')
8411           {                                       /* int        */
8412             return -KEY_int;
8413           }
8414
8415           goto unknown;
8416
8417         case 'l':
8418           if (name[1] == 'o' &&
8419               name[2] == 'g')
8420           {                                       /* log        */
8421             return -KEY_log;
8422           }
8423
8424           goto unknown;
8425
8426         case 'm':
8427           if (name[1] == 'a' &&
8428               name[2] == 'p')
8429           {                                       /* map        */
8430             return KEY_map;
8431           }
8432
8433           goto unknown;
8434
8435         case 'n':
8436           if (name[1] == 'o' &&
8437               name[2] == 't')
8438           {                                       /* not        */
8439             return -KEY_not;
8440           }
8441
8442           goto unknown;
8443
8444         case 'o':
8445           switch (name[1])
8446           {
8447             case 'c':
8448               if (name[2] == 't')
8449               {                                   /* oct        */
8450                 return -KEY_oct;
8451               }
8452
8453               goto unknown;
8454
8455             case 'r':
8456               if (name[2] == 'd')
8457               {                                   /* ord        */
8458                 return -KEY_ord;
8459               }
8460
8461               goto unknown;
8462
8463             case 'u':
8464               if (name[2] == 'r')
8465               {                                   /* our        */
8466                 return KEY_our;
8467               }
8468
8469               goto unknown;
8470
8471             default:
8472               goto unknown;
8473           }
8474
8475         case 'p':
8476           if (name[1] == 'o')
8477           {
8478             switch (name[2])
8479             {
8480               case 'p':
8481                 {                                 /* pop        */
8482                   return -KEY_pop;
8483                 }
8484
8485               case 's':
8486                 {                                 /* pos        */
8487                   return KEY_pos;
8488                 }
8489
8490               default:
8491                 goto unknown;
8492             }
8493           }
8494
8495           goto unknown;
8496
8497         case 'r':
8498           if (name[1] == 'e' &&
8499               name[2] == 'f')
8500           {                                       /* ref        */
8501             return -KEY_ref;
8502           }
8503
8504           goto unknown;
8505
8506         case 's':
8507           switch (name[1])
8508           {
8509             case 'a':
8510               if (name[2] == 'y')
8511               {                                   /* say        */
8512                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8513               }
8514
8515               goto unknown;
8516
8517             case 'i':
8518               if (name[2] == 'n')
8519               {                                   /* sin        */
8520                 return -KEY_sin;
8521               }
8522
8523               goto unknown;
8524
8525             case 'u':
8526               if (name[2] == 'b')
8527               {                                   /* sub        */
8528                 return KEY_sub;
8529               }
8530
8531               goto unknown;
8532
8533             default:
8534               goto unknown;
8535           }
8536
8537         case 't':
8538           if (name[1] == 'i' &&
8539               name[2] == 'e')
8540           {                                       /* tie        */
8541             return -KEY_tie;
8542           }
8543
8544           goto unknown;
8545
8546         case 'u':
8547           if (name[1] == 's' &&
8548               name[2] == 'e')
8549           {                                       /* use        */
8550             return KEY_use;
8551           }
8552
8553           goto unknown;
8554
8555         case 'v':
8556           if (name[1] == 'e' &&
8557               name[2] == 'c')
8558           {                                       /* vec        */
8559             return -KEY_vec;
8560           }
8561
8562           goto unknown;
8563
8564         case 'x':
8565           if (name[1] == 'o' &&
8566               name[2] == 'r')
8567           {                                       /* xor        */
8568             return -KEY_xor;
8569           }
8570
8571           goto unknown;
8572
8573         default:
8574           goto unknown;
8575       }
8576
8577     case 4: /* 41 tokens of length 4 */
8578       switch (name[0])
8579       {
8580         case 'C':
8581           if (name[1] == 'O' &&
8582               name[2] == 'R' &&
8583               name[3] == 'E')
8584           {                                       /* CORE       */
8585             return -KEY_CORE;
8586           }
8587
8588           goto unknown;
8589
8590         case 'I':
8591           if (name[1] == 'N' &&
8592               name[2] == 'I' &&
8593               name[3] == 'T')
8594           {                                       /* INIT       */
8595             return KEY_INIT;
8596           }
8597
8598           goto unknown;
8599
8600         case 'b':
8601           if (name[1] == 'i' &&
8602               name[2] == 'n' &&
8603               name[3] == 'd')
8604           {                                       /* bind       */
8605             return -KEY_bind;
8606           }
8607
8608           goto unknown;
8609
8610         case 'c':
8611           if (name[1] == 'h' &&
8612               name[2] == 'o' &&
8613               name[3] == 'p')
8614           {                                       /* chop       */
8615             return -KEY_chop;
8616           }
8617
8618           goto unknown;
8619
8620         case 'd':
8621           if (name[1] == 'u' &&
8622               name[2] == 'm' &&
8623               name[3] == 'p')
8624           {                                       /* dump       */
8625             return -KEY_dump;
8626           }
8627
8628           goto unknown;
8629
8630         case 'e':
8631           switch (name[1])
8632           {
8633             case 'a':
8634               if (name[2] == 'c' &&
8635                   name[3] == 'h')
8636               {                                   /* each       */
8637                 return -KEY_each;
8638               }
8639
8640               goto unknown;
8641
8642             case 'l':
8643               if (name[2] == 's' &&
8644                   name[3] == 'e')
8645               {                                   /* else       */
8646                 return KEY_else;
8647               }
8648
8649               goto unknown;
8650
8651             case 'v':
8652               if (name[2] == 'a' &&
8653                   name[3] == 'l')
8654               {                                   /* eval       */
8655                 return KEY_eval;
8656               }
8657
8658               goto unknown;
8659
8660             case 'x':
8661               switch (name[2])
8662               {
8663                 case 'e':
8664                   if (name[3] == 'c')
8665                   {                               /* exec       */
8666                     return -KEY_exec;
8667                   }
8668
8669                   goto unknown;
8670
8671                 case 'i':
8672                   if (name[3] == 't')
8673                   {                               /* exit       */
8674                     return -KEY_exit;
8675                   }
8676
8677                   goto unknown;
8678
8679                 default:
8680                   goto unknown;
8681               }
8682
8683             default:
8684               goto unknown;
8685           }
8686
8687         case 'f':
8688           if (name[1] == 'o' &&
8689               name[2] == 'r' &&
8690               name[3] == 'k')
8691           {                                       /* fork       */
8692             return -KEY_fork;
8693           }
8694
8695           goto unknown;
8696
8697         case 'g':
8698           switch (name[1])
8699           {
8700             case 'e':
8701               if (name[2] == 't' &&
8702                   name[3] == 'c')
8703               {                                   /* getc       */
8704                 return -KEY_getc;
8705               }
8706
8707               goto unknown;
8708
8709             case 'l':
8710               if (name[2] == 'o' &&
8711                   name[3] == 'b')
8712               {                                   /* glob       */
8713                 return KEY_glob;
8714               }
8715
8716               goto unknown;
8717
8718             case 'o':
8719               if (name[2] == 't' &&
8720                   name[3] == 'o')
8721               {                                   /* goto       */
8722                 return KEY_goto;
8723               }
8724
8725               goto unknown;
8726
8727             case 'r':
8728               if (name[2] == 'e' &&
8729                   name[3] == 'p')
8730               {                                   /* grep       */
8731                 return KEY_grep;
8732               }
8733
8734               goto unknown;
8735
8736             default:
8737               goto unknown;
8738           }
8739
8740         case 'j':
8741           if (name[1] == 'o' &&
8742               name[2] == 'i' &&
8743               name[3] == 'n')
8744           {                                       /* join       */
8745             return -KEY_join;
8746           }
8747
8748           goto unknown;
8749
8750         case 'k':
8751           switch (name[1])
8752           {
8753             case 'e':
8754               if (name[2] == 'y' &&
8755                   name[3] == 's')
8756               {                                   /* keys       */
8757                 return -KEY_keys;
8758               }
8759
8760               goto unknown;
8761
8762             case 'i':
8763               if (name[2] == 'l' &&
8764                   name[3] == 'l')
8765               {                                   /* kill       */
8766                 return -KEY_kill;
8767               }
8768
8769               goto unknown;
8770
8771             default:
8772               goto unknown;
8773           }
8774
8775         case 'l':
8776           switch (name[1])
8777           {
8778             case 'a':
8779               if (name[2] == 's' &&
8780                   name[3] == 't')
8781               {                                   /* last       */
8782                 return KEY_last;
8783               }
8784
8785               goto unknown;
8786
8787             case 'i':
8788               if (name[2] == 'n' &&
8789                   name[3] == 'k')
8790               {                                   /* link       */
8791                 return -KEY_link;
8792               }
8793
8794               goto unknown;
8795
8796             case 'o':
8797               if (name[2] == 'c' &&
8798                   name[3] == 'k')
8799               {                                   /* lock       */
8800                 return -KEY_lock;
8801               }
8802
8803               goto unknown;
8804
8805             default:
8806               goto unknown;
8807           }
8808
8809         case 'n':
8810           if (name[1] == 'e' &&
8811               name[2] == 'x' &&
8812               name[3] == 't')
8813           {                                       /* next       */
8814             return KEY_next;
8815           }
8816
8817           goto unknown;
8818
8819         case 'o':
8820           if (name[1] == 'p' &&
8821               name[2] == 'e' &&
8822               name[3] == 'n')
8823           {                                       /* open       */
8824             return -KEY_open;
8825           }
8826
8827           goto unknown;
8828
8829         case 'p':
8830           switch (name[1])
8831           {
8832             case 'a':
8833               if (name[2] == 'c' &&
8834                   name[3] == 'k')
8835               {                                   /* pack       */
8836                 return -KEY_pack;
8837               }
8838
8839               goto unknown;
8840
8841             case 'i':
8842               if (name[2] == 'p' &&
8843                   name[3] == 'e')
8844               {                                   /* pipe       */
8845                 return -KEY_pipe;
8846               }
8847
8848               goto unknown;
8849
8850             case 'u':
8851               if (name[2] == 's' &&
8852                   name[3] == 'h')
8853               {                                   /* push       */
8854                 return -KEY_push;
8855               }
8856
8857               goto unknown;
8858
8859             default:
8860               goto unknown;
8861           }
8862
8863         case 'r':
8864           switch (name[1])
8865           {
8866             case 'a':
8867               if (name[2] == 'n' &&
8868                   name[3] == 'd')
8869               {                                   /* rand       */
8870                 return -KEY_rand;
8871               }
8872
8873               goto unknown;
8874
8875             case 'e':
8876               switch (name[2])
8877               {
8878                 case 'a':
8879                   if (name[3] == 'd')
8880                   {                               /* read       */
8881                     return -KEY_read;
8882                   }
8883
8884                   goto unknown;
8885
8886                 case 'c':
8887                   if (name[3] == 'v')
8888                   {                               /* recv       */
8889                     return -KEY_recv;
8890                   }
8891
8892                   goto unknown;
8893
8894                 case 'd':
8895                   if (name[3] == 'o')
8896                   {                               /* redo       */
8897                     return KEY_redo;
8898                   }
8899
8900                   goto unknown;
8901
8902                 default:
8903                   goto unknown;
8904               }
8905
8906             default:
8907               goto unknown;
8908           }
8909
8910         case 's':
8911           switch (name[1])
8912           {
8913             case 'e':
8914               switch (name[2])
8915               {
8916                 case 'e':
8917                   if (name[3] == 'k')
8918                   {                               /* seek       */
8919                     return -KEY_seek;
8920                   }
8921
8922                   goto unknown;
8923
8924                 case 'n':
8925                   if (name[3] == 'd')
8926                   {                               /* send       */
8927                     return -KEY_send;
8928                   }
8929
8930                   goto unknown;
8931
8932                 default:
8933                   goto unknown;
8934               }
8935
8936             case 'o':
8937               if (name[2] == 'r' &&
8938                   name[3] == 't')
8939               {                                   /* sort       */
8940                 return KEY_sort;
8941               }
8942
8943               goto unknown;
8944
8945             case 'q':
8946               if (name[2] == 'r' &&
8947                   name[3] == 't')
8948               {                                   /* sqrt       */
8949                 return -KEY_sqrt;
8950               }
8951
8952               goto unknown;
8953
8954             case 't':
8955               if (name[2] == 'a' &&
8956                   name[3] == 't')
8957               {                                   /* stat       */
8958                 return -KEY_stat;
8959               }
8960
8961               goto unknown;
8962
8963             default:
8964               goto unknown;
8965           }
8966
8967         case 't':
8968           switch (name[1])
8969           {
8970             case 'e':
8971               if (name[2] == 'l' &&
8972                   name[3] == 'l')
8973               {                                   /* tell       */
8974                 return -KEY_tell;
8975               }
8976
8977               goto unknown;
8978
8979             case 'i':
8980               switch (name[2])
8981               {
8982                 case 'e':
8983                   if (name[3] == 'd')
8984                   {                               /* tied       */
8985                     return -KEY_tied;
8986                   }
8987
8988                   goto unknown;
8989
8990                 case 'm':
8991                   if (name[3] == 'e')
8992                   {                               /* time       */
8993                     return -KEY_time;
8994                   }
8995
8996                   goto unknown;
8997
8998                 default:
8999                   goto unknown;
9000               }
9001
9002             default:
9003               goto unknown;
9004           }
9005
9006         case 'w':
9007           switch (name[1])
9008           {
9009             case 'a':
9010               switch (name[2])
9011               {
9012                 case 'i':
9013                   if (name[3] == 't')
9014                   {                               /* wait       */
9015                     return -KEY_wait;
9016                   }
9017
9018                   goto unknown;
9019
9020                 case 'r':
9021                   if (name[3] == 'n')
9022                   {                               /* warn       */
9023                     return -KEY_warn;
9024                   }
9025
9026                   goto unknown;
9027
9028                 default:
9029                   goto unknown;
9030               }
9031
9032             case 'h':
9033               if (name[2] == 'e' &&
9034                   name[3] == 'n')
9035               {                                   /* when       */
9036                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9037               }
9038
9039               goto unknown;
9040
9041             default:
9042               goto unknown;
9043           }
9044
9045         default:
9046           goto unknown;
9047       }
9048
9049     case 5: /* 39 tokens of length 5 */
9050       switch (name[0])
9051       {
9052         case 'B':
9053           if (name[1] == 'E' &&
9054               name[2] == 'G' &&
9055               name[3] == 'I' &&
9056               name[4] == 'N')
9057           {                                       /* BEGIN      */
9058             return KEY_BEGIN;
9059           }
9060
9061           goto unknown;
9062
9063         case 'C':
9064           if (name[1] == 'H' &&
9065               name[2] == 'E' &&
9066               name[3] == 'C' &&
9067               name[4] == 'K')
9068           {                                       /* CHECK      */
9069             return KEY_CHECK;
9070           }
9071
9072           goto unknown;
9073
9074         case 'a':
9075           switch (name[1])
9076           {
9077             case 'l':
9078               if (name[2] == 'a' &&
9079                   name[3] == 'r' &&
9080                   name[4] == 'm')
9081               {                                   /* alarm      */
9082                 return -KEY_alarm;
9083               }
9084
9085               goto unknown;
9086
9087             case 't':
9088               if (name[2] == 'a' &&
9089                   name[3] == 'n' &&
9090                   name[4] == '2')
9091               {                                   /* atan2      */
9092                 return -KEY_atan2;
9093               }
9094
9095               goto unknown;
9096
9097             default:
9098               goto unknown;
9099           }
9100
9101         case 'b':
9102           switch (name[1])
9103           {
9104             case 'l':
9105               if (name[2] == 'e' &&
9106                   name[3] == 's' &&
9107                   name[4] == 's')
9108               {                                   /* bless      */
9109                 return -KEY_bless;
9110               }
9111
9112               goto unknown;
9113
9114             case 'r':
9115               if (name[2] == 'e' &&
9116                   name[3] == 'a' &&
9117                   name[4] == 'k')
9118               {                                   /* break      */
9119                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9120               }
9121
9122               goto unknown;
9123
9124             default:
9125               goto unknown;
9126           }
9127
9128         case 'c':
9129           switch (name[1])
9130           {
9131             case 'h':
9132               switch (name[2])
9133               {
9134                 case 'd':
9135                   if (name[3] == 'i' &&
9136                       name[4] == 'r')
9137                   {                               /* chdir      */
9138                     return -KEY_chdir;
9139                   }
9140
9141                   goto unknown;
9142
9143                 case 'm':
9144                   if (name[3] == 'o' &&
9145                       name[4] == 'd')
9146                   {                               /* chmod      */
9147                     return -KEY_chmod;
9148                   }
9149
9150                   goto unknown;
9151
9152                 case 'o':
9153                   switch (name[3])
9154                   {
9155                     case 'm':
9156                       if (name[4] == 'p')
9157                       {                           /* chomp      */
9158                         return -KEY_chomp;
9159                       }
9160
9161                       goto unknown;
9162
9163                     case 'w':
9164                       if (name[4] == 'n')
9165                       {                           /* chown      */
9166                         return -KEY_chown;
9167                       }
9168
9169                       goto unknown;
9170
9171                     default:
9172                       goto unknown;
9173                   }
9174
9175                 default:
9176                   goto unknown;
9177               }
9178
9179             case 'l':
9180               if (name[2] == 'o' &&
9181                   name[3] == 's' &&
9182                   name[4] == 'e')
9183               {                                   /* close      */
9184                 return -KEY_close;
9185               }
9186
9187               goto unknown;
9188
9189             case 'r':
9190               if (name[2] == 'y' &&
9191                   name[3] == 'p' &&
9192                   name[4] == 't')
9193               {                                   /* crypt      */
9194                 return -KEY_crypt;
9195               }
9196
9197               goto unknown;
9198
9199             default:
9200               goto unknown;
9201           }
9202
9203         case 'e':
9204           if (name[1] == 'l' &&
9205               name[2] == 's' &&
9206               name[3] == 'i' &&
9207               name[4] == 'f')
9208           {                                       /* elsif      */
9209             return KEY_elsif;
9210           }
9211
9212           goto unknown;
9213
9214         case 'f':
9215           switch (name[1])
9216           {
9217             case 'c':
9218               if (name[2] == 'n' &&
9219                   name[3] == 't' &&
9220                   name[4] == 'l')
9221               {                                   /* fcntl      */
9222                 return -KEY_fcntl;
9223               }
9224
9225               goto unknown;
9226
9227             case 'l':
9228               if (name[2] == 'o' &&
9229                   name[3] == 'c' &&
9230                   name[4] == 'k')
9231               {                                   /* flock      */
9232                 return -KEY_flock;
9233               }
9234
9235               goto unknown;
9236
9237             default:
9238               goto unknown;
9239           }
9240
9241         case 'g':
9242           if (name[1] == 'i' &&
9243               name[2] == 'v' &&
9244               name[3] == 'e' &&
9245               name[4] == 'n')
9246           {                                       /* given      */
9247             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9248           }
9249
9250           goto unknown;
9251
9252         case 'i':
9253           switch (name[1])
9254           {
9255             case 'n':
9256               if (name[2] == 'd' &&
9257                   name[3] == 'e' &&
9258                   name[4] == 'x')
9259               {                                   /* index      */
9260                 return -KEY_index;
9261               }
9262
9263               goto unknown;
9264
9265             case 'o':
9266               if (name[2] == 'c' &&
9267                   name[3] == 't' &&
9268                   name[4] == 'l')
9269               {                                   /* ioctl      */
9270                 return -KEY_ioctl;
9271               }
9272
9273               goto unknown;
9274
9275             default:
9276               goto unknown;
9277           }
9278
9279         case 'l':
9280           switch (name[1])
9281           {
9282             case 'o':
9283               if (name[2] == 'c' &&
9284                   name[3] == 'a' &&
9285                   name[4] == 'l')
9286               {                                   /* local      */
9287                 return KEY_local;
9288               }
9289
9290               goto unknown;
9291
9292             case 's':
9293               if (name[2] == 't' &&
9294                   name[3] == 'a' &&
9295                   name[4] == 't')
9296               {                                   /* lstat      */
9297                 return -KEY_lstat;
9298               }
9299
9300               goto unknown;
9301
9302             default:
9303               goto unknown;
9304           }
9305
9306         case 'm':
9307           if (name[1] == 'k' &&
9308               name[2] == 'd' &&
9309               name[3] == 'i' &&
9310               name[4] == 'r')
9311           {                                       /* mkdir      */
9312             return -KEY_mkdir;
9313           }
9314
9315           goto unknown;
9316
9317         case 'p':
9318           if (name[1] == 'r' &&
9319               name[2] == 'i' &&
9320               name[3] == 'n' &&
9321               name[4] == 't')
9322           {                                       /* print      */
9323             return KEY_print;
9324           }
9325
9326           goto unknown;
9327
9328         case 'r':
9329           switch (name[1])
9330           {
9331             case 'e':
9332               if (name[2] == 's' &&
9333                   name[3] == 'e' &&
9334                   name[4] == 't')
9335               {                                   /* reset      */
9336                 return -KEY_reset;
9337               }
9338
9339               goto unknown;
9340
9341             case 'm':
9342               if (name[2] == 'd' &&
9343                   name[3] == 'i' &&
9344                   name[4] == 'r')
9345               {                                   /* rmdir      */
9346                 return -KEY_rmdir;
9347               }
9348
9349               goto unknown;
9350
9351             default:
9352               goto unknown;
9353           }
9354
9355         case 's':
9356           switch (name[1])
9357           {
9358             case 'e':
9359               if (name[2] == 'm' &&
9360                   name[3] == 'o' &&
9361                   name[4] == 'p')
9362               {                                   /* semop      */
9363                 return -KEY_semop;
9364               }
9365
9366               goto unknown;
9367
9368             case 'h':
9369               if (name[2] == 'i' &&
9370                   name[3] == 'f' &&
9371                   name[4] == 't')
9372               {                                   /* shift      */
9373                 return -KEY_shift;
9374               }
9375
9376               goto unknown;
9377
9378             case 'l':
9379               if (name[2] == 'e' &&
9380                   name[3] == 'e' &&
9381                   name[4] == 'p')
9382               {                                   /* sleep      */
9383                 return -KEY_sleep;
9384               }
9385
9386               goto unknown;
9387
9388             case 'p':
9389               if (name[2] == 'l' &&
9390                   name[3] == 'i' &&
9391                   name[4] == 't')
9392               {                                   /* split      */
9393                 return KEY_split;
9394               }
9395
9396               goto unknown;
9397
9398             case 'r':
9399               if (name[2] == 'a' &&
9400                   name[3] == 'n' &&
9401                   name[4] == 'd')
9402               {                                   /* srand      */
9403                 return -KEY_srand;
9404               }
9405
9406               goto unknown;
9407
9408             case 't':
9409               switch (name[2])
9410               {
9411                 case 'a':
9412                   if (name[3] == 't' &&
9413                       name[4] == 'e')
9414                   {                               /* state      */
9415                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9416                   }
9417
9418                   goto unknown;
9419
9420                 case 'u':
9421                   if (name[3] == 'd' &&
9422                       name[4] == 'y')
9423                   {                               /* study      */
9424                     return KEY_study;
9425                   }
9426
9427                   goto unknown;
9428
9429                 default:
9430                   goto unknown;
9431               }
9432
9433             default:
9434               goto unknown;
9435           }
9436
9437         case 't':
9438           if (name[1] == 'i' &&
9439               name[2] == 'm' &&
9440               name[3] == 'e' &&
9441               name[4] == 's')
9442           {                                       /* times      */
9443             return -KEY_times;
9444           }
9445
9446           goto unknown;
9447
9448         case 'u':
9449           switch (name[1])
9450           {
9451             case 'm':
9452               if (name[2] == 'a' &&
9453                   name[3] == 's' &&
9454                   name[4] == 'k')
9455               {                                   /* umask      */
9456                 return -KEY_umask;
9457               }
9458
9459               goto unknown;
9460
9461             case 'n':
9462               switch (name[2])
9463               {
9464                 case 'd':
9465                   if (name[3] == 'e' &&
9466                       name[4] == 'f')
9467                   {                               /* undef      */
9468                     return KEY_undef;
9469                   }
9470
9471                   goto unknown;
9472
9473                 case 't':
9474                   if (name[3] == 'i')
9475                   {
9476                     switch (name[4])
9477                     {
9478                       case 'e':
9479                         {                         /* untie      */
9480                           return -KEY_untie;
9481                         }
9482
9483                       case 'l':
9484                         {                         /* until      */
9485                           return KEY_until;
9486                         }
9487
9488                       default:
9489                         goto unknown;
9490                     }
9491                   }
9492
9493                   goto unknown;
9494
9495                 default:
9496                   goto unknown;
9497               }
9498
9499             case 't':
9500               if (name[2] == 'i' &&
9501                   name[3] == 'm' &&
9502                   name[4] == 'e')
9503               {                                   /* utime      */
9504                 return -KEY_utime;
9505               }
9506
9507               goto unknown;
9508
9509             default:
9510               goto unknown;
9511           }
9512
9513         case 'w':
9514           switch (name[1])
9515           {
9516             case 'h':
9517               if (name[2] == 'i' &&
9518                   name[3] == 'l' &&
9519                   name[4] == 'e')
9520               {                                   /* while      */
9521                 return KEY_while;
9522               }
9523
9524               goto unknown;
9525
9526             case 'r':
9527               if (name[2] == 'i' &&
9528                   name[3] == 't' &&
9529                   name[4] == 'e')
9530               {                                   /* write      */
9531                 return -KEY_write;
9532               }
9533
9534               goto unknown;
9535
9536             default:
9537               goto unknown;
9538           }
9539
9540         default:
9541           goto unknown;
9542       }
9543
9544     case 6: /* 33 tokens of length 6 */
9545       switch (name[0])
9546       {
9547         case 'a':
9548           if (name[1] == 'c' &&
9549               name[2] == 'c' &&
9550               name[3] == 'e' &&
9551               name[4] == 'p' &&
9552               name[5] == 't')
9553           {                                       /* accept     */
9554             return -KEY_accept;
9555           }
9556
9557           goto unknown;
9558
9559         case 'c':
9560           switch (name[1])
9561           {
9562             case 'a':
9563               if (name[2] == 'l' &&
9564                   name[3] == 'l' &&
9565                   name[4] == 'e' &&
9566                   name[5] == 'r')
9567               {                                   /* caller     */
9568                 return -KEY_caller;
9569               }
9570
9571               goto unknown;
9572
9573             case 'h':
9574               if (name[2] == 'r' &&
9575                   name[3] == 'o' &&
9576                   name[4] == 'o' &&
9577                   name[5] == 't')
9578               {                                   /* chroot     */
9579                 return -KEY_chroot;
9580               }
9581
9582               goto unknown;
9583
9584             default:
9585               goto unknown;
9586           }
9587
9588         case 'd':
9589           if (name[1] == 'e' &&
9590               name[2] == 'l' &&
9591               name[3] == 'e' &&
9592               name[4] == 't' &&
9593               name[5] == 'e')
9594           {                                       /* delete     */
9595             return KEY_delete;
9596           }
9597
9598           goto unknown;
9599
9600         case 'e':
9601           switch (name[1])
9602           {
9603             case 'l':
9604               if (name[2] == 's' &&
9605                   name[3] == 'e' &&
9606                   name[4] == 'i' &&
9607                   name[5] == 'f')
9608               {                                   /* elseif     */
9609                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9610               }
9611
9612               goto unknown;
9613
9614             case 'x':
9615               if (name[2] == 'i' &&
9616                   name[3] == 's' &&
9617                   name[4] == 't' &&
9618                   name[5] == 's')
9619               {                                   /* exists     */
9620                 return KEY_exists;
9621               }
9622
9623               goto unknown;
9624
9625             default:
9626               goto unknown;
9627           }
9628
9629         case 'f':
9630           switch (name[1])
9631           {
9632             case 'i':
9633               if (name[2] == 'l' &&
9634                   name[3] == 'e' &&
9635                   name[4] == 'n' &&
9636                   name[5] == 'o')
9637               {                                   /* fileno     */
9638                 return -KEY_fileno;
9639               }
9640
9641               goto unknown;
9642
9643             case 'o':
9644               if (name[2] == 'r' &&
9645                   name[3] == 'm' &&
9646                   name[4] == 'a' &&
9647                   name[5] == 't')
9648               {                                   /* format     */
9649                 return KEY_format;
9650               }
9651
9652               goto unknown;
9653
9654             default:
9655               goto unknown;
9656           }
9657
9658         case 'g':
9659           if (name[1] == 'm' &&
9660               name[2] == 't' &&
9661               name[3] == 'i' &&
9662               name[4] == 'm' &&
9663               name[5] == 'e')
9664           {                                       /* gmtime     */
9665             return -KEY_gmtime;
9666           }
9667
9668           goto unknown;
9669
9670         case 'l':
9671           switch (name[1])
9672           {
9673             case 'e':
9674               if (name[2] == 'n' &&
9675                   name[3] == 'g' &&
9676                   name[4] == 't' &&
9677                   name[5] == 'h')
9678               {                                   /* length     */
9679                 return -KEY_length;
9680               }
9681
9682               goto unknown;
9683
9684             case 'i':
9685               if (name[2] == 's' &&
9686                   name[3] == 't' &&
9687                   name[4] == 'e' &&
9688                   name[5] == 'n')
9689               {                                   /* listen     */
9690                 return -KEY_listen;
9691               }
9692
9693               goto unknown;
9694
9695             default:
9696               goto unknown;
9697           }
9698
9699         case 'm':
9700           if (name[1] == 's' &&
9701               name[2] == 'g')
9702           {
9703             switch (name[3])
9704             {
9705               case 'c':
9706                 if (name[4] == 't' &&
9707                     name[5] == 'l')
9708                 {                                 /* msgctl     */
9709                   return -KEY_msgctl;
9710                 }
9711
9712                 goto unknown;
9713
9714               case 'g':
9715                 if (name[4] == 'e' &&
9716                     name[5] == 't')
9717                 {                                 /* msgget     */
9718                   return -KEY_msgget;
9719                 }
9720
9721                 goto unknown;
9722
9723               case 'r':
9724                 if (name[4] == 'c' &&
9725                     name[5] == 'v')
9726                 {                                 /* msgrcv     */
9727                   return -KEY_msgrcv;
9728                 }
9729
9730                 goto unknown;
9731
9732               case 's':
9733                 if (name[4] == 'n' &&
9734                     name[5] == 'd')
9735                 {                                 /* msgsnd     */
9736                   return -KEY_msgsnd;
9737                 }
9738
9739                 goto unknown;
9740
9741               default:
9742                 goto unknown;
9743             }
9744           }
9745
9746           goto unknown;
9747
9748         case 'p':
9749           if (name[1] == 'r' &&
9750               name[2] == 'i' &&
9751               name[3] == 'n' &&
9752               name[4] == 't' &&
9753               name[5] == 'f')
9754           {                                       /* printf     */
9755             return KEY_printf;
9756           }
9757
9758           goto unknown;
9759
9760         case 'r':
9761           switch (name[1])
9762           {
9763             case 'e':
9764               switch (name[2])
9765               {
9766                 case 'n':
9767                   if (name[3] == 'a' &&
9768                       name[4] == 'm' &&
9769                       name[5] == 'e')
9770                   {                               /* rename     */
9771                     return -KEY_rename;
9772                   }
9773
9774                   goto unknown;
9775
9776                 case 't':
9777                   if (name[3] == 'u' &&
9778                       name[4] == 'r' &&
9779                       name[5] == 'n')
9780                   {                               /* return     */
9781                     return KEY_return;
9782                   }
9783
9784                   goto unknown;
9785
9786                 default:
9787                   goto unknown;
9788               }
9789
9790             case 'i':
9791               if (name[2] == 'n' &&
9792                   name[3] == 'd' &&
9793                   name[4] == 'e' &&
9794                   name[5] == 'x')
9795               {                                   /* rindex     */
9796                 return -KEY_rindex;
9797               }
9798
9799               goto unknown;
9800
9801             default:
9802               goto unknown;
9803           }
9804
9805         case 's':
9806           switch (name[1])
9807           {
9808             case 'c':
9809               if (name[2] == 'a' &&
9810                   name[3] == 'l' &&
9811                   name[4] == 'a' &&
9812                   name[5] == 'r')
9813               {                                   /* scalar     */
9814                 return KEY_scalar;
9815               }
9816
9817               goto unknown;
9818
9819             case 'e':
9820               switch (name[2])
9821               {
9822                 case 'l':
9823                   if (name[3] == 'e' &&
9824                       name[4] == 'c' &&
9825                       name[5] == 't')
9826                   {                               /* select     */
9827                     return -KEY_select;
9828                   }
9829
9830                   goto unknown;
9831
9832                 case 'm':
9833                   switch (name[3])
9834                   {
9835                     case 'c':
9836                       if (name[4] == 't' &&
9837                           name[5] == 'l')
9838                       {                           /* semctl     */
9839                         return -KEY_semctl;
9840                       }
9841
9842                       goto unknown;
9843
9844                     case 'g':
9845                       if (name[4] == 'e' &&
9846                           name[5] == 't')
9847                       {                           /* semget     */
9848                         return -KEY_semget;
9849                       }
9850
9851                       goto unknown;
9852
9853                     default:
9854                       goto unknown;
9855                   }
9856
9857                 default:
9858                   goto unknown;
9859               }
9860
9861             case 'h':
9862               if (name[2] == 'm')
9863               {
9864                 switch (name[3])
9865                 {
9866                   case 'c':
9867                     if (name[4] == 't' &&
9868                         name[5] == 'l')
9869                     {                             /* shmctl     */
9870                       return -KEY_shmctl;
9871                     }
9872
9873                     goto unknown;
9874
9875                   case 'g':
9876                     if (name[4] == 'e' &&
9877                         name[5] == 't')
9878                     {                             /* shmget     */
9879                       return -KEY_shmget;
9880                     }
9881
9882                     goto unknown;
9883
9884                   default:
9885                     goto unknown;
9886                 }
9887               }
9888
9889               goto unknown;
9890
9891             case 'o':
9892               if (name[2] == 'c' &&
9893                   name[3] == 'k' &&
9894                   name[4] == 'e' &&
9895                   name[5] == 't')
9896               {                                   /* socket     */
9897                 return -KEY_socket;
9898               }
9899
9900               goto unknown;
9901
9902             case 'p':
9903               if (name[2] == 'l' &&
9904                   name[3] == 'i' &&
9905                   name[4] == 'c' &&
9906                   name[5] == 'e')
9907               {                                   /* splice     */
9908                 return -KEY_splice;
9909               }
9910
9911               goto unknown;
9912
9913             case 'u':
9914               if (name[2] == 'b' &&
9915                   name[3] == 's' &&
9916                   name[4] == 't' &&
9917                   name[5] == 'r')
9918               {                                   /* substr     */
9919                 return -KEY_substr;
9920               }
9921
9922               goto unknown;
9923
9924             case 'y':
9925               if (name[2] == 's' &&
9926                   name[3] == 't' &&
9927                   name[4] == 'e' &&
9928                   name[5] == 'm')
9929               {                                   /* system     */
9930                 return -KEY_system;
9931               }
9932
9933               goto unknown;
9934
9935             default:
9936               goto unknown;
9937           }
9938
9939         case 'u':
9940           if (name[1] == 'n')
9941           {
9942             switch (name[2])
9943             {
9944               case 'l':
9945                 switch (name[3])
9946                 {
9947                   case 'e':
9948                     if (name[4] == 's' &&
9949                         name[5] == 's')
9950                     {                             /* unless     */
9951                       return KEY_unless;
9952                     }
9953
9954                     goto unknown;
9955
9956                   case 'i':
9957                     if (name[4] == 'n' &&
9958                         name[5] == 'k')
9959                     {                             /* unlink     */
9960                       return -KEY_unlink;
9961                     }
9962
9963                     goto unknown;
9964
9965                   default:
9966                     goto unknown;
9967                 }
9968
9969               case 'p':
9970                 if (name[3] == 'a' &&
9971                     name[4] == 'c' &&
9972                     name[5] == 'k')
9973                 {                                 /* unpack     */
9974                   return -KEY_unpack;
9975                 }
9976
9977                 goto unknown;
9978
9979               default:
9980                 goto unknown;
9981             }
9982           }
9983
9984           goto unknown;
9985
9986         case 'v':
9987           if (name[1] == 'a' &&
9988               name[2] == 'l' &&
9989               name[3] == 'u' &&
9990               name[4] == 'e' &&
9991               name[5] == 's')
9992           {                                       /* values     */
9993             return -KEY_values;
9994           }
9995
9996           goto unknown;
9997
9998         default:
9999           goto unknown;
10000       }
10001
10002     case 7: /* 29 tokens of length 7 */
10003       switch (name[0])
10004       {
10005         case 'D':
10006           if (name[1] == 'E' &&
10007               name[2] == 'S' &&
10008               name[3] == 'T' &&
10009               name[4] == 'R' &&
10010               name[5] == 'O' &&
10011               name[6] == 'Y')
10012           {                                       /* DESTROY    */
10013             return KEY_DESTROY;
10014           }
10015
10016           goto unknown;
10017
10018         case '_':
10019           if (name[1] == '_' &&
10020               name[2] == 'E' &&
10021               name[3] == 'N' &&
10022               name[4] == 'D' &&
10023               name[5] == '_' &&
10024               name[6] == '_')
10025           {                                       /* __END__    */
10026             return KEY___END__;
10027           }
10028
10029           goto unknown;
10030
10031         case 'b':
10032           if (name[1] == 'i' &&
10033               name[2] == 'n' &&
10034               name[3] == 'm' &&
10035               name[4] == 'o' &&
10036               name[5] == 'd' &&
10037               name[6] == 'e')
10038           {                                       /* binmode    */
10039             return -KEY_binmode;
10040           }
10041
10042           goto unknown;
10043
10044         case 'c':
10045           if (name[1] == 'o' &&
10046               name[2] == 'n' &&
10047               name[3] == 'n' &&
10048               name[4] == 'e' &&
10049               name[5] == 'c' &&
10050               name[6] == 't')
10051           {                                       /* connect    */
10052             return -KEY_connect;
10053           }
10054
10055           goto unknown;
10056
10057         case 'd':
10058           switch (name[1])
10059           {
10060             case 'b':
10061               if (name[2] == 'm' &&
10062                   name[3] == 'o' &&
10063                   name[4] == 'p' &&
10064                   name[5] == 'e' &&
10065                   name[6] == 'n')
10066               {                                   /* dbmopen    */
10067                 return -KEY_dbmopen;
10068               }
10069
10070               goto unknown;
10071
10072             case 'e':
10073               if (name[2] == 'f')
10074               {
10075                 switch (name[3])
10076                 {
10077                   case 'a':
10078                     if (name[4] == 'u' &&
10079                         name[5] == 'l' &&
10080                         name[6] == 't')
10081                     {                             /* default    */
10082                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10083                     }
10084
10085                     goto unknown;
10086
10087                   case 'i':
10088                     if (name[4] == 'n' &&
10089                         name[5] == 'e' &&
10090                         name[6] == 'd')
10091                     {                             /* defined    */
10092                       return KEY_defined;
10093                     }
10094
10095                     goto unknown;
10096
10097                   default:
10098                     goto unknown;
10099                 }
10100               }
10101
10102               goto unknown;
10103
10104             default:
10105               goto unknown;
10106           }
10107
10108         case 'f':
10109           if (name[1] == 'o' &&
10110               name[2] == 'r' &&
10111               name[3] == 'e' &&
10112               name[4] == 'a' &&
10113               name[5] == 'c' &&
10114               name[6] == 'h')
10115           {                                       /* foreach    */
10116             return KEY_foreach;
10117           }
10118
10119           goto unknown;
10120
10121         case 'g':
10122           if (name[1] == 'e' &&
10123               name[2] == 't' &&
10124               name[3] == 'p')
10125           {
10126             switch (name[4])
10127             {
10128               case 'g':
10129                 if (name[5] == 'r' &&
10130                     name[6] == 'p')
10131                 {                                 /* getpgrp    */
10132                   return -KEY_getpgrp;
10133                 }
10134
10135                 goto unknown;
10136
10137               case 'p':
10138                 if (name[5] == 'i' &&
10139                     name[6] == 'd')
10140                 {                                 /* getppid    */
10141                   return -KEY_getppid;
10142                 }
10143
10144                 goto unknown;
10145
10146               default:
10147                 goto unknown;
10148             }
10149           }
10150
10151           goto unknown;
10152
10153         case 'l':
10154           if (name[1] == 'c' &&
10155               name[2] == 'f' &&
10156               name[3] == 'i' &&
10157               name[4] == 'r' &&
10158               name[5] == 's' &&
10159               name[6] == 't')
10160           {                                       /* lcfirst    */
10161             return -KEY_lcfirst;
10162           }
10163
10164           goto unknown;
10165
10166         case 'o':
10167           if (name[1] == 'p' &&
10168               name[2] == 'e' &&
10169               name[3] == 'n' &&
10170               name[4] == 'd' &&
10171               name[5] == 'i' &&
10172               name[6] == 'r')
10173           {                                       /* opendir    */
10174             return -KEY_opendir;
10175           }
10176
10177           goto unknown;
10178
10179         case 'p':
10180           if (name[1] == 'a' &&
10181               name[2] == 'c' &&
10182               name[3] == 'k' &&
10183               name[4] == 'a' &&
10184               name[5] == 'g' &&
10185               name[6] == 'e')
10186           {                                       /* package    */
10187             return KEY_package;
10188           }
10189
10190           goto unknown;
10191
10192         case 'r':
10193           if (name[1] == 'e')
10194           {
10195             switch (name[2])
10196             {
10197               case 'a':
10198                 if (name[3] == 'd' &&
10199                     name[4] == 'd' &&
10200                     name[5] == 'i' &&
10201                     name[6] == 'r')
10202                 {                                 /* readdir    */
10203                   return -KEY_readdir;
10204                 }
10205
10206                 goto unknown;
10207
10208               case 'q':
10209                 if (name[3] == 'u' &&
10210                     name[4] == 'i' &&
10211                     name[5] == 'r' &&
10212                     name[6] == 'e')
10213                 {                                 /* require    */
10214                   return KEY_require;
10215                 }
10216
10217                 goto unknown;
10218
10219               case 'v':
10220                 if (name[3] == 'e' &&
10221                     name[4] == 'r' &&
10222                     name[5] == 's' &&
10223                     name[6] == 'e')
10224                 {                                 /* reverse    */
10225                   return -KEY_reverse;
10226                 }
10227
10228                 goto unknown;
10229
10230               default:
10231                 goto unknown;
10232             }
10233           }
10234
10235           goto unknown;
10236
10237         case 's':
10238           switch (name[1])
10239           {
10240             case 'e':
10241               switch (name[2])
10242               {
10243                 case 'e':
10244                   if (name[3] == 'k' &&
10245                       name[4] == 'd' &&
10246                       name[5] == 'i' &&
10247                       name[6] == 'r')
10248                   {                               /* seekdir    */
10249                     return -KEY_seekdir;
10250                   }
10251
10252                   goto unknown;
10253
10254                 case 't':
10255                   if (name[3] == 'p' &&
10256                       name[4] == 'g' &&
10257                       name[5] == 'r' &&
10258                       name[6] == 'p')
10259                   {                               /* setpgrp    */
10260                     return -KEY_setpgrp;
10261                   }
10262
10263                   goto unknown;
10264
10265                 default:
10266                   goto unknown;
10267               }
10268
10269             case 'h':
10270               if (name[2] == 'm' &&
10271                   name[3] == 'r' &&
10272                   name[4] == 'e' &&
10273                   name[5] == 'a' &&
10274                   name[6] == 'd')
10275               {                                   /* shmread    */
10276                 return -KEY_shmread;
10277               }
10278
10279               goto unknown;
10280
10281             case 'p':
10282               if (name[2] == 'r' &&
10283                   name[3] == 'i' &&
10284                   name[4] == 'n' &&
10285                   name[5] == 't' &&
10286                   name[6] == 'f')
10287               {                                   /* sprintf    */
10288                 return -KEY_sprintf;
10289               }
10290
10291               goto unknown;
10292
10293             case 'y':
10294               switch (name[2])
10295               {
10296                 case 'm':
10297                   if (name[3] == 'l' &&
10298                       name[4] == 'i' &&
10299                       name[5] == 'n' &&
10300                       name[6] == 'k')
10301                   {                               /* symlink    */
10302                     return -KEY_symlink;
10303                   }
10304
10305                   goto unknown;
10306
10307                 case 's':
10308                   switch (name[3])
10309                   {
10310                     case 'c':
10311                       if (name[4] == 'a' &&
10312                           name[5] == 'l' &&
10313                           name[6] == 'l')
10314                       {                           /* syscall    */
10315                         return -KEY_syscall;
10316                       }
10317
10318                       goto unknown;
10319
10320                     case 'o':
10321                       if (name[4] == 'p' &&
10322                           name[5] == 'e' &&
10323                           name[6] == 'n')
10324                       {                           /* sysopen    */
10325                         return -KEY_sysopen;
10326                       }
10327
10328                       goto unknown;
10329
10330                     case 'r':
10331                       if (name[4] == 'e' &&
10332                           name[5] == 'a' &&
10333                           name[6] == 'd')
10334                       {                           /* sysread    */
10335                         return -KEY_sysread;
10336                       }
10337
10338                       goto unknown;
10339
10340                     case 's':
10341                       if (name[4] == 'e' &&
10342                           name[5] == 'e' &&
10343                           name[6] == 'k')
10344                       {                           /* sysseek    */
10345                         return -KEY_sysseek;
10346                       }
10347
10348                       goto unknown;
10349
10350                     default:
10351                       goto unknown;
10352                   }
10353
10354                 default:
10355                   goto unknown;
10356               }
10357
10358             default:
10359               goto unknown;
10360           }
10361
10362         case 't':
10363           if (name[1] == 'e' &&
10364               name[2] == 'l' &&
10365               name[3] == 'l' &&
10366               name[4] == 'd' &&
10367               name[5] == 'i' &&
10368               name[6] == 'r')
10369           {                                       /* telldir    */
10370             return -KEY_telldir;
10371           }
10372
10373           goto unknown;
10374
10375         case 'u':
10376           switch (name[1])
10377           {
10378             case 'c':
10379               if (name[2] == 'f' &&
10380                   name[3] == 'i' &&
10381                   name[4] == 'r' &&
10382                   name[5] == 's' &&
10383                   name[6] == 't')
10384               {                                   /* ucfirst    */
10385                 return -KEY_ucfirst;
10386               }
10387
10388               goto unknown;
10389
10390             case 'n':
10391               if (name[2] == 's' &&
10392                   name[3] == 'h' &&
10393                   name[4] == 'i' &&
10394                   name[5] == 'f' &&
10395                   name[6] == 't')
10396               {                                   /* unshift    */
10397                 return -KEY_unshift;
10398               }
10399
10400               goto unknown;
10401
10402             default:
10403               goto unknown;
10404           }
10405
10406         case 'w':
10407           if (name[1] == 'a' &&
10408               name[2] == 'i' &&
10409               name[3] == 't' &&
10410               name[4] == 'p' &&
10411               name[5] == 'i' &&
10412               name[6] == 'd')
10413           {                                       /* waitpid    */
10414             return -KEY_waitpid;
10415           }
10416
10417           goto unknown;
10418
10419         default:
10420           goto unknown;
10421       }
10422
10423     case 8: /* 26 tokens of length 8 */
10424       switch (name[0])
10425       {
10426         case 'A':
10427           if (name[1] == 'U' &&
10428               name[2] == 'T' &&
10429               name[3] == 'O' &&
10430               name[4] == 'L' &&
10431               name[5] == 'O' &&
10432               name[6] == 'A' &&
10433               name[7] == 'D')
10434           {                                       /* AUTOLOAD   */
10435             return KEY_AUTOLOAD;
10436           }
10437
10438           goto unknown;
10439
10440         case '_':
10441           if (name[1] == '_')
10442           {
10443             switch (name[2])
10444             {
10445               case 'D':
10446                 if (name[3] == 'A' &&
10447                     name[4] == 'T' &&
10448                     name[5] == 'A' &&
10449                     name[6] == '_' &&
10450                     name[7] == '_')
10451                 {                                 /* __DATA__   */
10452                   return KEY___DATA__;
10453                 }
10454
10455                 goto unknown;
10456
10457               case 'F':
10458                 if (name[3] == 'I' &&
10459                     name[4] == 'L' &&
10460                     name[5] == 'E' &&
10461                     name[6] == '_' &&
10462                     name[7] == '_')
10463                 {                                 /* __FILE__   */
10464                   return -KEY___FILE__;
10465                 }
10466
10467                 goto unknown;
10468
10469               case 'L':
10470                 if (name[3] == 'I' &&
10471                     name[4] == 'N' &&
10472                     name[5] == 'E' &&
10473                     name[6] == '_' &&
10474                     name[7] == '_')
10475                 {                                 /* __LINE__   */
10476                   return -KEY___LINE__;
10477                 }
10478
10479                 goto unknown;
10480
10481               default:
10482                 goto unknown;
10483             }
10484           }
10485
10486           goto unknown;
10487
10488         case 'c':
10489           switch (name[1])
10490           {
10491             case 'l':
10492               if (name[2] == 'o' &&
10493                   name[3] == 's' &&
10494                   name[4] == 'e' &&
10495                   name[5] == 'd' &&
10496                   name[6] == 'i' &&
10497                   name[7] == 'r')
10498               {                                   /* closedir   */
10499                 return -KEY_closedir;
10500               }
10501
10502               goto unknown;
10503
10504             case 'o':
10505               if (name[2] == 'n' &&
10506                   name[3] == 't' &&
10507                   name[4] == 'i' &&
10508                   name[5] == 'n' &&
10509                   name[6] == 'u' &&
10510                   name[7] == 'e')
10511               {                                   /* continue   */
10512                 return -KEY_continue;
10513               }
10514
10515               goto unknown;
10516
10517             default:
10518               goto unknown;
10519           }
10520
10521         case 'd':
10522           if (name[1] == 'b' &&
10523               name[2] == 'm' &&
10524               name[3] == 'c' &&
10525               name[4] == 'l' &&
10526               name[5] == 'o' &&
10527               name[6] == 's' &&
10528               name[7] == 'e')
10529           {                                       /* dbmclose   */
10530             return -KEY_dbmclose;
10531           }
10532
10533           goto unknown;
10534
10535         case 'e':
10536           if (name[1] == 'n' &&
10537               name[2] == 'd')
10538           {
10539             switch (name[3])
10540             {
10541               case 'g':
10542                 if (name[4] == 'r' &&
10543                     name[5] == 'e' &&
10544                     name[6] == 'n' &&
10545                     name[7] == 't')
10546                 {                                 /* endgrent   */
10547                   return -KEY_endgrent;
10548                 }
10549
10550                 goto unknown;
10551
10552               case 'p':
10553                 if (name[4] == 'w' &&
10554                     name[5] == 'e' &&
10555                     name[6] == 'n' &&
10556                     name[7] == 't')
10557                 {                                 /* endpwent   */
10558                   return -KEY_endpwent;
10559                 }
10560
10561                 goto unknown;
10562
10563               default:
10564                 goto unknown;
10565             }
10566           }
10567
10568           goto unknown;
10569
10570         case 'f':
10571           if (name[1] == 'o' &&
10572               name[2] == 'r' &&
10573               name[3] == 'm' &&
10574               name[4] == 'l' &&
10575               name[5] == 'i' &&
10576               name[6] == 'n' &&
10577               name[7] == 'e')
10578           {                                       /* formline   */
10579             return -KEY_formline;
10580           }
10581
10582           goto unknown;
10583
10584         case 'g':
10585           if (name[1] == 'e' &&
10586               name[2] == 't')
10587           {
10588             switch (name[3])
10589             {
10590               case 'g':
10591                 if (name[4] == 'r')
10592                 {
10593                   switch (name[5])
10594                   {
10595                     case 'e':
10596                       if (name[6] == 'n' &&
10597                           name[7] == 't')
10598                       {                           /* getgrent   */
10599                         return -KEY_getgrent;
10600                       }
10601
10602                       goto unknown;
10603
10604                     case 'g':
10605                       if (name[6] == 'i' &&
10606                           name[7] == 'd')
10607                       {                           /* getgrgid   */
10608                         return -KEY_getgrgid;
10609                       }
10610
10611                       goto unknown;
10612
10613                     case 'n':
10614                       if (name[6] == 'a' &&
10615                           name[7] == 'm')
10616                       {                           /* getgrnam   */
10617                         return -KEY_getgrnam;
10618                       }
10619
10620                       goto unknown;
10621
10622                     default:
10623                       goto unknown;
10624                   }
10625                 }
10626
10627                 goto unknown;
10628
10629               case 'l':
10630                 if (name[4] == 'o' &&
10631                     name[5] == 'g' &&
10632                     name[6] == 'i' &&
10633                     name[7] == 'n')
10634                 {                                 /* getlogin   */
10635                   return -KEY_getlogin;
10636                 }
10637
10638                 goto unknown;
10639
10640               case 'p':
10641                 if (name[4] == 'w')
10642                 {
10643                   switch (name[5])
10644                   {
10645                     case 'e':
10646                       if (name[6] == 'n' &&
10647                           name[7] == 't')
10648                       {                           /* getpwent   */
10649                         return -KEY_getpwent;
10650                       }
10651
10652                       goto unknown;
10653
10654                     case 'n':
10655                       if (name[6] == 'a' &&
10656                           name[7] == 'm')
10657                       {                           /* getpwnam   */
10658                         return -KEY_getpwnam;
10659                       }
10660
10661                       goto unknown;
10662
10663                     case 'u':
10664                       if (name[6] == 'i' &&
10665                           name[7] == 'd')
10666                       {                           /* getpwuid   */
10667                         return -KEY_getpwuid;
10668                       }
10669
10670                       goto unknown;
10671
10672                     default:
10673                       goto unknown;
10674                   }
10675                 }
10676
10677                 goto unknown;
10678
10679               default:
10680                 goto unknown;
10681             }
10682           }
10683
10684           goto unknown;
10685
10686         case 'r':
10687           if (name[1] == 'e' &&
10688               name[2] == 'a' &&
10689               name[3] == 'd')
10690           {
10691             switch (name[4])
10692             {
10693               case 'l':
10694                 if (name[5] == 'i' &&
10695                     name[6] == 'n')
10696                 {
10697                   switch (name[7])
10698                   {
10699                     case 'e':
10700                       {                           /* readline   */
10701                         return -KEY_readline;
10702                       }
10703
10704                     case 'k':
10705                       {                           /* readlink   */
10706                         return -KEY_readlink;
10707                       }
10708
10709                     default:
10710                       goto unknown;
10711                   }
10712                 }
10713
10714                 goto unknown;
10715
10716               case 'p':
10717                 if (name[5] == 'i' &&
10718                     name[6] == 'p' &&
10719                     name[7] == 'e')
10720                 {                                 /* readpipe   */
10721                   return -KEY_readpipe;
10722                 }
10723
10724                 goto unknown;
10725
10726               default:
10727                 goto unknown;
10728             }
10729           }
10730
10731           goto unknown;
10732
10733         case 's':
10734           switch (name[1])
10735           {
10736             case 'e':
10737               if (name[2] == 't')
10738               {
10739                 switch (name[3])
10740                 {
10741                   case 'g':
10742                     if (name[4] == 'r' &&
10743                         name[5] == 'e' &&
10744                         name[6] == 'n' &&
10745                         name[7] == 't')
10746                     {                             /* setgrent   */
10747                       return -KEY_setgrent;
10748                     }
10749
10750                     goto unknown;
10751
10752                   case 'p':
10753                     if (name[4] == 'w' &&
10754                         name[5] == 'e' &&
10755                         name[6] == 'n' &&
10756                         name[7] == 't')
10757                     {                             /* setpwent   */
10758                       return -KEY_setpwent;
10759                     }
10760
10761                     goto unknown;
10762
10763                   default:
10764                     goto unknown;
10765                 }
10766               }
10767
10768               goto unknown;
10769
10770             case 'h':
10771               switch (name[2])
10772               {
10773                 case 'm':
10774                   if (name[3] == 'w' &&
10775                       name[4] == 'r' &&
10776                       name[5] == 'i' &&
10777                       name[6] == 't' &&
10778                       name[7] == 'e')
10779                   {                               /* shmwrite   */
10780                     return -KEY_shmwrite;
10781                   }
10782
10783                   goto unknown;
10784
10785                 case 'u':
10786                   if (name[3] == 't' &&
10787                       name[4] == 'd' &&
10788                       name[5] == 'o' &&
10789                       name[6] == 'w' &&
10790                       name[7] == 'n')
10791                   {                               /* shutdown   */
10792                     return -KEY_shutdown;
10793                   }
10794
10795                   goto unknown;
10796
10797                 default:
10798                   goto unknown;
10799               }
10800
10801             case 'y':
10802               if (name[2] == 's' &&
10803                   name[3] == 'w' &&
10804                   name[4] == 'r' &&
10805                   name[5] == 'i' &&
10806                   name[6] == 't' &&
10807                   name[7] == 'e')
10808               {                                   /* syswrite   */
10809                 return -KEY_syswrite;
10810               }
10811
10812               goto unknown;
10813
10814             default:
10815               goto unknown;
10816           }
10817
10818         case 't':
10819           if (name[1] == 'r' &&
10820               name[2] == 'u' &&
10821               name[3] == 'n' &&
10822               name[4] == 'c' &&
10823               name[5] == 'a' &&
10824               name[6] == 't' &&
10825               name[7] == 'e')
10826           {                                       /* truncate   */
10827             return -KEY_truncate;
10828           }
10829
10830           goto unknown;
10831
10832         default:
10833           goto unknown;
10834       }
10835
10836     case 9: /* 9 tokens of length 9 */
10837       switch (name[0])
10838       {
10839         case 'U':
10840           if (name[1] == 'N' &&
10841               name[2] == 'I' &&
10842               name[3] == 'T' &&
10843               name[4] == 'C' &&
10844               name[5] == 'H' &&
10845               name[6] == 'E' &&
10846               name[7] == 'C' &&
10847               name[8] == 'K')
10848           {                                       /* UNITCHECK  */
10849             return KEY_UNITCHECK;
10850           }
10851
10852           goto unknown;
10853
10854         case 'e':
10855           if (name[1] == 'n' &&
10856               name[2] == 'd' &&
10857               name[3] == 'n' &&
10858               name[4] == 'e' &&
10859               name[5] == 't' &&
10860               name[6] == 'e' &&
10861               name[7] == 'n' &&
10862               name[8] == 't')
10863           {                                       /* endnetent  */
10864             return -KEY_endnetent;
10865           }
10866
10867           goto unknown;
10868
10869         case 'g':
10870           if (name[1] == 'e' &&
10871               name[2] == 't' &&
10872               name[3] == 'n' &&
10873               name[4] == 'e' &&
10874               name[5] == 't' &&
10875               name[6] == 'e' &&
10876               name[7] == 'n' &&
10877               name[8] == 't')
10878           {                                       /* getnetent  */
10879             return -KEY_getnetent;
10880           }
10881
10882           goto unknown;
10883
10884         case 'l':
10885           if (name[1] == 'o' &&
10886               name[2] == 'c' &&
10887               name[3] == 'a' &&
10888               name[4] == 'l' &&
10889               name[5] == 't' &&
10890               name[6] == 'i' &&
10891               name[7] == 'm' &&
10892               name[8] == 'e')
10893           {                                       /* localtime  */
10894             return -KEY_localtime;
10895           }
10896
10897           goto unknown;
10898
10899         case 'p':
10900           if (name[1] == 'r' &&
10901               name[2] == 'o' &&
10902               name[3] == 't' &&
10903               name[4] == 'o' &&
10904               name[5] == 't' &&
10905               name[6] == 'y' &&
10906               name[7] == 'p' &&
10907               name[8] == 'e')
10908           {                                       /* prototype  */
10909             return KEY_prototype;
10910           }
10911
10912           goto unknown;
10913
10914         case 'q':
10915           if (name[1] == 'u' &&
10916               name[2] == 'o' &&
10917               name[3] == 't' &&
10918               name[4] == 'e' &&
10919               name[5] == 'm' &&
10920               name[6] == 'e' &&
10921               name[7] == 't' &&
10922               name[8] == 'a')
10923           {                                       /* quotemeta  */
10924             return -KEY_quotemeta;
10925           }
10926
10927           goto unknown;
10928
10929         case 'r':
10930           if (name[1] == 'e' &&
10931               name[2] == 'w' &&
10932               name[3] == 'i' &&
10933               name[4] == 'n' &&
10934               name[5] == 'd' &&
10935               name[6] == 'd' &&
10936               name[7] == 'i' &&
10937               name[8] == 'r')
10938           {                                       /* rewinddir  */
10939             return -KEY_rewinddir;
10940           }
10941
10942           goto unknown;
10943
10944         case 's':
10945           if (name[1] == 'e' &&
10946               name[2] == 't' &&
10947               name[3] == 'n' &&
10948               name[4] == 'e' &&
10949               name[5] == 't' &&
10950               name[6] == 'e' &&
10951               name[7] == 'n' &&
10952               name[8] == 't')
10953           {                                       /* setnetent  */
10954             return -KEY_setnetent;
10955           }
10956
10957           goto unknown;
10958
10959         case 'w':
10960           if (name[1] == 'a' &&
10961               name[2] == 'n' &&
10962               name[3] == 't' &&
10963               name[4] == 'a' &&
10964               name[5] == 'r' &&
10965               name[6] == 'r' &&
10966               name[7] == 'a' &&
10967               name[8] == 'y')
10968           {                                       /* wantarray  */
10969             return -KEY_wantarray;
10970           }
10971
10972           goto unknown;
10973
10974         default:
10975           goto unknown;
10976       }
10977
10978     case 10: /* 9 tokens of length 10 */
10979       switch (name[0])
10980       {
10981         case 'e':
10982           if (name[1] == 'n' &&
10983               name[2] == 'd')
10984           {
10985             switch (name[3])
10986             {
10987               case 'h':
10988                 if (name[4] == 'o' &&
10989                     name[5] == 's' &&
10990                     name[6] == 't' &&
10991                     name[7] == 'e' &&
10992                     name[8] == 'n' &&
10993                     name[9] == 't')
10994                 {                                 /* endhostent */
10995                   return -KEY_endhostent;
10996                 }
10997
10998                 goto unknown;
10999
11000               case 's':
11001                 if (name[4] == 'e' &&
11002                     name[5] == 'r' &&
11003                     name[6] == 'v' &&
11004                     name[7] == 'e' &&
11005                     name[8] == 'n' &&
11006                     name[9] == 't')
11007                 {                                 /* endservent */
11008                   return -KEY_endservent;
11009                 }
11010
11011                 goto unknown;
11012
11013               default:
11014                 goto unknown;
11015             }
11016           }
11017
11018           goto unknown;
11019
11020         case 'g':
11021           if (name[1] == 'e' &&
11022               name[2] == 't')
11023           {
11024             switch (name[3])
11025             {
11026               case 'h':
11027                 if (name[4] == 'o' &&
11028                     name[5] == 's' &&
11029                     name[6] == 't' &&
11030                     name[7] == 'e' &&
11031                     name[8] == 'n' &&
11032                     name[9] == 't')
11033                 {                                 /* gethostent */
11034                   return -KEY_gethostent;
11035                 }
11036
11037                 goto unknown;
11038
11039               case 's':
11040                 switch (name[4])
11041                 {
11042                   case 'e':
11043                     if (name[5] == 'r' &&
11044                         name[6] == 'v' &&
11045                         name[7] == 'e' &&
11046                         name[8] == 'n' &&
11047                         name[9] == 't')
11048                     {                             /* getservent */
11049                       return -KEY_getservent;
11050                     }
11051
11052                     goto unknown;
11053
11054                   case 'o':
11055                     if (name[5] == 'c' &&
11056                         name[6] == 'k' &&
11057                         name[7] == 'o' &&
11058                         name[8] == 'p' &&
11059                         name[9] == 't')
11060                     {                             /* getsockopt */
11061                       return -KEY_getsockopt;
11062                     }
11063
11064                     goto unknown;
11065
11066                   default:
11067                     goto unknown;
11068                 }
11069
11070               default:
11071                 goto unknown;
11072             }
11073           }
11074
11075           goto unknown;
11076
11077         case 's':
11078           switch (name[1])
11079           {
11080             case 'e':
11081               if (name[2] == 't')
11082               {
11083                 switch (name[3])
11084                 {
11085                   case 'h':
11086                     if (name[4] == 'o' &&
11087                         name[5] == 's' &&
11088                         name[6] == 't' &&
11089                         name[7] == 'e' &&
11090                         name[8] == 'n' &&
11091                         name[9] == 't')
11092                     {                             /* sethostent */
11093                       return -KEY_sethostent;
11094                     }
11095
11096                     goto unknown;
11097
11098                   case 's':
11099                     switch (name[4])
11100                     {
11101                       case 'e':
11102                         if (name[5] == 'r' &&
11103                             name[6] == 'v' &&
11104                             name[7] == 'e' &&
11105                             name[8] == 'n' &&
11106                             name[9] == 't')
11107                         {                         /* setservent */
11108                           return -KEY_setservent;
11109                         }
11110
11111                         goto unknown;
11112
11113                       case 'o':
11114                         if (name[5] == 'c' &&
11115                             name[6] == 'k' &&
11116                             name[7] == 'o' &&
11117                             name[8] == 'p' &&
11118                             name[9] == 't')
11119                         {                         /* setsockopt */
11120                           return -KEY_setsockopt;
11121                         }
11122
11123                         goto unknown;
11124
11125                       default:
11126                         goto unknown;
11127                     }
11128
11129                   default:
11130                     goto unknown;
11131                 }
11132               }
11133
11134               goto unknown;
11135
11136             case 'o':
11137               if (name[2] == 'c' &&
11138                   name[3] == 'k' &&
11139                   name[4] == 'e' &&
11140                   name[5] == 't' &&
11141                   name[6] == 'p' &&
11142                   name[7] == 'a' &&
11143                   name[8] == 'i' &&
11144                   name[9] == 'r')
11145               {                                   /* socketpair */
11146                 return -KEY_socketpair;
11147               }
11148
11149               goto unknown;
11150
11151             default:
11152               goto unknown;
11153           }
11154
11155         default:
11156           goto unknown;
11157       }
11158
11159     case 11: /* 8 tokens of length 11 */
11160       switch (name[0])
11161       {
11162         case '_':
11163           if (name[1] == '_' &&
11164               name[2] == 'P' &&
11165               name[3] == 'A' &&
11166               name[4] == 'C' &&
11167               name[5] == 'K' &&
11168               name[6] == 'A' &&
11169               name[7] == 'G' &&
11170               name[8] == 'E' &&
11171               name[9] == '_' &&
11172               name[10] == '_')
11173           {                                       /* __PACKAGE__ */
11174             return -KEY___PACKAGE__;
11175           }
11176
11177           goto unknown;
11178
11179         case 'e':
11180           if (name[1] == 'n' &&
11181               name[2] == 'd' &&
11182               name[3] == 'p' &&
11183               name[4] == 'r' &&
11184               name[5] == 'o' &&
11185               name[6] == 't' &&
11186               name[7] == 'o' &&
11187               name[8] == 'e' &&
11188               name[9] == 'n' &&
11189               name[10] == 't')
11190           {                                       /* endprotoent */
11191             return -KEY_endprotoent;
11192           }
11193
11194           goto unknown;
11195
11196         case 'g':
11197           if (name[1] == 'e' &&
11198               name[2] == 't')
11199           {
11200             switch (name[3])
11201             {
11202               case 'p':
11203                 switch (name[4])
11204                 {
11205                   case 'e':
11206                     if (name[5] == 'e' &&
11207                         name[6] == 'r' &&
11208                         name[7] == 'n' &&
11209                         name[8] == 'a' &&
11210                         name[9] == 'm' &&
11211                         name[10] == 'e')
11212                     {                             /* getpeername */
11213                       return -KEY_getpeername;
11214                     }
11215
11216                     goto unknown;
11217
11218                   case 'r':
11219                     switch (name[5])
11220                     {
11221                       case 'i':
11222                         if (name[6] == 'o' &&
11223                             name[7] == 'r' &&
11224                             name[8] == 'i' &&
11225                             name[9] == 't' &&
11226                             name[10] == 'y')
11227                         {                         /* getpriority */
11228                           return -KEY_getpriority;
11229                         }
11230
11231                         goto unknown;
11232
11233                       case 'o':
11234                         if (name[6] == 't' &&
11235                             name[7] == 'o' &&
11236                             name[8] == 'e' &&
11237                             name[9] == 'n' &&
11238                             name[10] == 't')
11239                         {                         /* getprotoent */
11240                           return -KEY_getprotoent;
11241                         }
11242
11243                         goto unknown;
11244
11245                       default:
11246                         goto unknown;
11247                     }
11248
11249                   default:
11250                     goto unknown;
11251                 }
11252
11253               case 's':
11254                 if (name[4] == 'o' &&
11255                     name[5] == 'c' &&
11256                     name[6] == 'k' &&
11257                     name[7] == 'n' &&
11258                     name[8] == 'a' &&
11259                     name[9] == 'm' &&
11260                     name[10] == 'e')
11261                 {                                 /* getsockname */
11262                   return -KEY_getsockname;
11263                 }
11264
11265                 goto unknown;
11266
11267               default:
11268                 goto unknown;
11269             }
11270           }
11271
11272           goto unknown;
11273
11274         case 's':
11275           if (name[1] == 'e' &&
11276               name[2] == 't' &&
11277               name[3] == 'p' &&
11278               name[4] == 'r')
11279           {
11280             switch (name[5])
11281             {
11282               case 'i':
11283                 if (name[6] == 'o' &&
11284                     name[7] == 'r' &&
11285                     name[8] == 'i' &&
11286                     name[9] == 't' &&
11287                     name[10] == 'y')
11288                 {                                 /* setpriority */
11289                   return -KEY_setpriority;
11290                 }
11291
11292                 goto unknown;
11293
11294               case 'o':
11295                 if (name[6] == 't' &&
11296                     name[7] == 'o' &&
11297                     name[8] == 'e' &&
11298                     name[9] == 'n' &&
11299                     name[10] == 't')
11300                 {                                 /* setprotoent */
11301                   return -KEY_setprotoent;
11302                 }
11303
11304                 goto unknown;
11305
11306               default:
11307                 goto unknown;
11308             }
11309           }
11310
11311           goto unknown;
11312
11313         default:
11314           goto unknown;
11315       }
11316
11317     case 12: /* 2 tokens of length 12 */
11318       if (name[0] == 'g' &&
11319           name[1] == 'e' &&
11320           name[2] == 't' &&
11321           name[3] == 'n' &&
11322           name[4] == 'e' &&
11323           name[5] == 't' &&
11324           name[6] == 'b' &&
11325           name[7] == 'y')
11326       {
11327         switch (name[8])
11328         {
11329           case 'a':
11330             if (name[9] == 'd' &&
11331                 name[10] == 'd' &&
11332                 name[11] == 'r')
11333             {                                     /* getnetbyaddr */
11334               return -KEY_getnetbyaddr;
11335             }
11336
11337             goto unknown;
11338
11339           case 'n':
11340             if (name[9] == 'a' &&
11341                 name[10] == 'm' &&
11342                 name[11] == 'e')
11343             {                                     /* getnetbyname */
11344               return -KEY_getnetbyname;
11345             }
11346
11347             goto unknown;
11348
11349           default:
11350             goto unknown;
11351         }
11352       }
11353
11354       goto unknown;
11355
11356     case 13: /* 4 tokens of length 13 */
11357       if (name[0] == 'g' &&
11358           name[1] == 'e' &&
11359           name[2] == 't')
11360       {
11361         switch (name[3])
11362         {
11363           case 'h':
11364             if (name[4] == 'o' &&
11365                 name[5] == 's' &&
11366                 name[6] == 't' &&
11367                 name[7] == 'b' &&
11368                 name[8] == 'y')
11369             {
11370               switch (name[9])
11371               {
11372                 case 'a':
11373                   if (name[10] == 'd' &&
11374                       name[11] == 'd' &&
11375                       name[12] == 'r')
11376                   {                               /* gethostbyaddr */
11377                     return -KEY_gethostbyaddr;
11378                   }
11379
11380                   goto unknown;
11381
11382                 case 'n':
11383                   if (name[10] == 'a' &&
11384                       name[11] == 'm' &&
11385                       name[12] == 'e')
11386                   {                               /* gethostbyname */
11387                     return -KEY_gethostbyname;
11388                   }
11389
11390                   goto unknown;
11391
11392                 default:
11393                   goto unknown;
11394               }
11395             }
11396
11397             goto unknown;
11398
11399           case 's':
11400             if (name[4] == 'e' &&
11401                 name[5] == 'r' &&
11402                 name[6] == 'v' &&
11403                 name[7] == 'b' &&
11404                 name[8] == 'y')
11405             {
11406               switch (name[9])
11407               {
11408                 case 'n':
11409                   if (name[10] == 'a' &&
11410                       name[11] == 'm' &&
11411                       name[12] == 'e')
11412                   {                               /* getservbyname */
11413                     return -KEY_getservbyname;
11414                   }
11415
11416                   goto unknown;
11417
11418                 case 'p':
11419                   if (name[10] == 'o' &&
11420                       name[11] == 'r' &&
11421                       name[12] == 't')
11422                   {                               /* getservbyport */
11423                     return -KEY_getservbyport;
11424                   }
11425
11426                   goto unknown;
11427
11428                 default:
11429                   goto unknown;
11430               }
11431             }
11432
11433             goto unknown;
11434
11435           default:
11436             goto unknown;
11437         }
11438       }
11439
11440       goto unknown;
11441
11442     case 14: /* 1 tokens of length 14 */
11443       if (name[0] == 'g' &&
11444           name[1] == 'e' &&
11445           name[2] == 't' &&
11446           name[3] == 'p' &&
11447           name[4] == 'r' &&
11448           name[5] == 'o' &&
11449           name[6] == 't' &&
11450           name[7] == 'o' &&
11451           name[8] == 'b' &&
11452           name[9] == 'y' &&
11453           name[10] == 'n' &&
11454           name[11] == 'a' &&
11455           name[12] == 'm' &&
11456           name[13] == 'e')
11457       {                                           /* getprotobyname */
11458         return -KEY_getprotobyname;
11459       }
11460
11461       goto unknown;
11462
11463     case 16: /* 1 tokens of length 16 */
11464       if (name[0] == 'g' &&
11465           name[1] == 'e' &&
11466           name[2] == 't' &&
11467           name[3] == 'p' &&
11468           name[4] == 'r' &&
11469           name[5] == 'o' &&
11470           name[6] == 't' &&
11471           name[7] == 'o' &&
11472           name[8] == 'b' &&
11473           name[9] == 'y' &&
11474           name[10] == 'n' &&
11475           name[11] == 'u' &&
11476           name[12] == 'm' &&
11477           name[13] == 'b' &&
11478           name[14] == 'e' &&
11479           name[15] == 'r')
11480       {                                           /* getprotobynumber */
11481         return -KEY_getprotobynumber;
11482       }
11483
11484       goto unknown;
11485
11486     default:
11487       goto unknown;
11488   }
11489
11490 unknown:
11491   return 0;
11492 }
11493
11494 STATIC void
11495 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11496 {
11497     dVAR;
11498
11499     PERL_ARGS_ASSERT_CHECKCOMMA;
11500
11501     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11502         if (ckWARN(WARN_SYNTAX)) {
11503             int level = 1;
11504             const char *w;
11505             for (w = s+2; *w && level; w++) {
11506                 if (*w == '(')
11507                     ++level;
11508                 else if (*w == ')')
11509                     --level;
11510             }
11511             while (isSPACE(*w))
11512                 ++w;
11513             /* the list of chars below is for end of statements or
11514              * block / parens, boolean operators (&&, ||, //) and branch
11515              * constructs (or, and, if, until, unless, while, err, for).
11516              * Not a very solid hack... */
11517             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11518                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11519                             "%s (...) interpreted as function",name);
11520         }
11521     }
11522     while (s < PL_bufend && isSPACE(*s))
11523         s++;
11524     if (*s == '(')
11525         s++;
11526     while (s < PL_bufend && isSPACE(*s))
11527         s++;
11528     if (isIDFIRST_lazy_if(s,UTF)) {
11529         const char * const w = s++;
11530         while (isALNUM_lazy_if(s,UTF))
11531             s++;
11532         while (s < PL_bufend && isSPACE(*s))
11533             s++;
11534         if (*s == ',') {
11535             GV* gv;
11536             if (keyword(w, s - w, 0))
11537                 return;
11538
11539             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11540             if (gv && GvCVu(gv))
11541                 return;
11542             Perl_croak(aTHX_ "No comma allowed after %s", what);
11543         }
11544     }
11545 }
11546
11547 /* Either returns sv, or mortalizes sv and returns a new SV*.
11548    Best used as sv=new_constant(..., sv, ...).
11549    If s, pv are NULL, calls subroutine with one argument,
11550    and type is used with error messages only. */
11551
11552 STATIC SV *
11553 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11554                SV *sv, SV *pv, const char *type, STRLEN typelen)
11555 {
11556     dVAR; dSP;
11557     HV * const table = GvHV(PL_hintgv);          /* ^H */
11558     SV *res;
11559     SV **cvp;
11560     SV *cv, *typesv;
11561     const char *why1 = "", *why2 = "", *why3 = "";
11562
11563     PERL_ARGS_ASSERT_NEW_CONSTANT;
11564
11565     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11566         SV *msg;
11567
11568         why2 = (const char *)
11569             (strEQ(key,"charnames")
11570              ? "(possibly a missing \"use charnames ...\")"
11571              : "");
11572         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11573                             (type ? type: "undef"), why2);
11574
11575         /* This is convoluted and evil ("goto considered harmful")
11576          * but I do not understand the intricacies of all the different
11577          * failure modes of %^H in here.  The goal here is to make
11578          * the most probable error message user-friendly. --jhi */
11579
11580         goto msgdone;
11581
11582     report:
11583         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11584                             (type ? type: "undef"), why1, why2, why3);
11585     msgdone:
11586         yyerror(SvPVX_const(msg));
11587         SvREFCNT_dec(msg);
11588         return sv;
11589     }
11590
11591     /* charnames doesn't work well if there have been errors found */
11592     if (PL_error_count > 0 && strEQ(key,"charnames"))
11593         return &PL_sv_undef;
11594
11595     cvp = hv_fetch(table, key, keylen, FALSE);
11596     if (!cvp || !SvOK(*cvp)) {
11597         why1 = "$^H{";
11598         why2 = key;
11599         why3 = "} is not defined";
11600         goto report;
11601     }
11602     sv_2mortal(sv);                     /* Parent created it permanently */
11603     cv = *cvp;
11604     if (!pv && s)
11605         pv = newSVpvn_flags(s, len, SVs_TEMP);
11606     if (type && pv)
11607         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11608     else
11609         typesv = &PL_sv_undef;
11610
11611     PUSHSTACKi(PERLSI_OVERLOAD);
11612     ENTER ;
11613     SAVETMPS;
11614
11615     PUSHMARK(SP) ;
11616     EXTEND(sp, 3);
11617     if (pv)
11618         PUSHs(pv);
11619     PUSHs(sv);
11620     if (pv)
11621         PUSHs(typesv);
11622     PUTBACK;
11623     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11624
11625     SPAGAIN ;
11626
11627     /* Check the eval first */
11628     if (!PL_in_eval && SvTRUE(ERRSV)) {
11629         sv_catpvs(ERRSV, "Propagated");
11630         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11631         (void)POPs;
11632         res = SvREFCNT_inc_simple(sv);
11633     }
11634     else {
11635         res = POPs;
11636         SvREFCNT_inc_simple_void(res);
11637     }
11638
11639     PUTBACK ;
11640     FREETMPS ;
11641     LEAVE ;
11642     POPSTACK;
11643
11644     if (!SvOK(res)) {
11645         why1 = "Call to &{$^H{";
11646         why2 = key;
11647         why3 = "}} did not return a defined value";
11648         sv = res;
11649         goto report;
11650     }
11651
11652     return res;
11653 }
11654
11655 /* Returns a NUL terminated string, with the length of the string written to
11656    *slp
11657    */
11658 STATIC char *
11659 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11660 {
11661     dVAR;
11662     register char *d = dest;
11663     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11664
11665     PERL_ARGS_ASSERT_SCAN_WORD;
11666
11667     for (;;) {
11668         if (d >= e)
11669             Perl_croak(aTHX_ ident_too_long);
11670         if (isALNUM(*s))        /* UTF handled below */
11671             *d++ = *s++;
11672         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11673             *d++ = ':';
11674             *d++ = ':';
11675             s++;
11676         }
11677         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11678             *d++ = *s++;
11679             *d++ = *s++;
11680         }
11681         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11682             char *t = s + UTF8SKIP(s);
11683             size_t len;
11684             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11685                 t += UTF8SKIP(t);
11686             len = t - s;
11687             if (d + len > e)
11688                 Perl_croak(aTHX_ ident_too_long);
11689             Copy(s, d, len, char);
11690             d += len;
11691             s = t;
11692         }
11693         else {
11694             *d = '\0';
11695             *slp = d - dest;
11696             return s;
11697         }
11698     }
11699 }
11700
11701 STATIC char *
11702 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11703 {
11704     dVAR;
11705     char *bracket = NULL;
11706     char funny = *s++;
11707     register char *d = dest;
11708     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11709
11710     PERL_ARGS_ASSERT_SCAN_IDENT;
11711
11712     if (isSPACE(*s))
11713         s = PEEKSPACE(s);
11714     if (isDIGIT(*s)) {
11715         while (isDIGIT(*s)) {
11716             if (d >= e)
11717                 Perl_croak(aTHX_ ident_too_long);
11718             *d++ = *s++;
11719         }
11720     }
11721     else {
11722         for (;;) {
11723             if (d >= e)
11724                 Perl_croak(aTHX_ ident_too_long);
11725             if (isALNUM(*s))    /* UTF handled below */
11726                 *d++ = *s++;
11727             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11728                 *d++ = ':';
11729                 *d++ = ':';
11730                 s++;
11731             }
11732             else if (*s == ':' && s[1] == ':') {
11733                 *d++ = *s++;
11734                 *d++ = *s++;
11735             }
11736             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11737                 char *t = s + UTF8SKIP(s);
11738                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11739                     t += UTF8SKIP(t);
11740                 if (d + (t - s) > e)
11741                     Perl_croak(aTHX_ ident_too_long);
11742                 Copy(s, d, t - s, char);
11743                 d += t - s;
11744                 s = t;
11745             }
11746             else
11747                 break;
11748         }
11749     }
11750     *d = '\0';
11751     d = dest;
11752     if (*d) {
11753         if (PL_lex_state != LEX_NORMAL)
11754             PL_lex_state = LEX_INTERPENDMAYBE;
11755         return s;
11756     }
11757     if (*s == '$' && s[1] &&
11758         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11759     {
11760         return s;
11761     }
11762     if (*s == '{') {
11763         bracket = s;
11764         s++;
11765     }
11766     else if (ck_uni)
11767         check_uni();
11768     if (s < send)
11769         *d = *s++;
11770     d[1] = '\0';
11771     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11772         *d = toCTRL(*s);
11773         s++;
11774     }
11775     if (bracket) {
11776         if (isSPACE(s[-1])) {
11777             while (s < send) {
11778                 const char ch = *s++;
11779                 if (!SPACE_OR_TAB(ch)) {
11780                     *d = ch;
11781                     break;
11782                 }
11783             }
11784         }
11785         if (isIDFIRST_lazy_if(d,UTF)) {
11786             d++;
11787             if (UTF) {
11788                 char *end = s;
11789                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11790                     end += UTF8SKIP(end);
11791                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11792                         end += UTF8SKIP(end);
11793                 }
11794                 Copy(s, d, end - s, char);
11795                 d += end - s;
11796                 s = end;
11797             }
11798             else {
11799                 while ((isALNUM(*s) || *s == ':') && d < e)
11800                     *d++ = *s++;
11801                 if (d >= e)
11802                     Perl_croak(aTHX_ ident_too_long);
11803             }
11804             *d = '\0';
11805             while (s < send && SPACE_OR_TAB(*s))
11806                 s++;
11807             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11808                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11809                     const char * const brack =
11810                         (const char *)
11811                         ((*s == '[') ? "[...]" : "{...}");
11812                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11813                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11814                         funny, dest, brack, funny, dest, brack);
11815                 }
11816                 bracket++;
11817                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11818                 return s;
11819             }
11820         }
11821         /* Handle extended ${^Foo} variables
11822          * 1999-02-27 mjd-perl-patch@plover.com */
11823         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11824                  && isALNUM(*s))
11825         {
11826             d++;
11827             while (isALNUM(*s) && d < e) {
11828                 *d++ = *s++;
11829             }
11830             if (d >= e)
11831                 Perl_croak(aTHX_ ident_too_long);
11832             *d = '\0';
11833         }
11834         if (*s == '}') {
11835             s++;
11836             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11837                 PL_lex_state = LEX_INTERPEND;
11838                 PL_expect = XREF;
11839             }
11840             if (PL_lex_state == LEX_NORMAL) {
11841                 if (ckWARN(WARN_AMBIGUOUS) &&
11842                     (keyword(dest, d - dest, 0)
11843                      || get_cvn_flags(dest, d - dest, 0)))
11844                 {
11845                     if (funny == '#')
11846                         funny = '@';
11847                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11848                         "Ambiguous use of %c{%s} resolved to %c%s",
11849                         funny, dest, funny, dest);
11850                 }
11851             }
11852         }
11853         else {
11854             s = bracket;                /* let the parser handle it */
11855             *dest = '\0';
11856         }
11857     }
11858     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11859         PL_lex_state = LEX_INTERPEND;
11860     return s;
11861 }
11862
11863 static U32
11864 S_pmflag(U32 pmfl, const char ch) {
11865     switch (ch) {
11866         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11867     case GLOBAL_PAT_MOD:      pmfl |= PMf_GLOBAL; break;
11868     case CONTINUE_PAT_MOD:    pmfl |= PMf_CONTINUE; break;
11869     case ONCE_PAT_MOD:        pmfl |= PMf_KEEP; break;
11870     case KEEPCOPY_PAT_MOD:    pmfl |= PMf_KEEPCOPY; break;
11871     case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
11872     }
11873     return pmfl;
11874 }
11875
11876 STATIC char *
11877 S_scan_pat(pTHX_ char *start, I32 type)
11878 {
11879     dVAR;
11880     PMOP *pm;
11881     char *s = scan_str(start,!!PL_madskills,FALSE);
11882     const char * const valid_flags =
11883         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11884 #ifdef PERL_MAD
11885     char *modstart;
11886 #endif
11887
11888     PERL_ARGS_ASSERT_SCAN_PAT;
11889
11890     if (!s) {
11891         const char * const delimiter = skipspace(start);
11892         Perl_croak(aTHX_
11893                    (const char *)
11894                    (*delimiter == '?'
11895                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11896                     : "Search pattern not terminated" ));
11897     }
11898
11899     pm = (PMOP*)newPMOP(type, 0);
11900     if (PL_multi_open == '?') {
11901         /* This is the only point in the code that sets PMf_ONCE:  */
11902         pm->op_pmflags |= PMf_ONCE;
11903
11904         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11905            allows us to restrict the list needed by reset to just the ??
11906            matches.  */
11907         assert(type != OP_TRANS);
11908         if (PL_curstash) {
11909             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11910             U32 elements;
11911             if (!mg) {
11912                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11913                                  0);
11914             }
11915             elements = mg->mg_len / sizeof(PMOP**);
11916             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11917             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11918             mg->mg_len = elements * sizeof(PMOP**);
11919             PmopSTASH_set(pm,PL_curstash);
11920         }
11921     }
11922 #ifdef PERL_MAD
11923     modstart = s;
11924 #endif
11925     while (*s && strchr(valid_flags, *s))
11926         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11927
11928     if (isALNUM(*s)) {
11929         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11930             "Having no space between pattern and following word is deprecated");
11931
11932     }
11933 #ifdef PERL_MAD
11934     if (PL_madskills && modstart != s) {
11935         SV* tmptoken = newSVpvn(modstart, s - modstart);
11936         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11937     }
11938 #endif
11939     /* issue a warning if /c is specified,but /g is not */
11940     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11941     {
11942         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11943                        "Use of /c modifier is meaningless without /g" );
11944     }
11945
11946     PL_lex_op = (OP*)pm;
11947     pl_yylval.ival = OP_MATCH;
11948     return s;
11949 }
11950
11951 STATIC char *
11952 S_scan_subst(pTHX_ char *start)
11953 {
11954     dVAR;
11955     register char *s;
11956     register PMOP *pm;
11957     I32 first_start;
11958     I32 es = 0;
11959 #ifdef PERL_MAD
11960     char *modstart;
11961 #endif
11962
11963     PERL_ARGS_ASSERT_SCAN_SUBST;
11964
11965     pl_yylval.ival = OP_NULL;
11966
11967     s = scan_str(start,!!PL_madskills,FALSE);
11968
11969     if (!s)
11970         Perl_croak(aTHX_ "Substitution pattern not terminated");
11971
11972     if (s[-1] == PL_multi_open)
11973         s--;
11974 #ifdef PERL_MAD
11975     if (PL_madskills) {
11976         CURMAD('q', PL_thisopen);
11977         CURMAD('_', PL_thiswhite);
11978         CURMAD('E', PL_thisstuff);
11979         CURMAD('Q', PL_thisclose);
11980         PL_realtokenstart = s - SvPVX(PL_linestr);
11981     }
11982 #endif
11983
11984     first_start = PL_multi_start;
11985     s = scan_str(s,!!PL_madskills,FALSE);
11986     if (!s) {
11987         if (PL_lex_stuff) {
11988             SvREFCNT_dec(PL_lex_stuff);
11989             PL_lex_stuff = NULL;
11990         }
11991         Perl_croak(aTHX_ "Substitution replacement not terminated");
11992     }
11993     PL_multi_start = first_start;       /* so whole substitution is taken together */
11994
11995     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11996
11997 #ifdef PERL_MAD
11998     if (PL_madskills) {
11999         CURMAD('z', PL_thisopen);
12000         CURMAD('R', PL_thisstuff);
12001         CURMAD('Z', PL_thisclose);
12002     }
12003     modstart = s;
12004 #endif
12005
12006     while (*s) {
12007         if (*s == EXEC_PAT_MOD) {
12008             s++;
12009             es++;
12010         }
12011         else if (strchr(S_PAT_MODS, *s))
12012             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12013         else {
12014             if (isALNUM(*s)) {
12015                 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12016                     "Having no space between pattern and following word is deprecated");
12017
12018             }
12019             break;
12020         }
12021     }
12022
12023 #ifdef PERL_MAD
12024     if (PL_madskills) {
12025         if (modstart != s)
12026             curmad('m', newSVpvn(modstart, s - modstart));
12027         append_madprops(PL_thismad, (OP*)pm, 0);
12028         PL_thismad = 0;
12029     }
12030 #endif
12031     if ((pm->op_pmflags & PMf_CONTINUE)) {
12032         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
12033     }
12034
12035     if (es) {
12036         SV * const repl = newSVpvs("");
12037
12038         PL_sublex_info.super_bufptr = s;
12039         PL_sublex_info.super_bufend = PL_bufend;
12040         PL_multi_end = 0;
12041         pm->op_pmflags |= PMf_EVAL;
12042         while (es-- > 0) {
12043             if (es)
12044                 sv_catpvs(repl, "eval ");
12045             else
12046                 sv_catpvs(repl, "do ");
12047         }
12048         sv_catpvs(repl, "{");
12049         sv_catsv(repl, PL_lex_repl);
12050         if (strchr(SvPVX(PL_lex_repl), '#'))
12051             sv_catpvs(repl, "\n");
12052         sv_catpvs(repl, "}");
12053         SvEVALED_on(repl);
12054         SvREFCNT_dec(PL_lex_repl);
12055         PL_lex_repl = repl;
12056     }
12057
12058     PL_lex_op = (OP*)pm;
12059     pl_yylval.ival = OP_SUBST;
12060     return s;
12061 }
12062
12063 STATIC char *
12064 S_scan_trans(pTHX_ char *start)
12065 {
12066     dVAR;
12067     register char* s;
12068     OP *o;
12069     short *tbl;
12070     U8 squash;
12071     U8 del;
12072     U8 complement;
12073 #ifdef PERL_MAD
12074     char *modstart;
12075 #endif
12076
12077     PERL_ARGS_ASSERT_SCAN_TRANS;
12078
12079     pl_yylval.ival = OP_NULL;
12080
12081     s = scan_str(start,!!PL_madskills,FALSE);
12082     if (!s)
12083         Perl_croak(aTHX_ "Transliteration pattern not terminated");
12084
12085     if (s[-1] == PL_multi_open)
12086         s--;
12087 #ifdef PERL_MAD
12088     if (PL_madskills) {
12089         CURMAD('q', PL_thisopen);
12090         CURMAD('_', PL_thiswhite);
12091         CURMAD('E', PL_thisstuff);
12092         CURMAD('Q', PL_thisclose);
12093         PL_realtokenstart = s - SvPVX(PL_linestr);
12094     }
12095 #endif
12096
12097     s = scan_str(s,!!PL_madskills,FALSE);
12098     if (!s) {
12099         if (PL_lex_stuff) {
12100             SvREFCNT_dec(PL_lex_stuff);
12101             PL_lex_stuff = NULL;
12102         }
12103         Perl_croak(aTHX_ "Transliteration replacement not terminated");
12104     }
12105     if (PL_madskills) {
12106         CURMAD('z', PL_thisopen);
12107         CURMAD('R', PL_thisstuff);
12108         CURMAD('Z', PL_thisclose);
12109     }
12110
12111     complement = del = squash = 0;
12112 #ifdef PERL_MAD
12113     modstart = s;
12114 #endif
12115     while (1) {
12116         switch (*s) {
12117         case 'c':
12118             complement = OPpTRANS_COMPLEMENT;
12119             break;
12120         case 'd':
12121             del = OPpTRANS_DELETE;
12122             break;
12123         case 's':
12124             squash = OPpTRANS_SQUASH;
12125             break;
12126         default:
12127             goto no_more;
12128         }
12129         s++;
12130     }
12131   no_more:
12132
12133     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12134     o = newPVOP(OP_TRANS, 0, (char*)tbl);
12135     o->op_private &= ~OPpTRANS_ALL;
12136     o->op_private |= del|squash|complement|
12137       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12138       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
12139
12140     PL_lex_op = o;
12141     pl_yylval.ival = OP_TRANS;
12142
12143 #ifdef PERL_MAD
12144     if (PL_madskills) {
12145         if (modstart != s)
12146             curmad('m', newSVpvn(modstart, s - modstart));
12147         append_madprops(PL_thismad, o, 0);
12148         PL_thismad = 0;
12149     }
12150 #endif
12151
12152     return s;
12153 }
12154
12155 STATIC char *
12156 S_scan_heredoc(pTHX_ register char *s)
12157 {
12158     dVAR;
12159     SV *herewas;
12160     I32 op_type = OP_SCALAR;
12161     I32 len;
12162     SV *tmpstr;
12163     char term;
12164     const char *found_newline;
12165     register char *d;
12166     register char *e;
12167     char *peek;
12168     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12169 #ifdef PERL_MAD
12170     I32 stuffstart = s - SvPVX(PL_linestr);
12171     char *tstart;
12172
12173     PL_realtokenstart = -1;
12174 #endif
12175
12176     PERL_ARGS_ASSERT_SCAN_HEREDOC;
12177
12178     s += 2;
12179     d = PL_tokenbuf;
12180     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12181     if (!outer)
12182         *d++ = '\n';
12183     peek = s;
12184     while (SPACE_OR_TAB(*peek))
12185         peek++;
12186     if (*peek == '`' || *peek == '\'' || *peek =='"') {
12187         s = peek;
12188         term = *s++;
12189         s = delimcpy(d, e, s, PL_bufend, term, &len);
12190         d += len;
12191         if (s < PL_bufend)
12192             s++;
12193     }
12194     else {
12195         if (*s == '\\')
12196             s++, term = '\'';
12197         else
12198             term = '"';
12199         if (!isALNUM_lazy_if(s,UTF))
12200             deprecate("bare << to mean <<\"\"");
12201         for (; isALNUM_lazy_if(s,UTF); s++) {
12202             if (d < e)
12203                 *d++ = *s;
12204         }
12205     }
12206     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12207         Perl_croak(aTHX_ "Delimiter for here document is too long");
12208     *d++ = '\n';
12209     *d = '\0';
12210     len = d - PL_tokenbuf;
12211
12212 #ifdef PERL_MAD
12213     if (PL_madskills) {
12214         tstart = PL_tokenbuf + !outer;
12215         PL_thisclose = newSVpvn(tstart, len - !outer);
12216         tstart = SvPVX(PL_linestr) + stuffstart;
12217         PL_thisopen = newSVpvn(tstart, s - tstart);
12218         stuffstart = s - SvPVX(PL_linestr);
12219     }
12220 #endif
12221 #ifndef PERL_STRICT_CR
12222     d = strchr(s, '\r');
12223     if (d) {
12224         char * const olds = s;
12225         s = d;
12226         while (s < PL_bufend) {
12227             if (*s == '\r') {
12228                 *d++ = '\n';
12229                 if (*++s == '\n')
12230                     s++;
12231             }
12232             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
12233                 *d++ = *s++;
12234                 s++;
12235             }
12236             else
12237                 *d++ = *s++;
12238         }
12239         *d = '\0';
12240         PL_bufend = d;
12241         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12242         s = olds;
12243     }
12244 #endif
12245 #ifdef PERL_MAD
12246     found_newline = 0;
12247 #endif
12248     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12249         herewas = newSVpvn(s,PL_bufend-s);
12250     }
12251     else {
12252 #ifdef PERL_MAD
12253         herewas = newSVpvn(s-1,found_newline-s+1);
12254 #else
12255         s--;
12256         herewas = newSVpvn(s,found_newline-s);
12257 #endif
12258     }
12259 #ifdef PERL_MAD
12260     if (PL_madskills) {
12261         tstart = SvPVX(PL_linestr) + stuffstart;
12262         if (PL_thisstuff)
12263             sv_catpvn(PL_thisstuff, tstart, s - tstart);
12264         else
12265             PL_thisstuff = newSVpvn(tstart, s - tstart);
12266     }
12267 #endif
12268     s += SvCUR(herewas);
12269
12270 #ifdef PERL_MAD
12271     stuffstart = s - SvPVX(PL_linestr);
12272
12273     if (found_newline)
12274         s--;
12275 #endif
12276
12277     tmpstr = newSV_type(SVt_PVIV);
12278     SvGROW(tmpstr, 80);
12279     if (term == '\'') {
12280         op_type = OP_CONST;
12281         SvIV_set(tmpstr, -1);
12282     }
12283     else if (term == '`') {
12284         op_type = OP_BACKTICK;
12285         SvIV_set(tmpstr, '\\');
12286     }
12287
12288     CLINE;
12289     PL_multi_start = CopLINE(PL_curcop);
12290     PL_multi_open = PL_multi_close = '<';
12291     term = *PL_tokenbuf;
12292     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12293         char * const bufptr = PL_sublex_info.super_bufptr;
12294         char * const bufend = PL_sublex_info.super_bufend;
12295         char * const olds = s - SvCUR(herewas);
12296         s = strchr(bufptr, '\n');
12297         if (!s)
12298             s = bufend;
12299         d = s;
12300         while (s < bufend &&
12301           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12302             if (*s++ == '\n')
12303                 CopLINE_inc(PL_curcop);
12304         }
12305         if (s >= bufend) {
12306             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12307             missingterm(PL_tokenbuf);
12308         }
12309         sv_setpvn(herewas,bufptr,d-bufptr+1);
12310         sv_setpvn(tmpstr,d+1,s-d);
12311         s += len - 1;
12312         sv_catpvn(herewas,s,bufend-s);
12313         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12314
12315         s = olds;
12316         goto retval;
12317     }
12318     else if (!outer) {
12319         d = s;
12320         while (s < PL_bufend &&
12321           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12322             if (*s++ == '\n')
12323                 CopLINE_inc(PL_curcop);
12324         }
12325         if (s >= PL_bufend) {
12326             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12327             missingterm(PL_tokenbuf);
12328         }
12329         sv_setpvn(tmpstr,d+1,s-d);
12330 #ifdef PERL_MAD
12331         if (PL_madskills) {
12332             if (PL_thisstuff)
12333                 sv_catpvn(PL_thisstuff, d + 1, s - d);
12334             else
12335                 PL_thisstuff = newSVpvn(d + 1, s - d);
12336             stuffstart = s - SvPVX(PL_linestr);
12337         }
12338 #endif
12339         s += len - 1;
12340         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12341
12342         sv_catpvn(herewas,s,PL_bufend-s);
12343         sv_setsv(PL_linestr,herewas);
12344         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12345         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12346         PL_last_lop = PL_last_uni = NULL;
12347     }
12348     else
12349         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12350     while (s >= PL_bufend) {    /* multiple line string? */
12351 #ifdef PERL_MAD
12352         if (PL_madskills) {
12353             tstart = SvPVX(PL_linestr) + stuffstart;
12354             if (PL_thisstuff)
12355                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12356             else
12357                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12358         }
12359 #endif
12360         PL_bufptr = s;
12361         CopLINE_inc(PL_curcop);
12362         if (!outer || !lex_next_chunk(0)) {
12363             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12364             missingterm(PL_tokenbuf);
12365         }
12366         CopLINE_dec(PL_curcop);
12367         s = PL_bufptr;
12368 #ifdef PERL_MAD
12369         stuffstart = s - SvPVX(PL_linestr);
12370 #endif
12371         CopLINE_inc(PL_curcop);
12372         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12373         PL_last_lop = PL_last_uni = NULL;
12374 #ifndef PERL_STRICT_CR
12375         if (PL_bufend - PL_linestart >= 2) {
12376             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12377                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12378             {
12379                 PL_bufend[-2] = '\n';
12380                 PL_bufend--;
12381                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12382             }
12383             else if (PL_bufend[-1] == '\r')
12384                 PL_bufend[-1] = '\n';
12385         }
12386         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12387             PL_bufend[-1] = '\n';
12388 #endif
12389         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12390             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12391             *(SvPVX(PL_linestr) + off ) = ' ';
12392             sv_catsv(PL_linestr,herewas);
12393             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12394             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12395         }
12396         else {
12397             s = PL_bufend;
12398             sv_catsv(tmpstr,PL_linestr);
12399         }
12400     }
12401     s++;
12402 retval:
12403     PL_multi_end = CopLINE(PL_curcop);
12404     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12405         SvPV_shrink_to_cur(tmpstr);
12406     }
12407     SvREFCNT_dec(herewas);
12408     if (!IN_BYTES) {
12409         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12410             SvUTF8_on(tmpstr);
12411         else if (PL_encoding)
12412             sv_recode_to_utf8(tmpstr, PL_encoding);
12413     }
12414     PL_lex_stuff = tmpstr;
12415     pl_yylval.ival = op_type;
12416     return s;
12417 }
12418
12419 /* scan_inputsymbol
12420    takes: current position in input buffer
12421    returns: new position in input buffer
12422    side-effects: pl_yylval and lex_op are set.
12423
12424    This code handles:
12425
12426    <>           read from ARGV
12427    <FH>         read from filehandle
12428    <pkg::FH>    read from package qualified filehandle
12429    <pkg'FH>     read from package qualified filehandle
12430    <$fh>        read from filehandle in $fh
12431    <*.h>        filename glob
12432
12433 */
12434
12435 STATIC char *
12436 S_scan_inputsymbol(pTHX_ char *start)
12437 {
12438     dVAR;
12439     register char *s = start;           /* current position in buffer */
12440     char *end;
12441     I32 len;
12442     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12443     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12444
12445     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12446
12447     end = strchr(s, '\n');
12448     if (!end)
12449         end = PL_bufend;
12450     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12451
12452     /* die if we didn't have space for the contents of the <>,
12453        or if it didn't end, or if we see a newline
12454     */
12455
12456     if (len >= (I32)sizeof PL_tokenbuf)
12457         Perl_croak(aTHX_ "Excessively long <> operator");
12458     if (s >= end)
12459         Perl_croak(aTHX_ "Unterminated <> operator");
12460
12461     s++;
12462
12463     /* check for <$fh>
12464        Remember, only scalar variables are interpreted as filehandles by
12465        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12466        treated as a glob() call.
12467        This code makes use of the fact that except for the $ at the front,
12468        a scalar variable and a filehandle look the same.
12469     */
12470     if (*d == '$' && d[1]) d++;
12471
12472     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12473     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12474         d++;
12475
12476     /* If we've tried to read what we allow filehandles to look like, and
12477        there's still text left, then it must be a glob() and not a getline.
12478        Use scan_str to pull out the stuff between the <> and treat it
12479        as nothing more than a string.
12480     */
12481
12482     if (d - PL_tokenbuf != len) {
12483         pl_yylval.ival = OP_GLOB;
12484         s = scan_str(start,!!PL_madskills,FALSE);
12485         if (!s)
12486            Perl_croak(aTHX_ "Glob not terminated");
12487         return s;
12488     }
12489     else {
12490         bool readline_overriden = FALSE;
12491         GV *gv_readline;
12492         GV **gvp;
12493         /* we're in a filehandle read situation */
12494         d = PL_tokenbuf;
12495
12496         /* turn <> into <ARGV> */
12497         if (!len)
12498             Copy("ARGV",d,5,char);
12499
12500         /* Check whether readline() is overriden */
12501         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12502         if ((gv_readline
12503                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12504                 ||
12505                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12506                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12507                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12508             readline_overriden = TRUE;
12509
12510         /* if <$fh>, create the ops to turn the variable into a
12511            filehandle
12512         */
12513         if (*d == '$') {
12514             /* try to find it in the pad for this block, otherwise find
12515                add symbol table ops
12516             */
12517             const PADOFFSET tmp = pad_findmy(d, len, 0);
12518             if (tmp != NOT_IN_PAD) {
12519                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12520                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12521                     HEK * const stashname = HvNAME_HEK(stash);
12522                     SV * const sym = sv_2mortal(newSVhek(stashname));
12523                     sv_catpvs(sym, "::");
12524                     sv_catpv(sym, d+1);
12525                     d = SvPVX(sym);
12526                     goto intro_sym;
12527                 }
12528                 else {
12529                     OP * const o = newOP(OP_PADSV, 0);
12530                     o->op_targ = tmp;
12531                     PL_lex_op = readline_overriden
12532                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12533                                 op_append_elem(OP_LIST, o,
12534                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12535                         : (OP*)newUNOP(OP_READLINE, 0, o);
12536                 }
12537             }
12538             else {
12539                 GV *gv;
12540                 ++d;
12541 intro_sym:
12542                 gv = gv_fetchpv(d,
12543                                 (PL_in_eval
12544                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12545                                  : GV_ADDMULTI),
12546                                 SVt_PV);
12547                 PL_lex_op = readline_overriden
12548                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12549                             op_append_elem(OP_LIST,
12550                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12551                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12552                     : (OP*)newUNOP(OP_READLINE, 0,
12553                             newUNOP(OP_RV2SV, 0,
12554                                 newGVOP(OP_GV, 0, gv)));
12555             }
12556             if (!readline_overriden)
12557                 PL_lex_op->op_flags |= OPf_SPECIAL;
12558             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12559             pl_yylval.ival = OP_NULL;
12560         }
12561
12562         /* If it's none of the above, it must be a literal filehandle
12563            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12564         else {
12565             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12566             PL_lex_op = readline_overriden
12567                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12568                         op_append_elem(OP_LIST,
12569                             newGVOP(OP_GV, 0, gv),
12570                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12571                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12572             pl_yylval.ival = OP_NULL;
12573         }
12574     }
12575
12576     return s;
12577 }
12578
12579
12580 /* scan_str
12581    takes: start position in buffer
12582           keep_quoted preserve \ on the embedded delimiter(s)
12583           keep_delims preserve the delimiters around the string
12584    returns: position to continue reading from buffer
12585    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12586         updates the read buffer.
12587
12588    This subroutine pulls a string out of the input.  It is called for:
12589         q               single quotes           q(literal text)
12590         '               single quotes           'literal text'
12591         qq              double quotes           qq(interpolate $here please)
12592         "               double quotes           "interpolate $here please"
12593         qx              backticks               qx(/bin/ls -l)
12594         `               backticks               `/bin/ls -l`
12595         qw              quote words             @EXPORT_OK = qw( func() $spam )
12596         m//             regexp match            m/this/
12597         s///            regexp substitute       s/this/that/
12598         tr///           string transliterate    tr/this/that/
12599         y///            string transliterate    y/this/that/
12600         ($*@)           sub prototypes          sub foo ($)
12601         (stuff)         sub attr parameters     sub foo : attr(stuff)
12602         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12603
12604    In most of these cases (all but <>, patterns and transliterate)
12605    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12606    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12607    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12608    calls scan_str().
12609
12610    It skips whitespace before the string starts, and treats the first
12611    character as the delimiter.  If the delimiter is one of ([{< then
12612    the corresponding "close" character )]}> is used as the closing
12613    delimiter.  It allows quoting of delimiters, and if the string has
12614    balanced delimiters ([{<>}]) it allows nesting.
12615
12616    On success, the SV with the resulting string is put into lex_stuff or,
12617    if that is already non-NULL, into lex_repl. The second case occurs only
12618    when parsing the RHS of the special constructs s/// and tr/// (y///).
12619    For convenience, the terminating delimiter character is stuffed into
12620    SvIVX of the SV.
12621 */
12622
12623 STATIC char *
12624 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12625 {
12626     dVAR;
12627     SV *sv;                             /* scalar value: string */
12628     const char *tmps;                   /* temp string, used for delimiter matching */
12629     register char *s = start;           /* current position in the buffer */
12630     register char term;                 /* terminating character */
12631     register char *to;                  /* current position in the sv's data */
12632     I32 brackets = 1;                   /* bracket nesting level */
12633     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12634     I32 termcode;                       /* terminating char. code */
12635     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12636     STRLEN termlen;                     /* length of terminating string */
12637     int last_off = 0;                   /* last position for nesting bracket */
12638 #ifdef PERL_MAD
12639     int stuffstart;
12640     char *tstart;
12641 #endif
12642
12643     PERL_ARGS_ASSERT_SCAN_STR;
12644
12645     /* skip space before the delimiter */
12646     if (isSPACE(*s)) {
12647         s = PEEKSPACE(s);
12648     }
12649
12650 #ifdef PERL_MAD
12651     if (PL_realtokenstart >= 0) {
12652         stuffstart = PL_realtokenstart;
12653         PL_realtokenstart = -1;
12654     }
12655     else
12656         stuffstart = start - SvPVX(PL_linestr);
12657 #endif
12658     /* mark where we are, in case we need to report errors */
12659     CLINE;
12660
12661     /* after skipping whitespace, the next character is the terminator */
12662     term = *s;
12663     if (!UTF) {
12664         termcode = termstr[0] = term;
12665         termlen = 1;
12666     }
12667     else {
12668         termcode = utf8_to_uvchr((U8*)s, &termlen);
12669         Copy(s, termstr, termlen, U8);
12670         if (!UTF8_IS_INVARIANT(term))
12671             has_utf8 = TRUE;
12672     }
12673
12674     /* mark where we are */
12675     PL_multi_start = CopLINE(PL_curcop);
12676     PL_multi_open = term;
12677
12678     /* find corresponding closing delimiter */
12679     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12680         termcode = termstr[0] = term = tmps[5];
12681
12682     PL_multi_close = term;
12683
12684     /* create a new SV to hold the contents.  79 is the SV's initial length.
12685        What a random number. */
12686     sv = newSV_type(SVt_PVIV);
12687     SvGROW(sv, 80);
12688     SvIV_set(sv, termcode);
12689     (void)SvPOK_only(sv);               /* validate pointer */
12690
12691     /* move past delimiter and try to read a complete string */
12692     if (keep_delims)
12693         sv_catpvn(sv, s, termlen);
12694     s += termlen;
12695 #ifdef PERL_MAD
12696     tstart = SvPVX(PL_linestr) + stuffstart;
12697     if (!PL_thisopen && !keep_delims) {
12698         PL_thisopen = newSVpvn(tstart, s - tstart);
12699         stuffstart = s - SvPVX(PL_linestr);
12700     }
12701 #endif
12702     for (;;) {
12703         if (PL_encoding && !UTF) {
12704             bool cont = TRUE;
12705
12706             while (cont) {
12707                 int offset = s - SvPVX_const(PL_linestr);
12708                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12709                                            &offset, (char*)termstr, termlen);
12710                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12711                 char * const svlast = SvEND(sv) - 1;
12712
12713                 for (; s < ns; s++) {
12714                     if (*s == '\n' && !PL_rsfp)
12715                         CopLINE_inc(PL_curcop);
12716                 }
12717                 if (!found)
12718                     goto read_more_line;
12719                 else {
12720                     /* handle quoted delimiters */
12721                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12722                         const char *t;
12723                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12724                             t--;
12725                         if ((svlast-1 - t) % 2) {
12726                             if (!keep_quoted) {
12727                                 *(svlast-1) = term;
12728                                 *svlast = '\0';
12729                                 SvCUR_set(sv, SvCUR(sv) - 1);
12730                             }
12731                             continue;
12732                         }
12733                     }
12734                     if (PL_multi_open == PL_multi_close) {
12735                         cont = FALSE;
12736                     }
12737                     else {
12738                         const char *t;
12739                         char *w;
12740                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12741                             /* At here, all closes are "was quoted" one,
12742                                so we don't check PL_multi_close. */
12743                             if (*t == '\\') {
12744                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12745                                     t++;
12746                                 else
12747                                     *w++ = *t++;
12748                             }
12749                             else if (*t == PL_multi_open)
12750                                 brackets++;
12751
12752                             *w = *t;
12753                         }
12754                         if (w < t) {
12755                             *w++ = term;
12756                             *w = '\0';
12757                             SvCUR_set(sv, w - SvPVX_const(sv));
12758                         }
12759                         last_off = w - SvPVX(sv);
12760                         if (--brackets <= 0)
12761                             cont = FALSE;
12762                     }
12763                 }
12764             }
12765             if (!keep_delims) {
12766                 SvCUR_set(sv, SvCUR(sv) - 1);
12767                 *SvEND(sv) = '\0';
12768             }
12769             break;
12770         }
12771
12772         /* extend sv if need be */
12773         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12774         /* set 'to' to the next character in the sv's string */
12775         to = SvPVX(sv)+SvCUR(sv);
12776
12777         /* if open delimiter is the close delimiter read unbridle */
12778         if (PL_multi_open == PL_multi_close) {
12779             for (; s < PL_bufend; s++,to++) {
12780                 /* embedded newlines increment the current line number */
12781                 if (*s == '\n' && !PL_rsfp)
12782                     CopLINE_inc(PL_curcop);
12783                 /* handle quoted delimiters */
12784                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12785                     if (!keep_quoted && s[1] == term)
12786                         s++;
12787                 /* any other quotes are simply copied straight through */
12788                     else
12789                         *to++ = *s++;
12790                 }
12791                 /* terminate when run out of buffer (the for() condition), or
12792                    have found the terminator */
12793                 else if (*s == term) {
12794                     if (termlen == 1)
12795                         break;
12796                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12797                         break;
12798                 }
12799                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12800                     has_utf8 = TRUE;
12801                 *to = *s;
12802             }
12803         }
12804
12805         /* if the terminator isn't the same as the start character (e.g.,
12806            matched brackets), we have to allow more in the quoting, and
12807            be prepared for nested brackets.
12808         */
12809         else {
12810             /* read until we run out of string, or we find the terminator */
12811             for (; s < PL_bufend; s++,to++) {
12812                 /* embedded newlines increment the line count */
12813                 if (*s == '\n' && !PL_rsfp)
12814                     CopLINE_inc(PL_curcop);
12815                 /* backslashes can escape the open or closing characters */
12816                 if (*s == '\\' && s+1 < PL_bufend) {
12817                     if (!keep_quoted &&
12818                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12819                         s++;
12820                     else
12821                         *to++ = *s++;
12822                 }
12823                 /* allow nested opens and closes */
12824                 else if (*s == PL_multi_close && --brackets <= 0)
12825                     break;
12826                 else if (*s == PL_multi_open)
12827                     brackets++;
12828                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12829                     has_utf8 = TRUE;
12830                 *to = *s;
12831             }
12832         }
12833         /* terminate the copied string and update the sv's end-of-string */
12834         *to = '\0';
12835         SvCUR_set(sv, to - SvPVX_const(sv));
12836
12837         /*
12838          * this next chunk reads more into the buffer if we're not done yet
12839          */
12840
12841         if (s < PL_bufend)
12842             break;              /* handle case where we are done yet :-) */
12843
12844 #ifndef PERL_STRICT_CR
12845         if (to - SvPVX_const(sv) >= 2) {
12846             if ((to[-2] == '\r' && to[-1] == '\n') ||
12847                 (to[-2] == '\n' && to[-1] == '\r'))
12848             {
12849                 to[-2] = '\n';
12850                 to--;
12851                 SvCUR_set(sv, to - SvPVX_const(sv));
12852             }
12853             else if (to[-1] == '\r')
12854                 to[-1] = '\n';
12855         }
12856         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12857             to[-1] = '\n';
12858 #endif
12859
12860      read_more_line:
12861         /* if we're out of file, or a read fails, bail and reset the current
12862            line marker so we can report where the unterminated string began
12863         */
12864 #ifdef PERL_MAD
12865         if (PL_madskills) {
12866             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12867             if (PL_thisstuff)
12868                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12869             else
12870                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12871         }
12872 #endif
12873         CopLINE_inc(PL_curcop);
12874         PL_bufptr = PL_bufend;
12875         if (!lex_next_chunk(0)) {
12876             sv_free(sv);
12877             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12878             return NULL;
12879         }
12880         s = PL_bufptr;
12881 #ifdef PERL_MAD
12882         stuffstart = 0;
12883 #endif
12884     }
12885
12886     /* at this point, we have successfully read the delimited string */
12887
12888     if (!PL_encoding || UTF) {
12889 #ifdef PERL_MAD
12890         if (PL_madskills) {
12891             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12892             const int len = s - tstart;
12893             if (PL_thisstuff)
12894                 sv_catpvn(PL_thisstuff, tstart, len);
12895             else
12896                 PL_thisstuff = newSVpvn(tstart, len);
12897             if (!PL_thisclose && !keep_delims)
12898                 PL_thisclose = newSVpvn(s,termlen);
12899         }
12900 #endif
12901
12902         if (keep_delims)
12903             sv_catpvn(sv, s, termlen);
12904         s += termlen;
12905     }
12906 #ifdef PERL_MAD
12907     else {
12908         if (PL_madskills) {
12909             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12910             const int len = s - tstart - termlen;
12911             if (PL_thisstuff)
12912                 sv_catpvn(PL_thisstuff, tstart, len);
12913             else
12914                 PL_thisstuff = newSVpvn(tstart, len);
12915             if (!PL_thisclose && !keep_delims)
12916                 PL_thisclose = newSVpvn(s - termlen,termlen);
12917         }
12918     }
12919 #endif
12920     if (has_utf8 || PL_encoding)
12921         SvUTF8_on(sv);
12922
12923     PL_multi_end = CopLINE(PL_curcop);
12924
12925     /* if we allocated too much space, give some back */
12926     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12927         SvLEN_set(sv, SvCUR(sv) + 1);
12928         SvPV_renew(sv, SvLEN(sv));
12929     }
12930
12931     /* decide whether this is the first or second quoted string we've read
12932        for this op
12933     */
12934
12935     if (PL_lex_stuff)
12936         PL_lex_repl = sv;
12937     else
12938         PL_lex_stuff = sv;
12939     return s;
12940 }
12941
12942 /*
12943   scan_num
12944   takes: pointer to position in buffer
12945   returns: pointer to new position in buffer
12946   side-effects: builds ops for the constant in pl_yylval.op
12947
12948   Read a number in any of the formats that Perl accepts:
12949
12950   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12951   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12952   0b[01](_?[01])*
12953   0[0-7](_?[0-7])*
12954   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12955
12956   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12957   thing it reads.
12958
12959   If it reads a number without a decimal point or an exponent, it will
12960   try converting the number to an integer and see if it can do so
12961   without loss of precision.
12962 */
12963
12964 char *
12965 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12966 {
12967     dVAR;
12968     register const char *s = start;     /* current position in buffer */
12969     register char *d;                   /* destination in temp buffer */
12970     register char *e;                   /* end of temp buffer */
12971     NV nv;                              /* number read, as a double */
12972     SV *sv = NULL;                      /* place to put the converted number */
12973     bool floatit;                       /* boolean: int or float? */
12974     const char *lastub = NULL;          /* position of last underbar */
12975     static char const number_too_long[] = "Number too long";
12976
12977     PERL_ARGS_ASSERT_SCAN_NUM;
12978
12979     /* We use the first character to decide what type of number this is */
12980
12981     switch (*s) {
12982     default:
12983       Perl_croak(aTHX_ "panic: scan_num");
12984
12985     /* if it starts with a 0, it could be an octal number, a decimal in
12986        0.13 disguise, or a hexadecimal number, or a binary number. */
12987     case '0':
12988         {
12989           /* variables:
12990              u          holds the "number so far"
12991              shift      the power of 2 of the base
12992                         (hex == 4, octal == 3, binary == 1)
12993              overflowed was the number more than we can hold?
12994
12995              Shift is used when we add a digit.  It also serves as an "are
12996              we in octal/hex/binary?" indicator to disallow hex characters
12997              when in octal mode.
12998            */
12999             NV n = 0.0;
13000             UV u = 0;
13001             I32 shift;
13002             bool overflowed = FALSE;
13003             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
13004             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13005             static const char* const bases[5] =
13006               { "", "binary", "", "octal", "hexadecimal" };
13007             static const char* const Bases[5] =
13008               { "", "Binary", "", "Octal", "Hexadecimal" };
13009             static const char* const maxima[5] =
13010               { "",
13011                 "0b11111111111111111111111111111111",
13012                 "",
13013                 "037777777777",
13014                 "0xffffffff" };
13015             const char *base, *Base, *max;
13016
13017             /* check for hex */
13018             if (s[1] == 'x' || s[1] == 'X') {
13019                 shift = 4;
13020                 s += 2;
13021                 just_zero = FALSE;
13022             } else if (s[1] == 'b' || s[1] == 'B') {
13023                 shift = 1;
13024                 s += 2;
13025                 just_zero = FALSE;
13026             }
13027             /* check for a decimal in disguise */
13028             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
13029                 goto decimal;
13030             /* so it must be octal */
13031             else {
13032                 shift = 3;
13033                 s++;
13034             }
13035
13036             if (*s == '_') {
13037                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13038                                "Misplaced _ in number");
13039                lastub = s++;
13040             }
13041
13042             base = bases[shift];
13043             Base = Bases[shift];
13044             max  = maxima[shift];
13045
13046             /* read the rest of the number */
13047             for (;;) {
13048                 /* x is used in the overflow test,
13049                    b is the digit we're adding on. */
13050                 UV x, b;
13051
13052                 switch (*s) {
13053
13054                 /* if we don't mention it, we're done */
13055                 default:
13056                     goto out;
13057
13058                 /* _ are ignored -- but warned about if consecutive */
13059                 case '_':
13060                     if (lastub && s == lastub + 1)
13061                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13062                                        "Misplaced _ in number");
13063                     lastub = s++;
13064                     break;
13065
13066                 /* 8 and 9 are not octal */
13067                 case '8': case '9':
13068                     if (shift == 3)
13069                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13070                     /* FALL THROUGH */
13071
13072                 /* octal digits */
13073                 case '2': case '3': case '4':
13074                 case '5': case '6': case '7':
13075                     if (shift == 1)
13076                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13077                     /* FALL THROUGH */
13078
13079                 case '0': case '1':
13080                     b = *s++ & 15;              /* ASCII digit -> value of digit */
13081                     goto digit;
13082
13083                 /* hex digits */
13084                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13085                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13086                     /* make sure they said 0x */
13087                     if (shift != 4)
13088                         goto out;
13089                     b = (*s++ & 7) + 9;
13090
13091                     /* Prepare to put the digit we have onto the end
13092                        of the number so far.  We check for overflows.
13093                     */
13094
13095                   digit:
13096                     just_zero = FALSE;
13097                     if (!overflowed) {
13098                         x = u << shift; /* make room for the digit */
13099
13100                         if ((x >> shift) != u
13101                             && !(PL_hints & HINT_NEW_BINARY)) {
13102                             overflowed = TRUE;
13103                             n = (NV) u;
13104                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13105                                              "Integer overflow in %s number",
13106                                              base);
13107                         } else
13108                             u = x | b;          /* add the digit to the end */
13109                     }
13110                     if (overflowed) {
13111                         n *= nvshift[shift];
13112                         /* If an NV has not enough bits in its
13113                          * mantissa to represent an UV this summing of
13114                          * small low-order numbers is a waste of time
13115                          * (because the NV cannot preserve the
13116                          * low-order bits anyway): we could just
13117                          * remember when did we overflow and in the
13118                          * end just multiply n by the right
13119                          * amount. */
13120                         n += (NV) b;
13121                     }
13122                     break;
13123                 }
13124             }
13125
13126           /* if we get here, we had success: make a scalar value from
13127              the number.
13128           */
13129           out:
13130
13131             /* final misplaced underbar check */
13132             if (s[-1] == '_') {
13133                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13134             }
13135
13136             if (overflowed) {
13137                 if (n > 4294967295.0)
13138                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13139                                    "%s number > %s non-portable",
13140                                    Base, max);
13141                 sv = newSVnv(n);
13142             }
13143             else {
13144 #if UVSIZE > 4
13145                 if (u > 0xffffffff)
13146                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13147                                    "%s number > %s non-portable",
13148                                    Base, max);
13149 #endif
13150                 sv = newSVuv(u);
13151             }
13152             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13153                 sv = new_constant(start, s - start, "integer",
13154                                   sv, NULL, NULL, 0);
13155             else if (PL_hints & HINT_NEW_BINARY)
13156                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13157         }
13158         break;
13159
13160     /*
13161       handle decimal numbers.
13162       we're also sent here when we read a 0 as the first digit
13163     */
13164     case '1': case '2': case '3': case '4': case '5':
13165     case '6': case '7': case '8': case '9': case '.':
13166       decimal:
13167         d = PL_tokenbuf;
13168         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13169         floatit = FALSE;
13170
13171         /* read next group of digits and _ and copy into d */
13172         while (isDIGIT(*s) || *s == '_') {
13173             /* skip underscores, checking for misplaced ones
13174                if -w is on
13175             */
13176             if (*s == '_') {
13177                 if (lastub && s == lastub + 1)
13178                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13179                                    "Misplaced _ in number");
13180                 lastub = s++;
13181             }
13182             else {
13183                 /* check for end of fixed-length buffer */
13184                 if (d >= e)
13185                     Perl_croak(aTHX_ number_too_long);
13186                 /* if we're ok, copy the character */
13187                 *d++ = *s++;
13188             }
13189         }
13190
13191         /* final misplaced underbar check */
13192         if (lastub && s == lastub + 1) {
13193             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13194         }
13195
13196         /* read a decimal portion if there is one.  avoid
13197            3..5 being interpreted as the number 3. followed
13198            by .5
13199         */
13200         if (*s == '.' && s[1] != '.') {
13201             floatit = TRUE;
13202             *d++ = *s++;
13203
13204             if (*s == '_') {
13205                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13206                                "Misplaced _ in number");
13207                 lastub = s;
13208             }
13209
13210             /* copy, ignoring underbars, until we run out of digits.
13211             */
13212             for (; isDIGIT(*s) || *s == '_'; s++) {
13213                 /* fixed length buffer check */
13214                 if (d >= e)
13215                     Perl_croak(aTHX_ number_too_long);
13216                 if (*s == '_') {
13217                    if (lastub && s == lastub + 1)
13218                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13219                                       "Misplaced _ in number");
13220                    lastub = s;
13221                 }
13222                 else
13223                     *d++ = *s;
13224             }
13225             /* fractional part ending in underbar? */
13226             if (s[-1] == '_') {
13227                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13228                                "Misplaced _ in number");
13229             }
13230             if (*s == '.' && isDIGIT(s[1])) {
13231                 /* oops, it's really a v-string, but without the "v" */
13232                 s = start;
13233                 goto vstring;
13234             }
13235         }
13236
13237         /* read exponent part, if present */
13238         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13239             floatit = TRUE;
13240             s++;
13241
13242             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13243             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
13244
13245             /* stray preinitial _ */
13246             if (*s == '_') {
13247                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13248                                "Misplaced _ in number");
13249                 lastub = s++;
13250             }
13251
13252             /* allow positive or negative exponent */
13253             if (*s == '+' || *s == '-')
13254                 *d++ = *s++;
13255
13256             /* stray initial _ */
13257             if (*s == '_') {
13258                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13259                                "Misplaced _ in number");
13260                 lastub = s++;
13261             }
13262
13263             /* read digits of exponent */
13264             while (isDIGIT(*s) || *s == '_') {
13265                 if (isDIGIT(*s)) {
13266                     if (d >= e)
13267                         Perl_croak(aTHX_ number_too_long);
13268                     *d++ = *s++;
13269                 }
13270                 else {
13271                    if (((lastub && s == lastub + 1) ||
13272                         (!isDIGIT(s[1]) && s[1] != '_')))
13273                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13274                                       "Misplaced _ in number");
13275                    lastub = s++;
13276                 }
13277             }
13278         }
13279
13280
13281         /*
13282            We try to do an integer conversion first if no characters
13283            indicating "float" have been found.
13284          */
13285
13286         if (!floatit) {
13287             UV uv;
13288             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13289
13290             if (flags == IS_NUMBER_IN_UV) {
13291               if (uv <= IV_MAX)
13292                 sv = newSViv(uv); /* Prefer IVs over UVs. */
13293               else
13294                 sv = newSVuv(uv);
13295             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13296               if (uv <= (UV) IV_MIN)
13297                 sv = newSViv(-(IV)uv);
13298               else
13299                 floatit = TRUE;
13300             } else
13301               floatit = TRUE;
13302         }
13303         if (floatit) {
13304             /* terminate the string */
13305             *d = '\0';
13306             nv = Atof(PL_tokenbuf);
13307             sv = newSVnv(nv);
13308         }
13309
13310         if ( floatit
13311              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13312             const char *const key = floatit ? "float" : "integer";
13313             const STRLEN keylen = floatit ? 5 : 7;
13314             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13315                                 key, keylen, sv, NULL, NULL, 0);
13316         }
13317         break;
13318
13319     /* if it starts with a v, it could be a v-string */
13320     case 'v':
13321 vstring:
13322                 sv = newSV(5); /* preallocate storage space */
13323                 s = scan_vstring(s, PL_bufend, sv);
13324         break;
13325     }
13326
13327     /* make the op for the constant and return */
13328
13329     if (sv)
13330         lvalp->opval = newSVOP(OP_CONST, 0, sv);
13331     else
13332         lvalp->opval = NULL;
13333
13334     return (char *)s;
13335 }
13336
13337 STATIC char *
13338 S_scan_formline(pTHX_ register char *s)
13339 {
13340     dVAR;
13341     register char *eol;
13342     register char *t;
13343     SV * const stuff = newSVpvs("");
13344     bool needargs = FALSE;
13345     bool eofmt = FALSE;
13346 #ifdef PERL_MAD
13347     char *tokenstart = s;
13348     SV* savewhite = NULL;
13349
13350     if (PL_madskills) {
13351         savewhite = PL_thiswhite;
13352         PL_thiswhite = 0;
13353     }
13354 #endif
13355
13356     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13357
13358     while (!needargs) {
13359         if (*s == '.') {
13360             t = s+1;
13361 #ifdef PERL_STRICT_CR
13362             while (SPACE_OR_TAB(*t))
13363                 t++;
13364 #else
13365             while (SPACE_OR_TAB(*t) || *t == '\r')
13366                 t++;
13367 #endif
13368             if (*t == '\n' || t == PL_bufend) {
13369                 eofmt = TRUE;
13370                 break;
13371             }
13372         }
13373         if (PL_in_eval && !PL_rsfp) {
13374             eol = (char *) memchr(s,'\n',PL_bufend-s);
13375             if (!eol++)
13376                 eol = PL_bufend;
13377         }
13378         else
13379             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13380         if (*s != '#') {
13381             for (t = s; t < eol; t++) {
13382                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13383                     needargs = FALSE;
13384                     goto enough;        /* ~~ must be first line in formline */
13385                 }
13386                 if (*t == '@' || *t == '^')
13387                     needargs = TRUE;
13388             }
13389             if (eol > s) {
13390                 sv_catpvn(stuff, s, eol-s);
13391 #ifndef PERL_STRICT_CR
13392                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13393                     char *end = SvPVX(stuff) + SvCUR(stuff);
13394                     end[-2] = '\n';
13395                     end[-1] = '\0';
13396                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13397                 }
13398 #endif
13399             }
13400             else
13401               break;
13402         }
13403         s = (char*)eol;
13404         if (PL_rsfp) {
13405             bool got_some;
13406 #ifdef PERL_MAD
13407             if (PL_madskills) {
13408                 if (PL_thistoken)
13409                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13410                 else
13411                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13412             }
13413 #endif
13414             PL_bufptr = PL_bufend;
13415             CopLINE_inc(PL_curcop);
13416             got_some = lex_next_chunk(0);
13417             CopLINE_dec(PL_curcop);
13418             s = PL_bufptr;
13419 #ifdef PERL_MAD
13420             tokenstart = PL_bufptr;
13421 #endif
13422             if (!got_some)
13423                 break;
13424         }
13425         incline(s);
13426     }
13427   enough:
13428     if (SvCUR(stuff)) {
13429         PL_expect = XTERM;
13430         if (needargs) {
13431             PL_lex_state = LEX_NORMAL;
13432             start_force(PL_curforce);
13433             NEXTVAL_NEXTTOKE.ival = 0;
13434             force_next(',');
13435         }
13436         else
13437             PL_lex_state = LEX_FORMLINE;
13438         if (!IN_BYTES) {
13439             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13440                 SvUTF8_on(stuff);
13441             else if (PL_encoding)
13442                 sv_recode_to_utf8(stuff, PL_encoding);
13443         }
13444         start_force(PL_curforce);
13445         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13446         force_next(THING);
13447         start_force(PL_curforce);
13448         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13449         force_next(LSTOP);
13450     }
13451     else {
13452         SvREFCNT_dec(stuff);
13453         if (eofmt)
13454             PL_lex_formbrack = 0;
13455         PL_bufptr = s;
13456     }
13457 #ifdef PERL_MAD
13458     if (PL_madskills) {
13459         if (PL_thistoken)
13460             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13461         else
13462             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13463         PL_thiswhite = savewhite;
13464     }
13465 #endif
13466     return s;
13467 }
13468
13469 I32
13470 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13471 {
13472     dVAR;
13473     const I32 oldsavestack_ix = PL_savestack_ix;
13474     CV* const outsidecv = PL_compcv;
13475
13476     if (PL_compcv) {
13477         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13478     }
13479     SAVEI32(PL_subline);
13480     save_item(PL_subname);
13481     SAVESPTR(PL_compcv);
13482
13483     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13484     CvFLAGS(PL_compcv) |= flags;
13485
13486     PL_subline = CopLINE(PL_curcop);
13487     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13488     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13489     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13490
13491     return oldsavestack_ix;
13492 }
13493
13494 #ifdef __SC__
13495 #pragma segment Perl_yylex
13496 #endif
13497 static int
13498 S_yywarn(pTHX_ const char *const s)
13499 {
13500     dVAR;
13501
13502     PERL_ARGS_ASSERT_YYWARN;
13503
13504     PL_in_eval |= EVAL_WARNONLY;
13505     yyerror(s);
13506     PL_in_eval &= ~EVAL_WARNONLY;
13507     return 0;
13508 }
13509
13510 int
13511 Perl_yyerror(pTHX_ const char *const s)
13512 {
13513     dVAR;
13514     const char *where = NULL;
13515     const char *context = NULL;
13516     int contlen = -1;
13517     SV *msg;
13518     int yychar  = PL_parser->yychar;
13519
13520     PERL_ARGS_ASSERT_YYERROR;
13521
13522     if (!yychar || (yychar == ';' && !PL_rsfp))
13523         where = "at EOF";
13524     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13525       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13526       PL_oldbufptr != PL_bufptr) {
13527         /*
13528                 Only for NetWare:
13529                 The code below is removed for NetWare because it abends/crashes on NetWare
13530                 when the script has error such as not having the closing quotes like:
13531                     if ($var eq "value)
13532                 Checking of white spaces is anyway done in NetWare code.
13533         */
13534 #ifndef NETWARE
13535         while (isSPACE(*PL_oldoldbufptr))
13536             PL_oldoldbufptr++;
13537 #endif
13538         context = PL_oldoldbufptr;
13539         contlen = PL_bufptr - PL_oldoldbufptr;
13540     }
13541     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13542       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13543         /*
13544                 Only for NetWare:
13545                 The code below is removed for NetWare because it abends/crashes on NetWare
13546                 when the script has error such as not having the closing quotes like:
13547                     if ($var eq "value)
13548                 Checking of white spaces is anyway done in NetWare code.
13549         */
13550 #ifndef NETWARE
13551         while (isSPACE(*PL_oldbufptr))
13552             PL_oldbufptr++;
13553 #endif
13554         context = PL_oldbufptr;
13555         contlen = PL_bufptr - PL_oldbufptr;
13556     }
13557     else if (yychar > 255)
13558         where = "next token ???";
13559     else if (yychar == -2) { /* YYEMPTY */
13560         if (PL_lex_state == LEX_NORMAL ||
13561            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13562             where = "at end of line";
13563         else if (PL_lex_inpat)
13564             where = "within pattern";
13565         else
13566             where = "within string";
13567     }
13568     else {
13569         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13570         if (yychar < 32)
13571             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13572         else if (isPRINT_LC(yychar)) {
13573             const char string = yychar;
13574             sv_catpvn(where_sv, &string, 1);
13575         }
13576         else
13577             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13578         where = SvPVX_const(where_sv);
13579     }
13580     msg = sv_2mortal(newSVpv(s, 0));
13581     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13582         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13583     if (context)
13584         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13585     else
13586         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13587     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13588         Perl_sv_catpvf(aTHX_ msg,
13589         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13590                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13591         PL_multi_end = 0;
13592     }
13593     if (PL_in_eval & EVAL_WARNONLY) {
13594         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13595     }
13596     else
13597         qerror(msg);
13598     if (PL_error_count >= 10) {
13599         if (PL_in_eval && SvCUR(ERRSV))
13600             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13601                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13602         else
13603             Perl_croak(aTHX_ "%s has too many errors.\n",
13604             OutCopFILE(PL_curcop));
13605     }
13606     PL_in_my = 0;
13607     PL_in_my_stash = NULL;
13608     return 0;
13609 }
13610 #ifdef __SC__
13611 #pragma segment Main
13612 #endif
13613
13614 STATIC char*
13615 S_swallow_bom(pTHX_ U8 *s)
13616 {
13617     dVAR;
13618     const STRLEN slen = SvCUR(PL_linestr);
13619
13620     PERL_ARGS_ASSERT_SWALLOW_BOM;
13621
13622     switch (s[0]) {
13623     case 0xFF:
13624         if (s[1] == 0xFE) {
13625             /* UTF-16 little-endian? (or UTF-32LE?) */
13626             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13627                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13628 #ifndef PERL_NO_UTF16_FILTER
13629             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13630             s += 2;
13631             if (PL_bufend > (char*)s) {
13632                 s = add_utf16_textfilter(s, TRUE);
13633             }
13634 #else
13635             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13636 #endif
13637         }
13638         break;
13639     case 0xFE:
13640         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13641 #ifndef PERL_NO_UTF16_FILTER
13642             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13643             s += 2;
13644             if (PL_bufend > (char *)s) {
13645                 s = add_utf16_textfilter(s, FALSE);
13646             }
13647 #else
13648             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13649 #endif
13650         }
13651         break;
13652     case 0xEF:
13653         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13654             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13655             s += 3;                      /* UTF-8 */
13656         }
13657         break;
13658     case 0:
13659         if (slen > 3) {
13660              if (s[1] == 0) {
13661                   if (s[2] == 0xFE && s[3] == 0xFF) {
13662                        /* UTF-32 big-endian */
13663                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13664                   }
13665              }
13666              else if (s[2] == 0 && s[3] != 0) {
13667                   /* Leading bytes
13668                    * 00 xx 00 xx
13669                    * are a good indicator of UTF-16BE. */
13670 #ifndef PERL_NO_UTF16_FILTER
13671                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13672                   s = add_utf16_textfilter(s, FALSE);
13673 #else
13674                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13675 #endif
13676              }
13677         }
13678 #ifdef EBCDIC
13679     case 0xDD:
13680         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13681             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13682             s += 4;                      /* UTF-8 */
13683         }
13684         break;
13685 #endif
13686
13687     default:
13688          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13689                   /* Leading bytes
13690                    * xx 00 xx 00
13691                    * are a good indicator of UTF-16LE. */
13692 #ifndef PERL_NO_UTF16_FILTER
13693               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13694               s = add_utf16_textfilter(s, TRUE);
13695 #else
13696               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13697 #endif
13698          }
13699     }
13700     return (char*)s;
13701 }
13702
13703
13704 #ifndef PERL_NO_UTF16_FILTER
13705 static I32
13706 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13707 {
13708     dVAR;
13709     SV *const filter = FILTER_DATA(idx);
13710     /* We re-use this each time round, throwing the contents away before we
13711        return.  */
13712     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13713     SV *const utf8_buffer = filter;
13714     IV status = IoPAGE(filter);
13715     const bool reverse = cBOOL(IoLINES(filter));
13716     I32 retval;
13717
13718     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13719
13720     /* As we're automatically added, at the lowest level, and hence only called
13721        from this file, we can be sure that we're not called in block mode. Hence
13722        don't bother writing code to deal with block mode.  */
13723     if (maxlen) {
13724         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13725     }
13726     if (status < 0) {
13727         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13728     }
13729     DEBUG_P(PerlIO_printf(Perl_debug_log,
13730                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13731                           FPTR2DPTR(void *, S_utf16_textfilter),
13732                           reverse ? 'l' : 'b', idx, maxlen, status,
13733                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13734
13735     while (1) {
13736         STRLEN chars;
13737         STRLEN have;
13738         I32 newlen;
13739         U8 *end;
13740         /* First, look in our buffer of existing UTF-8 data:  */
13741         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13742
13743         if (nl) {
13744             ++nl;
13745         } else if (status == 0) {
13746             /* EOF */
13747             IoPAGE(filter) = 0;
13748             nl = SvEND(utf8_buffer);
13749         }
13750         if (nl) {
13751             STRLEN got = nl - SvPVX(utf8_buffer);
13752             /* Did we have anything to append?  */
13753             retval = got != 0;
13754             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13755             /* Everything else in this code works just fine if SVp_POK isn't
13756                set.  This, however, needs it, and we need it to work, else
13757                we loop infinitely because the buffer is never consumed.  */
13758             sv_chop(utf8_buffer, nl);
13759             break;
13760         }
13761
13762         /* OK, not a complete line there, so need to read some more UTF-16.
13763            Read an extra octect if the buffer currently has an odd number. */
13764         while (1) {
13765             if (status <= 0)
13766                 break;
13767             if (SvCUR(utf16_buffer) >= 2) {
13768                 /* Location of the high octet of the last complete code point.
13769                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13770                    *coupled* with all the benefits of partial reads and
13771                    endianness.  */
13772                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13773                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13774
13775                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13776                     break;
13777                 }
13778
13779                 /* We have the first half of a surrogate. Read more.  */
13780                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13781             }
13782
13783             status = FILTER_READ(idx + 1, utf16_buffer,
13784                                  160 + (SvCUR(utf16_buffer) & 1));
13785             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13786             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13787             if (status < 0) {
13788                 /* Error */
13789                 IoPAGE(filter) = status;
13790                 return status;
13791             }
13792         }
13793
13794         chars = SvCUR(utf16_buffer) >> 1;
13795         have = SvCUR(utf8_buffer);
13796         SvGROW(utf8_buffer, have + chars * 3 + 1);
13797
13798         if (reverse) {
13799             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13800                                          (U8*)SvPVX_const(utf8_buffer) + have,
13801                                          chars * 2, &newlen);
13802         } else {
13803             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13804                                 (U8*)SvPVX_const(utf8_buffer) + have,
13805                                 chars * 2, &newlen);
13806         }
13807         SvCUR_set(utf8_buffer, have + newlen);
13808         *end = '\0';
13809
13810         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13811            it's private to us, and utf16_to_utf8{,reversed} take a
13812            (pointer,length) pair, rather than a NUL-terminated string.  */
13813         if(SvCUR(utf16_buffer) & 1) {
13814             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13815             SvCUR_set(utf16_buffer, 1);
13816         } else {
13817             SvCUR_set(utf16_buffer, 0);
13818         }
13819     }
13820     DEBUG_P(PerlIO_printf(Perl_debug_log,
13821                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13822                           status,
13823                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13824     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13825     return retval;
13826 }
13827
13828 static U8 *
13829 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13830 {
13831     SV *filter = filter_add(S_utf16_textfilter, NULL);
13832
13833     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13834
13835     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13836     sv_setpvs(filter, "");
13837     IoLINES(filter) = reversed;
13838     IoPAGE(filter) = 1; /* Not EOF */
13839
13840     /* Sadly, we have to return a valid pointer, come what may, so we have to
13841        ignore any error return from this.  */
13842     SvCUR_set(PL_linestr, 0);
13843     if (FILTER_READ(0, PL_linestr, 0)) {
13844         SvUTF8_on(PL_linestr);
13845     } else {
13846         SvUTF8_on(PL_linestr);
13847     }
13848     PL_bufend = SvEND(PL_linestr);
13849     return (U8*)SvPVX(PL_linestr);
13850 }
13851 #endif
13852
13853 /*
13854 Returns a pointer to the next character after the parsed
13855 vstring, as well as updating the passed in sv.
13856
13857 Function must be called like
13858
13859         sv = newSV(5);
13860         s = scan_vstring(s,e,sv);
13861
13862 where s and e are the start and end of the string.
13863 The sv should already be large enough to store the vstring
13864 passed in, for performance reasons.
13865
13866 */
13867
13868 char *
13869 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13870 {
13871     dVAR;
13872     const char *pos = s;
13873     const char *start = s;
13874
13875     PERL_ARGS_ASSERT_SCAN_VSTRING;
13876
13877     if (*pos == 'v') pos++;  /* get past 'v' */
13878     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13879         pos++;
13880     if ( *pos != '.') {
13881         /* this may not be a v-string if followed by => */
13882         const char *next = pos;
13883         while (next < e && isSPACE(*next))
13884             ++next;
13885         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13886             /* return string not v-string */
13887             sv_setpvn(sv,(char *)s,pos-s);
13888             return (char *)pos;
13889         }
13890     }
13891
13892     if (!isALPHA(*pos)) {
13893         U8 tmpbuf[UTF8_MAXBYTES+1];
13894
13895         if (*s == 'v')
13896             s++;  /* get past 'v' */
13897
13898         sv_setpvs(sv, "");
13899
13900         for (;;) {
13901             /* this is atoi() that tolerates underscores */
13902             U8 *tmpend;
13903             UV rev = 0;
13904             const char *end = pos;
13905             UV mult = 1;
13906             while (--end >= s) {
13907                 if (*end != '_') {
13908                     const UV orev = rev;
13909                     rev += (*end - '0') * mult;
13910                     mult *= 10;
13911                     if (orev > rev)
13912                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13913                                          "Integer overflow in decimal number");
13914                 }
13915             }
13916 #ifdef EBCDIC
13917             if (rev > 0x7FFFFFFF)
13918                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13919 #endif
13920             /* Append native character for the rev point */
13921             tmpend = uvchr_to_utf8(tmpbuf, rev);
13922             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13923             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13924                  SvUTF8_on(sv);
13925             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13926                  s = ++pos;
13927             else {
13928                  s = pos;
13929                  break;
13930             }
13931             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13932                  pos++;
13933         }
13934         SvPOK_on(sv);
13935         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13936         SvRMAGICAL_on(sv);
13937     }
13938     return (char *)s;
13939 }
13940
13941 int
13942 Perl_keyword_plugin_standard(pTHX_
13943         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13944 {
13945     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13946     PERL_UNUSED_CONTEXT;
13947     PERL_UNUSED_ARG(keyword_ptr);
13948     PERL_UNUSED_ARG(keyword_len);
13949     PERL_UNUSED_ARG(op_ptr);
13950     return KEYWORD_PLUGIN_DECLINE;
13951 }
13952
13953 /*
13954 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
13955
13956 Parse a single complete Perl statement.  This may be a normal imperative
13957 statement, including optional label, or a declaration that has
13958 compile-time effect.  It is up to the caller to ensure that the dynamic
13959 parser state (L</PL_parser> et al) is correctly set to reflect the source
13960 of the code to be parsed and the lexical context for the statement.
13961
13962 The op tree representing the statement is returned.  This may be a
13963 null pointer if the statement is null, for example if it was actually
13964 a subroutine definition (which has compile-time side effects).  If not
13965 null, it will be the result of a L</newSTATEOP> call, normally including
13966 a C<nextstate> or equivalent op.
13967
13968 If an error occurs in parsing or compilation, in most cases a valid op
13969 tree (most likely null) is returned anyway.  The error is reflected in
13970 the parser state, normally resulting in a single exception at the top
13971 level of parsing which covers all the compilation errors that occurred.
13972 Some compilation errors, however, will throw an exception immediately.
13973
13974 The I<flags> parameter is reserved for future use, and must always
13975 be zero.
13976
13977 =cut
13978 */
13979
13980 OP *
13981 Perl_parse_fullstmt(pTHX_ U32 flags)
13982 {
13983     OP *fullstmtop;
13984     if (flags)
13985         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13986     ENTER;
13987     SAVEVPTR(PL_eval_root);
13988     PL_eval_root = NULL;
13989     if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
13990         qerror(Perl_mess(aTHX_ "Parse error"));
13991     fullstmtop = PL_eval_root;
13992     LEAVE;
13993     return fullstmtop;
13994 }
13995
13996 /*
13997 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
13998
13999 Parse a sequence of zero or more Perl statements.  These may be normal
14000 imperative statements, including optional labels, or declarations
14001 that have compile-time effect, or any mixture thereof.  The statement
14002 sequence ends when a closing brace or end-of-file is encountered in a
14003 place where a new statement could have validly started.  It is up to
14004 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
14005 is correctly set to reflect the source of the code to be parsed and the
14006 lexical context for the statements.
14007
14008 The op tree representing the statement sequence is returned.  This may
14009 be a null pointer if the statements were all null, for example if there
14010 were no statements or if there were only subroutine definitions (which
14011 have compile-time side effects).  If not null, it will be a C<lineseq>
14012 list, normally including C<nextstate> or equivalent ops.
14013
14014 If an error occurs in parsing or compilation, in most cases a valid op
14015 tree is returned anyway.  The error is reflected in the parser state,
14016 normally resulting in a single exception at the top level of parsing
14017 which covers all the compilation errors that occurred.  Some compilation
14018 errors, however, will throw an exception immediately.
14019
14020 The I<flags> parameter is reserved for future use, and must always
14021 be zero.
14022
14023 =cut
14024 */
14025
14026 OP *
14027 Perl_parse_stmtseq(pTHX_ U32 flags)
14028 {
14029     OP *stmtseqop;
14030     if (flags)
14031         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
14032     ENTER;
14033     SAVEVPTR(PL_eval_root);
14034     PL_eval_root = NULL;
14035     if(yyparse(GRAMSTMTSEQ) && !PL_parser->error_count)
14036         qerror(Perl_mess(aTHX_ "Parse error"));
14037     stmtseqop = PL_eval_root;
14038     LEAVE;
14039     return stmtseqop;
14040 }
14041
14042 void
14043 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
14044 {
14045     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
14046     deprecate("qw(...) as parentheses");
14047     force_next(')');
14048     if (qwlist->op_type == OP_STUB) {
14049         op_free(qwlist);
14050     }
14051     else {
14052         start_force(PL_curforce);
14053         NEXTVAL_NEXTTOKE.opval = qwlist;
14054         force_next(THING);
14055     }
14056     force_next('(');
14057 }
14058
14059 /*
14060  * Local variables:
14061  * c-indentation-style: bsd
14062  * c-basic-offset: 4
14063  * indent-tabs-mode: t
14064  * End:
14065  *
14066  * ex: set ts=8 sts=4 sw=4 noet:
14067  */