toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
 302 enum token_type {
 303     TOKENTYPE_NONE,
 304     TOKENTYPE_IVAL,
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,
 307     TOKENTYPE_OPVAL,
 308     TOKENTYPE_GVVAL
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;
 313     enum token_type type;
 314     const char *name;
 315 } const debug_tokens[] =
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
 718         /* avoid tie/overload weirdness */
 719         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 720         if (s[len-1] != ';')
 721             sv_catpvs(parser->linestr, "\n;");
 722     } else {
 723         SvTEMP_off(line);
 724         SvREFCNT_inc_simple_void_NN(line);
 725         parser->linestr = line;
 726     }
 727     parser->oldoldbufptr =
 728         parser->oldbufptr =
 729         parser->bufptr =
 730         parser->linestart = SvPVX(parser->linestr);
 731     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 732     parser->last_lop = parser->last_uni = NULL;
 733 }
 734
 735
 736 /* delete a parser object */
 737
 738 void
 739 Perl_parser_free(pTHX_  const yy_parser *parser)
 740 {
 741     PERL_ARGS_ASSERT_PARSER_FREE;
 742
 743     PL_curcop = parser->saved_curcop;
 744     SvREFCNT_dec(parser->linestr);
 745
 746     if (parser->rsfp == PerlIO_stdin())
 747         PerlIO_clearerr(parser->rsfp);
 748     else if (parser->rsfp && (!parser->old_parser ||
 749                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 750         PerlIO_close(parser->rsfp);
 751     SvREFCNT_dec(parser->rsfp_filters);
 752
 753     Safefree(parser->stack);
 754     Safefree(parser->lex_brackstack);
 755     Safefree(parser->lex_casestack);
 756     PL_parser = parser->old_parser;
 757     Safefree(parser);
 758 }
 759
 760
 761 /*
 762  * Perl_lex_end
 763  * Finalizer for lexing operations.  Must be called when the parser is
 764  * done with the lexer.
 765  */
 766
 767 void
 768 Perl_lex_end(pTHX)
 769 {
 770     dVAR;
 771     PL_doextract = FALSE;
 772 }
 773
 774 /*
 775 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 776
 777 Buffer scalar containing the chunk currently under consideration of the
 778 text currently being lexed.  This is always a plain string scalar (for
 779 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 780 normal scalar means; instead refer to the buffer directly by the pointer
 781 variables described below.
 782
 783 The lexer maintains various C<char*> pointers to things in the
 784 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 785 reallocated, all of these pointers must be updated.  Don't attempt to
 786 do this manually, but rather use L</lex_grow_linestr> if you need to
 787 reallocate the buffer.
 788
 789 The content of the text chunk in the buffer is commonly exactly one
 790 complete line of input, up to and including a newline terminator,
 791 but there are situations where it is otherwise.  The octets of the
 792 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 793 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 794 flag on this scalar, which may disagree with it.
 795
 796 For direct examination of the buffer, the variable
 797 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 798 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 799 of these pointers is usually preferable to examination of the scalar
 800 through normal scalar means.
 801
 802 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 803
 804 Direct pointer to the end of the chunk of text currently being lexed, the
 805 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 806 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 807 always located at the end of the buffer, and does not count as part of
 808 the buffer's contents.
 809
 810 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 811
 812 Points to the current position of lexing inside the lexer buffer.
 813 Characters around this point may be freely examined, within
 814 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 815 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 816 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 817
 818 Lexing code (whether in the Perl core or not) moves this pointer past
 819 the characters that it consumes.  It is also expected to perform some
 820 bookkeeping whenever a newline character is consumed.  This movement
 821 can be more conveniently performed by the function L</lex_read_to>,
 822 which handles newlines appropriately.
 823
 824 Interpretation of the buffer's octets can be abstracted out by
 825 using the slightly higher-level functions L</lex_peek_unichar> and
 826 L</lex_read_unichar>.
 827
 828 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 829
 830 Points to the start of the current line inside the lexer buffer.
 831 This is useful for indicating at which column an error occurred, and
 832 not much else.  This must be updated by any lexing code that consumes
 833 a newline; the function L</lex_read_to> handles this detail.
 834
 835 =cut
 836 */
 837
 838 /*
 839 =for apidoc Amx|bool|lex_bufutf8
 840
 841 Indicates whether the octets in the lexer buffer
 842 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 843 of Unicode characters.  If not, they should be interpreted as Latin-1
 844 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 845
 846 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 847 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 848 encoding.
 849
 850 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 851 is significant, but not the whole story regarding the input character
 852 encoding.  Normally, when a file is being read, the scalar contains octets
 853 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 854 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 855 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 856 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 857 is in effect.  This logic may change in the future; use this function
 858 instead of implementing the logic yourself.
 859
 860 =cut
 861 */
 862
 863 bool
 864 Perl_lex_bufutf8(pTHX)
 865 {
 866     return UTF;
 867 }
 868
 869 /*
 870 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 871
 872 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 873 at least I<len> octets (including terminating NUL).  Returns a
 874 pointer to the reallocated buffer.  This is necessary before making
 875 any direct modification of the buffer that would increase its length.
 876 L</lex_stuff_pvn> provides a more convenient way to insert text into
 877 the buffer.
 878
 879 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 880 this function updates all of the lexer's variables that point directly
 881 into the buffer.
 882
 883 =cut
 884 */
 885
 886 char *
 887 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 888 {
 889     SV *linestr;
 890     char *buf;
 891     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 892     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 893     linestr = PL_parser->linestr;
 894     buf = SvPVX(linestr);
 895     if (len <= SvLEN(linestr))
 896         return buf;
 897     bufend_pos = PL_parser->bufend - buf;
 898     bufptr_pos = PL_parser->bufptr - buf;
 899     oldbufptr_pos = PL_parser->oldbufptr - buf;
 900     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 901     linestart_pos = PL_parser->linestart - buf;
 902     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 903     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 904     buf = sv_grow(linestr, len);
 905     PL_parser->bufend = buf + bufend_pos;
 906     PL_parser->bufptr = buf + bufptr_pos;
 907     PL_parser->oldbufptr = buf + oldbufptr_pos;
 908     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 909     PL_parser->linestart = buf + linestart_pos;
 910     if (PL_parser->last_uni)
 911         PL_parser->last_uni = buf + last_uni_pos;
 912     if (PL_parser->last_lop)
 913         PL_parser->last_lop = buf + last_lop_pos;
 914     return buf;
 915 }
 916
 917 /*
 918 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 919
 920 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 921 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 922 reallocating the buffer if necessary.  This means that lexing code that
 923 runs later will see the characters as if they had appeared in the input.
 924 It is not recommended to do this as part of normal parsing, and most
 925 uses of this facility run the risk of the inserted characters being
 926 interpreted in an unintended manner.
 927
 928 The string to be inserted is represented by I<len> octets starting
 929 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 930 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 931 The characters are recoded for the lexer buffer, according to how the
 932 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 933 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 934 function is more convenient.
 935
 936 =cut
 937 */
 938
 939 void
 940 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 941 {
 942     dVAR;
 943     char *bufptr;
 944     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 945     if (flags & ~(LEX_STUFF_UTF8))
 946         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 947     if (UTF) {
 948         if (flags & LEX_STUFF_UTF8) {
 949             goto plain_copy;
 950         } else {
 951             STRLEN highhalf = 0;
 952             const char *p, *e = pv+len;
 953             for (p = pv; p != e; p++)
 954                 highhalf += !!(((U8)*p) & 0x80);
 955             if (!highhalf)
 956                 goto plain_copy;
 957             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 958             bufptr = PL_parser->bufptr;
 959             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 960             SvCUR_set(PL_parser->linestr,
 961                 SvCUR(PL_parser->linestr) + len+highhalf);
 962             PL_parser->bufend += len+highhalf;
 963             for (p = pv; p != e; p++) {
 964                 U8 c = (U8)*p;
 965                 if (c & 0x80) {
 966                     *bufptr++ = (char)(0xc0 | (c >> 6));
 967                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 968                 } else {
 969                     *bufptr++ = (char)c;
 970                 }
 971             }
 972         }
 973     } else {
 974         if (flags & LEX_STUFF_UTF8) {
 975             STRLEN highhalf = 0;
 976             const char *p, *e = pv+len;
 977             for (p = pv; p != e; p++) {
 978                 U8 c = (U8)*p;
 979                 if (c >= 0xc4) {
 980                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 981                                 "non-Latin-1 character into Latin-1 input");
 982                 } else if (c >= 0xc2 && p+1 != e &&
 983                             (((U8)p[1]) & 0xc0) == 0x80) {
 984                     p++;
 985                     highhalf++;
 986                 } else if (c >= 0x80) {
 987                     /* malformed UTF-8 */
 988                     ENTER;
 989                     SAVESPTR(PL_warnhook);
 990                     PL_warnhook = PERL_WARNHOOK_FATAL;
 991                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 992                     LEAVE;
 993                 }
 994             }
 995             if (!highhalf)
 996                 goto plain_copy;
 997             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 998             bufptr = PL_parser->bufptr;
 999             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1000             SvCUR_set(PL_parser->linestr,
1001                 SvCUR(PL_parser->linestr) + len-highhalf);
1002             PL_parser->bufend += len-highhalf;
1003             for (p = pv; p != e; p++) {
1004                 U8 c = (U8)*p;
1005                 if (c & 0x80) {
1006                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1007                     p++;
1008                 } else {
1009                     *bufptr++ = (char)c;
1010                 }
1011             }
1012         } else {
1013             plain_copy:
1014             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1015             bufptr = PL_parser->bufptr;
1016             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1017             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1018             PL_parser->bufend += len;
1019             Copy(pv, bufptr, len, char);
1020         }
1021     }
1022 }
1023
1024 /*
1025 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1026
1027 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1028 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1029 reallocating the buffer if necessary.  This means that lexing code that
1030 runs later will see the characters as if they had appeared in the input.
1031 It is not recommended to do this as part of normal parsing, and most
1032 uses of this facility run the risk of the inserted characters being
1033 interpreted in an unintended manner.
1034
1035 The string to be inserted is the string value of I<sv>.  The characters
1036 are recoded for the lexer buffer, according to how the buffer is currently
1037 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1038 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1039 need to construct a scalar.
1040
1041 =cut
1042 */
1043
1044 void
1045 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1046 {
1047     char *pv;
1048     STRLEN len;
1049     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1050     if (flags)
1051         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1052     pv = SvPV(sv, len);
1053     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1054 }
1055
1056 /*
1057 =for apidoc Amx|void|lex_unstuff|char *ptr
1058
1059 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1060 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1061 This hides the discarded text from any lexing code that runs later,
1062 as if the text had never appeared.
1063
1064 This is not the normal way to consume lexed text.  For that, use
1065 L</lex_read_to>.
1066
1067 =cut
1068 */
1069
1070 void
1071 Perl_lex_unstuff(pTHX_ char *ptr)
1072 {
1073     char *buf, *bufend;
1074     STRLEN unstuff_len;
1075     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1076     buf = PL_parser->bufptr;
1077     if (ptr < buf)
1078         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1079     if (ptr == buf)
1080         return;
1081     bufend = PL_parser->bufend;
1082     if (ptr > bufend)
1083         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1084     unstuff_len = ptr - buf;
1085     Move(ptr, buf, bufend+1-ptr, char);
1086     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1087     PL_parser->bufend = bufend - unstuff_len;
1088 }
1089
1090 /*
1091 =for apidoc Amx|void|lex_read_to|char *ptr
1092
1093 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1094 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1095 performing the correct bookkeeping whenever a newline character is passed.
1096 This is the normal way to consume lexed text.
1097
1098 Interpretation of the buffer's octets can be abstracted out by
1099 using the slightly higher-level functions L</lex_peek_unichar> and
1100 L</lex_read_unichar>.
1101
1102 =cut
1103 */
1104
1105 void
1106 Perl_lex_read_to(pTHX_ char *ptr)
1107 {
1108     char *s;
1109     PERL_ARGS_ASSERT_LEX_READ_TO;
1110     s = PL_parser->bufptr;
1111     if (ptr < s || ptr > PL_parser->bufend)
1112         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1113     for (; s != ptr; s++)
1114         if (*s == '\n') {
1115             CopLINE_inc(PL_curcop);
1116             PL_parser->linestart = s+1;
1117         }
1118     PL_parser->bufptr = ptr;
1119 }
1120
1121 /*
1122 =for apidoc Amx|void|lex_discard_to|char *ptr
1123
1124 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1125 up to I<ptr>.  The remaining content of the buffer will be moved, and
1126 all pointers into the buffer updated appropriately.  I<ptr> must not
1127 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1128 it is not permitted to discard text that has yet to be lexed.
1129
1130 Normally it is not necessarily to do this directly, because it suffices to
1131 use the implicit discarding behaviour of L</lex_next_chunk> and things
1132 based on it.  However, if a token stretches across multiple lines,
1133 and the lexing code has kept multiple lines of text in the buffer for
1134 that purpose, then after completion of the token it would be wise to
1135 explicitly discard the now-unneeded earlier lines, to avoid future
1136 multi-line tokens growing the buffer without bound.
1137
1138 =cut
1139 */
1140
1141 void
1142 Perl_lex_discard_to(pTHX_ char *ptr)
1143 {
1144     char *buf;
1145     STRLEN discard_len;
1146     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1147     buf = SvPVX(PL_parser->linestr);
1148     if (ptr < buf)
1149         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1150     if (ptr == buf)
1151         return;
1152     if (ptr > PL_parser->bufptr)
1153         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1154     discard_len = ptr - buf;
1155     if (PL_parser->oldbufptr < ptr)
1156         PL_parser->oldbufptr = ptr;
1157     if (PL_parser->oldoldbufptr < ptr)
1158         PL_parser->oldoldbufptr = ptr;
1159     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1160         PL_parser->last_uni = NULL;
1161     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1162         PL_parser->last_lop = NULL;
1163     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1164     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1165     PL_parser->bufend -= discard_len;
1166     PL_parser->bufptr -= discard_len;
1167     PL_parser->oldbufptr -= discard_len;
1168     PL_parser->oldoldbufptr -= discard_len;
1169     if (PL_parser->last_uni)
1170         PL_parser->last_uni -= discard_len;
1171     if (PL_parser->last_lop)
1172         PL_parser->last_lop -= discard_len;
1173 }
1174
1175 /*
1176 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1177
1178 Reads in the next chunk of text to be lexed, appending it to
1179 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1180 looked to the end of the current chunk and wants to know more.  It is
1181 usual, but not necessary, for lexing to have consumed the entirety of
1182 the current chunk at this time.
1183
1184 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1185 chunk (i.e., the current chunk has been entirely consumed), normally the
1186 current chunk will be discarded at the same time that the new chunk is
1187 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1188 will not be discarded.  If the current chunk has not been entirely
1189 consumed, then it will not be discarded regardless of the flag.
1190
1191 Returns true if some new text was added to the buffer, or false if the
1192 buffer has reached the end of the input text.
1193
1194 =cut
1195 */
1196
1197 #define LEX_FAKE_EOF 0x80000000
1198
1199 bool
1200 Perl_lex_next_chunk(pTHX_ U32 flags)
1201 {
1202     SV *linestr;
1203     char *buf;
1204     STRLEN old_bufend_pos, new_bufend_pos;
1205     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1206     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1207     bool got_some_for_debugger = 0;
1208     bool got_some;
1209     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1210         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1211     linestr = PL_parser->linestr;
1212     buf = SvPVX(linestr);
1213     if (!(flags & LEX_KEEP_PREVIOUS) &&
1214             PL_parser->bufptr == PL_parser->bufend) {
1215         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1216         linestart_pos = 0;
1217         if (PL_parser->last_uni != PL_parser->bufend)
1218             PL_parser->last_uni = NULL;
1219         if (PL_parser->last_lop != PL_parser->bufend)
1220             PL_parser->last_lop = NULL;
1221         last_uni_pos = last_lop_pos = 0;
1222         *buf = 0;
1223         SvCUR(linestr) = 0;
1224     } else {
1225         old_bufend_pos = PL_parser->bufend - buf;
1226         bufptr_pos = PL_parser->bufptr - buf;
1227         oldbufptr_pos = PL_parser->oldbufptr - buf;
1228         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1229         linestart_pos = PL_parser->linestart - buf;
1230         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1231         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1232     }
1233     if (flags & LEX_FAKE_EOF) {
1234         goto eof;
1235     } else if (!PL_parser->rsfp) {
1236         got_some = 0;
1237     } else if (filter_gets(linestr, old_bufend_pos)) {
1238         got_some = 1;
1239         got_some_for_debugger = 1;
1240     } else {
1241         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1242             sv_setpvs(linestr, "");
1243         eof:
1244         /* End of real input.  Close filehandle (unless it was STDIN),
1245          * then add implicit termination.
1246          */
1247         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1248             PerlIO_clearerr(PL_parser->rsfp);
1249         else if (PL_parser->rsfp)
1250             (void)PerlIO_close(PL_parser->rsfp);
1251         PL_parser->rsfp = NULL;
1252         PL_doextract = FALSE;
1253 #ifdef PERL_MAD
1254         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1255             PL_faketokens = 1;
1256 #endif
1257         if (!PL_in_eval && PL_minus_p) {
1258             sv_catpvs(linestr,
1259                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1260             PL_minus_n = PL_minus_p = 0;
1261         } else if (!PL_in_eval && PL_minus_n) {
1262             sv_catpvs(linestr, /*{*/";}");
1263             PL_minus_n = 0;
1264         } else
1265             sv_catpvs(linestr, ";");
1266         got_some = 1;
1267     }
1268     buf = SvPVX(linestr);
1269     new_bufend_pos = SvCUR(linestr);
1270     PL_parser->bufend = buf + new_bufend_pos;
1271     PL_parser->bufptr = buf + bufptr_pos;
1272     PL_parser->oldbufptr = buf + oldbufptr_pos;
1273     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1274     PL_parser->linestart = buf + linestart_pos;
1275     if (PL_parser->last_uni)
1276         PL_parser->last_uni = buf + last_uni_pos;
1277     if (PL_parser->last_lop)
1278         PL_parser->last_lop = buf + last_lop_pos;
1279     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1280             PL_curstash != PL_debstash) {
1281         /* debugger active and we're not compiling the debugger code,
1282          * so store the line into the debugger's array of lines
1283          */
1284         update_debugger_info(NULL, buf+old_bufend_pos,
1285             new_bufend_pos-old_bufend_pos);
1286     }
1287     return got_some;
1288 }
1289
1290 /*
1291 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1292
1293 Looks ahead one (Unicode) character in the text currently being lexed.
1294 Returns the codepoint (unsigned integer value) of the next character,
1295 or -1 if lexing has reached the end of the input text.  To consume the
1296 peeked character, use L</lex_read_unichar>.
1297
1298 If the next character is in (or extends into) the next chunk of input
1299 text, the next chunk will be read in.  Normally the current chunk will be
1300 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1301 then the current chunk will not be discarded.
1302
1303 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1304 is encountered, an exception is generated.
1305
1306 =cut
1307 */
1308
1309 I32
1310 Perl_lex_peek_unichar(pTHX_ U32 flags)
1311 {
1312     dVAR;
1313     char *s, *bufend;
1314     if (flags & ~(LEX_KEEP_PREVIOUS))
1315         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1316     s = PL_parser->bufptr;
1317     bufend = PL_parser->bufend;
1318     if (UTF) {
1319         U8 head;
1320         I32 unichar;
1321         STRLEN len, retlen;
1322         if (s == bufend) {
1323             if (!lex_next_chunk(flags))
1324                 return -1;
1325             s = PL_parser->bufptr;
1326             bufend = PL_parser->bufend;
1327         }
1328         head = (U8)*s;
1329         if (!(head & 0x80))
1330             return head;
1331         if (head & 0x40) {
1332             len = PL_utf8skip[head];
1333             while ((STRLEN)(bufend-s) < len) {
1334                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1335                     break;
1336                 s = PL_parser->bufptr;
1337                 bufend = PL_parser->bufend;
1338             }
1339         }
1340         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1341         if (retlen == (STRLEN)-1) {
1342             /* malformed UTF-8 */
1343             ENTER;
1344             SAVESPTR(PL_warnhook);
1345             PL_warnhook = PERL_WARNHOOK_FATAL;
1346             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1347             LEAVE;
1348         }
1349         return unichar;
1350     } else {
1351         if (s == bufend) {
1352             if (!lex_next_chunk(flags))
1353                 return -1;
1354             s = PL_parser->bufptr;
1355         }
1356         return (U8)*s;
1357     }
1358 }
1359
1360 /*
1361 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1362
1363 Reads the next (Unicode) character in the text currently being lexed.
1364 Returns the codepoint (unsigned integer value) of the character read,
1365 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1366 if lexing has reached the end of the input text.  To non-destructively
1367 examine the next character, use L</lex_peek_unichar> instead.
1368
1369 If the next character is in (or extends into) the next chunk of input
1370 text, the next chunk will be read in.  Normally the current chunk will be
1371 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1372 then the current chunk will not be discarded.
1373
1374 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1375 is encountered, an exception is generated.
1376
1377 =cut
1378 */
1379
1380 I32
1381 Perl_lex_read_unichar(pTHX_ U32 flags)
1382 {
1383     I32 c;
1384     if (flags & ~(LEX_KEEP_PREVIOUS))
1385         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1386     c = lex_peek_unichar(flags);
1387     if (c != -1) {
1388         if (c == '\n')
1389             CopLINE_inc(PL_curcop);
1390         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1391     }
1392     return c;
1393 }
1394
1395 /*
1396 =for apidoc Amx|void|lex_read_space|U32 flags
1397
1398 Reads optional spaces, in Perl style, in the text currently being
1399 lexed.  The spaces may include ordinary whitespace characters and
1400 Perl-style comments.  C<#line> directives are processed if encountered.
1401 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1402 at a non-space character (or the end of the input text).
1403
1404 If spaces extend into the next chunk of input text, the next chunk will
1405 be read in.  Normally the current chunk will be discarded at the same
1406 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1407 chunk will not be discarded.
1408
1409 =cut
1410 */
1411
1412 #define LEX_NO_NEXT_CHUNK 0x80000000
1413
1414 void
1415 Perl_lex_read_space(pTHX_ U32 flags)
1416 {
1417     char *s, *bufend;
1418     bool need_incline = 0;
1419     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1420         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1421 #ifdef PERL_MAD
1422     if (PL_skipwhite) {
1423         sv_free(PL_skipwhite);
1424         PL_skipwhite = NULL;
1425     }
1426     if (PL_madskills)
1427         PL_skipwhite = newSVpvs("");
1428 #endif /* PERL_MAD */
1429     s = PL_parser->bufptr;
1430     bufend = PL_parser->bufend;
1431     while (1) {
1432         char c = *s;
1433         if (c == '#') {
1434             do {
1435                 c = *++s;
1436             } while (!(c == '\n' || (c == 0 && s == bufend)));
1437         } else if (c == '\n') {
1438             s++;
1439             PL_parser->linestart = s;
1440             if (s == bufend)
1441                 need_incline = 1;
1442             else
1443                 incline(s);
1444         } else if (isSPACE(c)) {
1445             s++;
1446         } else if (c == 0 && s == bufend) {
1447             bool got_more;
1448 #ifdef PERL_MAD
1449             if (PL_madskills)
1450                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1451 #endif /* PERL_MAD */
1452             if (flags & LEX_NO_NEXT_CHUNK)
1453                 break;
1454             PL_parser->bufptr = s;
1455             CopLINE_inc(PL_curcop);
1456             got_more = lex_next_chunk(flags);
1457             CopLINE_dec(PL_curcop);
1458             s = PL_parser->bufptr;
1459             bufend = PL_parser->bufend;
1460             if (!got_more)
1461                 break;
1462             if (need_incline && PL_parser->rsfp) {
1463                 incline(s);
1464                 need_incline = 0;
1465             }
1466         } else {
1467             break;
1468         }
1469     }
1470 #ifdef PERL_MAD
1471     if (PL_madskills)
1472         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1473 #endif /* PERL_MAD */
1474     PL_parser->bufptr = s;
1475 }
1476
1477 /*
1478  * S_incline
1479  * This subroutine has nothing to do with tilting, whether at windmills
1480  * or pinball tables.  Its name is short for "increment line".  It
1481  * increments the current line number in CopLINE(PL_curcop) and checks
1482  * to see whether the line starts with a comment of the form
1483  *    # line 500 "foo.pm"
1484  * If so, it sets the current line number and file to the values in the comment.
1485  */
1486
1487 STATIC void
1488 S_incline(pTHX_ const char *s)
1489 {
1490     dVAR;
1491     const char *t;
1492     const char *n;
1493     const char *e;
1494
1495     PERL_ARGS_ASSERT_INCLINE;
1496
1497     CopLINE_inc(PL_curcop);
1498     if (*s++ != '#')
1499         return;
1500     while (SPACE_OR_TAB(*s))
1501         s++;
1502     if (strnEQ(s, "line", 4))
1503         s += 4;
1504     else
1505         return;
1506     if (SPACE_OR_TAB(*s))
1507         s++;
1508     else
1509         return;
1510     while (SPACE_OR_TAB(*s))
1511         s++;
1512     if (!isDIGIT(*s))
1513         return;
1514
1515     n = s;
1516     while (isDIGIT(*s))
1517         s++;
1518     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1519         return;
1520     while (SPACE_OR_TAB(*s))
1521         s++;
1522     if (*s == '"' && (t = strchr(s+1, '"'))) {
1523         s++;
1524         e = t + 1;
1525     }
1526     else {
1527         t = s;
1528         while (!isSPACE(*t))
1529             t++;
1530         e = t;
1531     }
1532     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1533         e++;
1534     if (*e != '\n' && *e != '\0')
1535         return;         /* false alarm */
1536
1537     if (t - s > 0) {
1538         const STRLEN len = t - s;
1539 #ifndef USE_ITHREADS
1540         SV *const temp_sv = CopFILESV(PL_curcop);
1541         const char *cf;
1542         STRLEN tmplen;
1543
1544         if (temp_sv) {
1545             cf = SvPVX(temp_sv);
1546             tmplen = SvCUR(temp_sv);
1547         } else {
1548             cf = NULL;
1549             tmplen = 0;
1550         }
1551
1552         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1553             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1554              * to *{"::_<newfilename"} */
1555             /* However, the long form of evals is only turned on by the
1556                debugger - usually they're "(eval %lu)" */
1557             char smallbuf[128];
1558             char *tmpbuf;
1559             GV **gvp;
1560             STRLEN tmplen2 = len;
1561             if (tmplen + 2 <= sizeof smallbuf)
1562                 tmpbuf = smallbuf;
1563             else
1564                 Newx(tmpbuf, tmplen + 2, char);
1565             tmpbuf[0] = '_';
1566             tmpbuf[1] = '<';
1567             memcpy(tmpbuf + 2, cf, tmplen);
1568             tmplen += 2;
1569             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1570             if (gvp) {
1571                 char *tmpbuf2;
1572                 GV *gv2;
1573
1574                 if (tmplen2 + 2 <= sizeof smallbuf)
1575                     tmpbuf2 = smallbuf;
1576                 else
1577                     Newx(tmpbuf2, tmplen2 + 2, char);
1578
1579                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1580                     /* Either they malloc'd it, or we malloc'd it,
1581                        so no prefix is present in ours.  */
1582                     tmpbuf2[0] = '_';
1583                     tmpbuf2[1] = '<';
1584                 }
1585
1586                 memcpy(tmpbuf2 + 2, s, tmplen2);
1587                 tmplen2 += 2;
1588
1589                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1590                 if (!isGV(gv2)) {
1591                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1592                     /* adjust ${"::_<newfilename"} to store the new file name */
1593                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1594                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1595                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1596                 }
1597
1598                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1599             }
1600             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1601         }
1602 #endif
1603         CopFILE_free(PL_curcop);
1604         CopFILE_setn(PL_curcop, s, len);
1605     }
1606     CopLINE_set(PL_curcop, atoi(n)-1);
1607 }
1608
1609 #ifdef PERL_MAD
1610 /* skip space before PL_thistoken */
1611
1612 STATIC char *
1613 S_skipspace0(pTHX_ register char *s)
1614 {
1615     PERL_ARGS_ASSERT_SKIPSPACE0;
1616
1617     s = skipspace(s);
1618     if (!PL_madskills)
1619         return s;
1620     if (PL_skipwhite) {
1621         if (!PL_thiswhite)
1622             PL_thiswhite = newSVpvs("");
1623         sv_catsv(PL_thiswhite, PL_skipwhite);
1624         sv_free(PL_skipwhite);
1625         PL_skipwhite = 0;
1626     }
1627     PL_realtokenstart = s - SvPVX(PL_linestr);
1628     return s;
1629 }
1630
1631 /* skip space after PL_thistoken */
1632
1633 STATIC char *
1634 S_skipspace1(pTHX_ register char *s)
1635 {
1636     const char *start = s;
1637     I32 startoff = start - SvPVX(PL_linestr);
1638
1639     PERL_ARGS_ASSERT_SKIPSPACE1;
1640
1641     s = skipspace(s);
1642     if (!PL_madskills)
1643         return s;
1644     start = SvPVX(PL_linestr) + startoff;
1645     if (!PL_thistoken && PL_realtokenstart >= 0) {
1646         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1647         PL_thistoken = newSVpvn(tstart, start - tstart);
1648     }
1649     PL_realtokenstart = -1;
1650     if (PL_skipwhite) {
1651         if (!PL_nextwhite)
1652             PL_nextwhite = newSVpvs("");
1653         sv_catsv(PL_nextwhite, PL_skipwhite);
1654         sv_free(PL_skipwhite);
1655         PL_skipwhite = 0;
1656     }
1657     return s;
1658 }
1659
1660 STATIC char *
1661 S_skipspace2(pTHX_ register char *s, SV **svp)
1662 {
1663     char *start;
1664     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1665     const I32 startoff = s - SvPVX(PL_linestr);
1666
1667     PERL_ARGS_ASSERT_SKIPSPACE2;
1668
1669     s = skipspace(s);
1670     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1671     if (!PL_madskills || !svp)
1672         return s;
1673     start = SvPVX(PL_linestr) + startoff;
1674     if (!PL_thistoken && PL_realtokenstart >= 0) {
1675         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1676         PL_thistoken = newSVpvn(tstart, start - tstart);
1677         PL_realtokenstart = -1;
1678     }
1679     if (PL_skipwhite) {
1680         if (!*svp)
1681             *svp = newSVpvs("");
1682         sv_setsv(*svp, PL_skipwhite);
1683         sv_free(PL_skipwhite);
1684         PL_skipwhite = 0;
1685     }
1686
1687     return s;
1688 }
1689 #endif
1690
1691 STATIC void
1692 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1693 {
1694     AV *av = CopFILEAVx(PL_curcop);
1695     if (av) {
1696         SV * const sv = newSV_type(SVt_PVMG);
1697         if (orig_sv)
1698             sv_setsv(sv, orig_sv);
1699         else
1700             sv_setpvn(sv, buf, len);
1701         (void)SvIOK_on(sv);
1702         SvIV_set(sv, 0);
1703         av_store(av, (I32)CopLINE(PL_curcop), sv);
1704     }
1705 }
1706
1707 /*
1708  * S_skipspace
1709  * Called to gobble the appropriate amount and type of whitespace.
1710  * Skips comments as well.
1711  */
1712
1713 STATIC char *
1714 S_skipspace(pTHX_ register char *s)
1715 {
1716 #ifdef PERL_MAD
1717     char *start = s;
1718 #endif /* PERL_MAD */
1719     PERL_ARGS_ASSERT_SKIPSPACE;
1720 #ifdef PERL_MAD
1721     if (PL_skipwhite) {
1722         sv_free(PL_skipwhite);
1723         PL_skipwhite = NULL;
1724     }
1725 #endif /* PERL_MAD */
1726     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1727         while (s < PL_bufend && SPACE_OR_TAB(*s))
1728             s++;
1729     } else {
1730         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1731         PL_bufptr = s;
1732         lex_read_space(LEX_KEEP_PREVIOUS |
1733                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1734                     LEX_NO_NEXT_CHUNK : 0));
1735         s = PL_bufptr;
1736         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1737         if (PL_linestart > PL_bufptr)
1738             PL_bufptr = PL_linestart;
1739         return s;
1740     }
1741 #ifdef PERL_MAD
1742     if (PL_madskills)
1743         PL_skipwhite = newSVpvn(start, s-start);
1744 #endif /* PERL_MAD */
1745     return s;
1746 }
1747
1748 /*
1749  * S_check_uni
1750  * Check the unary operators to ensure there's no ambiguity in how they're
1751  * used.  An ambiguous piece of code would be:
1752  *     rand + 5
1753  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1754  * the +5 is its argument.
1755  */
1756
1757 STATIC void
1758 S_check_uni(pTHX)
1759 {
1760     dVAR;
1761     const char *s;
1762     const char *t;
1763
1764     if (PL_oldoldbufptr != PL_last_uni)
1765         return;
1766     while (isSPACE(*PL_last_uni))
1767         PL_last_uni++;
1768     s = PL_last_uni;
1769     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1770         s++;
1771     if ((t = strchr(s, '(')) && t < PL_bufptr)
1772         return;
1773
1774     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1775                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1776                      (int)(s - PL_last_uni), PL_last_uni);
1777 }
1778
1779 /*
1780  * LOP : macro to build a list operator.  Its behaviour has been replaced
1781  * with a subroutine, S_lop() for which LOP is just another name.
1782  */
1783
1784 #define LOP(f,x) return lop(f,x,s)
1785
1786 /*
1787  * S_lop
1788  * Build a list operator (or something that might be one).  The rules:
1789  *  - if we have a next token, then it's a list operator [why?]
1790  *  - if the next thing is an opening paren, then it's a function
1791  *  - else it's a list operator
1792  */
1793
1794 STATIC I32
1795 S_lop(pTHX_ I32 f, int x, char *s)
1796 {
1797     dVAR;
1798
1799     PERL_ARGS_ASSERT_LOP;
1800
1801     pl_yylval.ival = f;
1802     CLINE;
1803     PL_expect = x;
1804     PL_bufptr = s;
1805     PL_last_lop = PL_oldbufptr;
1806     PL_last_lop_op = (OPCODE)f;
1807 #ifdef PERL_MAD
1808     if (PL_lasttoke)
1809         return REPORT(LSTOP);
1810 #else
1811     if (PL_nexttoke)
1812         return REPORT(LSTOP);
1813 #endif
1814     if (*s == '(')
1815         return REPORT(FUNC);
1816     s = PEEKSPACE(s);
1817     if (*s == '(')
1818         return REPORT(FUNC);
1819     else
1820         return REPORT(LSTOP);
1821 }
1822
1823 #ifdef PERL_MAD
1824  /*
1825  * S_start_force
1826  * Sets up for an eventual force_next().  start_force(0) basically does
1827  * an unshift, while start_force(-1) does a push.  yylex removes items
1828  * on the "pop" end.
1829  */
1830
1831 STATIC void
1832 S_start_force(pTHX_ int where)
1833 {
1834     int i;
1835
1836     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1837         where = PL_lasttoke;
1838     assert(PL_curforce < 0 || PL_curforce == where);
1839     if (PL_curforce != where) {
1840         for (i = PL_lasttoke; i > where; --i) {
1841             PL_nexttoke[i] = PL_nexttoke[i-1];
1842         }
1843         PL_lasttoke++;
1844     }
1845     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1846         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1847     PL_curforce = where;
1848     if (PL_nextwhite) {
1849         if (PL_madskills)
1850             curmad('^', newSVpvs(""));
1851         CURMAD('_', PL_nextwhite);
1852     }
1853 }
1854
1855 STATIC void
1856 S_curmad(pTHX_ char slot, SV *sv)
1857 {
1858     MADPROP **where;
1859
1860     if (!sv)
1861         return;
1862     if (PL_curforce < 0)
1863         where = &PL_thismad;
1864     else
1865         where = &PL_nexttoke[PL_curforce].next_mad;
1866
1867     if (PL_faketokens)
1868         sv_setpvs(sv, "");
1869     else {
1870         if (!IN_BYTES) {
1871             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1872                 SvUTF8_on(sv);
1873             else if (PL_encoding) {
1874                 sv_recode_to_utf8(sv, PL_encoding);
1875             }
1876         }
1877     }
1878
1879     /* keep a slot open for the head of the list? */
1880     if (slot != '_' && *where && (*where)->mad_key == '^') {
1881         (*where)->mad_key = slot;
1882         sv_free(MUTABLE_SV(((*where)->mad_val)));
1883         (*where)->mad_val = (void*)sv;
1884     }
1885     else
1886         addmad(newMADsv(slot, sv), where, 0);
1887 }
1888 #else
1889 #  define start_force(where)    NOOP
1890 #  define curmad(slot, sv)      NOOP
1891 #endif
1892
1893 /*
1894  * S_force_next
1895  * When the lexer realizes it knows the next token (for instance,
1896  * it is reordering tokens for the parser) then it can call S_force_next
1897  * to know what token to return the next time the lexer is called.  Caller
1898  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1899  * and possibly PL_expect to ensure the lexer handles the token correctly.
1900  */
1901
1902 STATIC void
1903 S_force_next(pTHX_ I32 type)
1904 {
1905     dVAR;
1906 #ifdef DEBUGGING
1907     if (DEBUG_T_TEST) {
1908         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1909         tokereport(type, &NEXTVAL_NEXTTOKE);
1910     }
1911 #endif
1912 #ifdef PERL_MAD
1913     if (PL_curforce < 0)
1914         start_force(PL_lasttoke);
1915     PL_nexttoke[PL_curforce].next_type = type;
1916     if (PL_lex_state != LEX_KNOWNEXT)
1917         PL_lex_defer = PL_lex_state;
1918     PL_lex_state = LEX_KNOWNEXT;
1919     PL_lex_expect = PL_expect;
1920     PL_curforce = -1;
1921 #else
1922     PL_nexttype[PL_nexttoke] = type;
1923     PL_nexttoke++;
1924     if (PL_lex_state != LEX_KNOWNEXT) {
1925         PL_lex_defer = PL_lex_state;
1926         PL_lex_expect = PL_expect;
1927         PL_lex_state = LEX_KNOWNEXT;
1928     }
1929 #endif
1930 }
1931
1932 STATIC SV *
1933 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1934 {
1935     dVAR;
1936     SV * const sv = newSVpvn_utf8(start, len,
1937                                   !IN_BYTES
1938                                   && UTF
1939                                   && !is_ascii_string((const U8*)start, len)
1940                                   && is_utf8_string((const U8*)start, len));
1941     return sv;
1942 }
1943
1944 /*
1945  * S_force_word
1946  * When the lexer knows the next thing is a word (for instance, it has
1947  * just seen -> and it knows that the next char is a word char, then
1948  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1949  * lookahead.
1950  *
1951  * Arguments:
1952  *   char *start : buffer position (must be within PL_linestr)
1953  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1954  *   int check_keyword : if true, Perl checks to make sure the word isn't
1955  *       a keyword (do this if the word is a label, e.g. goto FOO)
1956  *   int allow_pack : if true, : characters will also be allowed (require,
1957  *       use, etc. do this)
1958  *   int allow_initial_tick : used by the "sub" lexer only.
1959  */
1960
1961 STATIC char *
1962 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1963 {
1964     dVAR;
1965     register char *s;
1966     STRLEN len;
1967
1968     PERL_ARGS_ASSERT_FORCE_WORD;
1969
1970     start = SKIPSPACE1(start);
1971     s = start;
1972     if (isIDFIRST_lazy_if(s,UTF) ||
1973         (allow_pack && *s == ':') ||
1974         (allow_initial_tick && *s == '\'') )
1975     {
1976         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1977         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1978             return start;
1979         start_force(PL_curforce);
1980         if (PL_madskills)
1981             curmad('X', newSVpvn(start,s-start));
1982         if (token == METHOD) {
1983             s = SKIPSPACE1(s);
1984             if (*s == '(')
1985                 PL_expect = XTERM;
1986             else {
1987                 PL_expect = XOPERATOR;
1988             }
1989         }
1990         if (PL_madskills)
1991             curmad('g', newSVpvs( "forced" ));
1992         NEXTVAL_NEXTTOKE.opval
1993             = (OP*)newSVOP(OP_CONST,0,
1994                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1995         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1996         force_next(token);
1997     }
1998     return s;
1999 }
2000
2001 /*
2002  * S_force_ident
2003  * Called when the lexer wants $foo *foo &foo etc, but the program
2004  * text only contains the "foo" portion.  The first argument is a pointer
2005  * to the "foo", and the second argument is the type symbol to prefix.
2006  * Forces the next token to be a "WORD".
2007  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2008  */
2009
2010 STATIC void
2011 S_force_ident(pTHX_ register const char *s, int kind)
2012 {
2013     dVAR;
2014
2015     PERL_ARGS_ASSERT_FORCE_IDENT;
2016
2017     if (*s) {
2018         const STRLEN len = strlen(s);
2019         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2020         start_force(PL_curforce);
2021         NEXTVAL_NEXTTOKE.opval = o;
2022         force_next(WORD);
2023         if (kind) {
2024             o->op_private = OPpCONST_ENTERED;
2025             /* XXX see note in pp_entereval() for why we forgo typo
2026                warnings if the symbol must be introduced in an eval.
2027                GSAR 96-10-12 */
2028             gv_fetchpvn_flags(s, len,
2029                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2030                               : GV_ADD,
2031                               kind == '$' ? SVt_PV :
2032                               kind == '@' ? SVt_PVAV :
2033                               kind == '%' ? SVt_PVHV :
2034                               SVt_PVGV
2035                               );
2036         }
2037     }
2038 }
2039
2040 NV
2041 Perl_str_to_version(pTHX_ SV *sv)
2042 {
2043     NV retval = 0.0;
2044     NV nshift = 1.0;
2045     STRLEN len;
2046     const char *start = SvPV_const(sv,len);
2047     const char * const end = start + len;
2048     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2049
2050     PERL_ARGS_ASSERT_STR_TO_VERSION;
2051
2052     while (start < end) {
2053         STRLEN skip;
2054         UV n;
2055         if (utf)
2056             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2057         else {
2058             n = *(U8*)start;
2059             skip = 1;
2060         }
2061         retval += ((NV)n)/nshift;
2062         start += skip;
2063         nshift *= 1000;
2064     }
2065     return retval;
2066 }
2067
2068 /*
2069  * S_force_version
2070  * Forces the next token to be a version number.
2071  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2072  * and if "guessing" is TRUE, then no new token is created (and the caller
2073  * must use an alternative parsing method).
2074  */
2075
2076 STATIC char *
2077 S_force_version(pTHX_ char *s, int guessing)
2078 {
2079     dVAR;
2080     OP *version = NULL;
2081     char *d;
2082 #ifdef PERL_MAD
2083     I32 startoff = s - SvPVX(PL_linestr);
2084 #endif
2085
2086     PERL_ARGS_ASSERT_FORCE_VERSION;
2087
2088     s = SKIPSPACE1(s);
2089
2090     d = s;
2091     if (*d == 'v')
2092         d++;
2093     if (isDIGIT(*d)) {
2094         while (isDIGIT(*d) || *d == '_' || *d == '.')
2095             d++;
2096 #ifdef PERL_MAD
2097         if (PL_madskills) {
2098             start_force(PL_curforce);
2099             curmad('X', newSVpvn(s,d-s));
2100         }
2101 #endif
2102         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2103             SV *ver;
2104 #ifdef USE_LOCALE_NUMERIC
2105             char *loc = setlocale(LC_NUMERIC, "C");
2106 #endif
2107             s = scan_num(s, &pl_yylval);
2108 #ifdef USE_LOCALE_NUMERIC
2109             setlocale(LC_NUMERIC, loc);
2110 #endif
2111             version = pl_yylval.opval;
2112             ver = cSVOPx(version)->op_sv;
2113             if (SvPOK(ver) && !SvNIOK(ver)) {
2114                 SvUPGRADE(ver, SVt_PVNV);
2115                 SvNV_set(ver, str_to_version(ver));
2116                 SvNOK_on(ver);          /* hint that it is a version */
2117             }
2118         }
2119         else if (guessing) {
2120 #ifdef PERL_MAD
2121             if (PL_madskills) {
2122                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2123                 PL_nextwhite = 0;
2124                 s = SvPVX(PL_linestr) + startoff;
2125             }
2126 #endif
2127             return s;
2128         }
2129     }
2130
2131 #ifdef PERL_MAD
2132     if (PL_madskills && !version) {
2133         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2134         PL_nextwhite = 0;
2135         s = SvPVX(PL_linestr) + startoff;
2136     }
2137 #endif
2138     /* NOTE: The parser sees the package name and the VERSION swapped */
2139     start_force(PL_curforce);
2140     NEXTVAL_NEXTTOKE.opval = version;
2141     force_next(WORD);
2142
2143     return s;
2144 }
2145
2146 /*
2147  * S_force_strict_version
2148  * Forces the next token to be a version number using strict syntax rules.
2149  */
2150
2151 STATIC char *
2152 S_force_strict_version(pTHX_ char *s)
2153 {
2154     dVAR;
2155     OP *version = NULL;
2156 #ifdef PERL_MAD
2157     I32 startoff = s - SvPVX(PL_linestr);
2158 #endif
2159     const char *errstr = NULL;
2160
2161     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2162
2163     while (isSPACE(*s)) /* leading whitespace */
2164         s++;
2165
2166     if (is_STRICT_VERSION(s,&errstr)) {
2167         SV *ver = newSV(0);
2168         s = (char *)scan_version(s, ver, 0);
2169         version = newSVOP(OP_CONST, 0, ver);
2170     }
2171     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2172             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2173     {
2174         PL_bufptr = s;
2175         if (errstr)
2176             yyerror(errstr); /* version required */
2177         return s;
2178     }
2179
2180 #ifdef PERL_MAD
2181     if (PL_madskills && !version) {
2182         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2183         PL_nextwhite = 0;
2184         s = SvPVX(PL_linestr) + startoff;
2185     }
2186 #endif
2187     /* NOTE: The parser sees the package name and the VERSION swapped */
2188     start_force(PL_curforce);
2189     NEXTVAL_NEXTTOKE.opval = version;
2190     force_next(WORD);
2191
2192     return s;
2193 }
2194
2195 /*
2196  * S_tokeq
2197  * Tokenize a quoted string passed in as an SV.  It finds the next
2198  * chunk, up to end of string or a backslash.  It may make a new
2199  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2200  * turns \\ into \.
2201  */
2202
2203 STATIC SV *
2204 S_tokeq(pTHX_ SV *sv)
2205 {
2206     dVAR;
2207     register char *s;
2208     register char *send;
2209     register char *d;
2210     STRLEN len = 0;
2211     SV *pv = sv;
2212
2213     PERL_ARGS_ASSERT_TOKEQ;
2214
2215     if (!SvLEN(sv))
2216         goto finish;
2217
2218     s = SvPV_force(sv, len);
2219     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2220         goto finish;
2221     send = s + len;
2222     while (s < send && *s != '\\')
2223         s++;
2224     if (s == send)
2225         goto finish;
2226     d = s;
2227     if ( PL_hints & HINT_NEW_STRING ) {
2228         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2229     }
2230     while (s < send) {
2231         if (*s == '\\') {
2232             if (s + 1 < send && (s[1] == '\\'))
2233                 s++;            /* all that, just for this */
2234         }
2235         *d++ = *s++;
2236     }
2237     *d = '\0';
2238     SvCUR_set(sv, d - SvPVX_const(sv));
2239   finish:
2240     if ( PL_hints & HINT_NEW_STRING )
2241        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2242     return sv;
2243 }
2244
2245 /*
2246  * Now come three functions related to double-quote context,
2247  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2248  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2249  * interact with PL_lex_state, and create fake ( ... ) argument lists
2250  * to handle functions and concatenation.
2251  * They assume that whoever calls them will be setting up a fake
2252  * join call, because each subthing puts a ',' after it.  This lets
2253  *   "lower \luPpEr"
2254  * become
2255  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2256  *
2257  * (I'm not sure whether the spurious commas at the end of lcfirst's
2258  * arguments and join's arguments are created or not).
2259  */
2260
2261 /*
2262  * S_sublex_start
2263  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2264  *
2265  * Pattern matching will set PL_lex_op to the pattern-matching op to
2266  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2267  *
2268  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2269  *
2270  * Everything else becomes a FUNC.
2271  *
2272  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2273  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2274  * call to S_sublex_push().
2275  */
2276
2277 STATIC I32
2278 S_sublex_start(pTHX)
2279 {
2280     dVAR;
2281     register const I32 op_type = pl_yylval.ival;
2282
2283     if (op_type == OP_NULL) {
2284         pl_yylval.opval = PL_lex_op;
2285         PL_lex_op = NULL;
2286         return THING;
2287     }
2288     if (op_type == OP_CONST || op_type == OP_READLINE) {
2289         SV *sv = tokeq(PL_lex_stuff);
2290
2291         if (SvTYPE(sv) == SVt_PVIV) {
2292             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2293             STRLEN len;
2294             const char * const p = SvPV_const(sv, len);
2295             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2296             SvREFCNT_dec(sv);
2297             sv = nsv;
2298         }
2299         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2300         PL_lex_stuff = NULL;
2301         /* Allow <FH> // "foo" */
2302         if (op_type == OP_READLINE)
2303             PL_expect = XTERMORDORDOR;
2304         return THING;
2305     }
2306     else if (op_type == OP_BACKTICK && PL_lex_op) {
2307         /* readpipe() vas overriden */
2308         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2309         pl_yylval.opval = PL_lex_op;
2310         PL_lex_op = NULL;
2311         PL_lex_stuff = NULL;
2312         return THING;
2313     }
2314
2315     PL_sublex_info.super_state = PL_lex_state;
2316     PL_sublex_info.sub_inwhat = (U16)op_type;
2317     PL_sublex_info.sub_op = PL_lex_op;
2318     PL_lex_state = LEX_INTERPPUSH;
2319
2320     PL_expect = XTERM;
2321     if (PL_lex_op) {
2322         pl_yylval.opval = PL_lex_op;
2323         PL_lex_op = NULL;
2324         return PMFUNC;
2325     }
2326     else
2327         return FUNC;
2328 }
2329
2330 /*
2331  * S_sublex_push
2332  * Create a new scope to save the lexing state.  The scope will be
2333  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2334  * to the uc, lc, etc. found before.
2335  * Sets PL_lex_state to LEX_INTERPCONCAT.
2336  */
2337
2338 STATIC I32
2339 S_sublex_push(pTHX)
2340 {
2341     dVAR;
2342     ENTER;
2343
2344     PL_lex_state = PL_sublex_info.super_state;
2345     SAVEBOOL(PL_lex_dojoin);
2346     SAVEI32(PL_lex_brackets);
2347     SAVEI32(PL_lex_casemods);
2348     SAVEI32(PL_lex_starts);
2349     SAVEI8(PL_lex_state);
2350     SAVEVPTR(PL_lex_inpat);
2351     SAVEI16(PL_lex_inwhat);
2352     SAVECOPLINE(PL_curcop);
2353     SAVEPPTR(PL_bufptr);
2354     SAVEPPTR(PL_bufend);
2355     SAVEPPTR(PL_oldbufptr);
2356     SAVEPPTR(PL_oldoldbufptr);
2357     SAVEPPTR(PL_last_lop);
2358     SAVEPPTR(PL_last_uni);
2359     SAVEPPTR(PL_linestart);
2360     SAVESPTR(PL_linestr);
2361     SAVEGENERICPV(PL_lex_brackstack);
2362     SAVEGENERICPV(PL_lex_casestack);
2363
2364     PL_linestr = PL_lex_stuff;
2365     PL_lex_stuff = NULL;
2366
2367     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2368         = SvPVX(PL_linestr);
2369     PL_bufend += SvCUR(PL_linestr);
2370     PL_last_lop = PL_last_uni = NULL;
2371     SAVEFREESV(PL_linestr);
2372
2373     PL_lex_dojoin = FALSE;
2374     PL_lex_brackets = 0;
2375     Newx(PL_lex_brackstack, 120, char);
2376     Newx(PL_lex_casestack, 12, char);
2377     PL_lex_casemods = 0;
2378     *PL_lex_casestack = '\0';
2379     PL_lex_starts = 0;
2380     PL_lex_state = LEX_INTERPCONCAT;
2381     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2382
2383     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2384     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2385         PL_lex_inpat = PL_sublex_info.sub_op;
2386     else
2387         PL_lex_inpat = NULL;
2388
2389     return '(';
2390 }
2391
2392 /*
2393  * S_sublex_done
2394  * Restores lexer state after a S_sublex_push.
2395  */
2396
2397 STATIC I32
2398 S_sublex_done(pTHX)
2399 {
2400     dVAR;
2401     if (!PL_lex_starts++) {
2402         SV * const sv = newSVpvs("");
2403         if (SvUTF8(PL_linestr))
2404             SvUTF8_on(sv);
2405         PL_expect = XOPERATOR;
2406         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2407         return THING;
2408     }
2409
2410     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2411         PL_lex_state = LEX_INTERPCASEMOD;
2412         return yylex();
2413     }
2414
2415     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2416     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2417         PL_linestr = PL_lex_repl;
2418         PL_lex_inpat = 0;
2419         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2420         PL_bufend += SvCUR(PL_linestr);
2421         PL_last_lop = PL_last_uni = NULL;
2422         SAVEFREESV(PL_linestr);
2423         PL_lex_dojoin = FALSE;
2424         PL_lex_brackets = 0;
2425         PL_lex_casemods = 0;
2426         *PL_lex_casestack = '\0';
2427         PL_lex_starts = 0;
2428         if (SvEVALED(PL_lex_repl)) {
2429             PL_lex_state = LEX_INTERPNORMAL;
2430             PL_lex_starts++;
2431             /*  we don't clear PL_lex_repl here, so that we can check later
2432                 whether this is an evalled subst; that means we rely on the
2433                 logic to ensure sublex_done() is called again only via the
2434                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2435         }
2436         else {
2437             PL_lex_state = LEX_INTERPCONCAT;
2438             PL_lex_repl = NULL;
2439         }
2440         return ',';
2441     }
2442     else {
2443 #ifdef PERL_MAD
2444         if (PL_madskills) {
2445             if (PL_thiswhite) {
2446                 if (!PL_endwhite)
2447                     PL_endwhite = newSVpvs("");
2448                 sv_catsv(PL_endwhite, PL_thiswhite);
2449                 PL_thiswhite = 0;
2450             }
2451             if (PL_thistoken)
2452                 sv_setpvs(PL_thistoken,"");
2453             else
2454                 PL_realtokenstart = -1;
2455         }
2456 #endif
2457         LEAVE;
2458         PL_bufend = SvPVX(PL_linestr);
2459         PL_bufend += SvCUR(PL_linestr);
2460         PL_expect = XOPERATOR;
2461         PL_sublex_info.sub_inwhat = 0;
2462         return ')';
2463     }
2464 }
2465
2466 /*
2467   scan_const
2468
2469   Extracts a pattern, double-quoted string, or transliteration.  This
2470   is terrifying code.
2471
2472   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2473   processing a pattern (PL_lex_inpat is true), a transliteration
2474   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2475
2476   Returns a pointer to the character scanned up to. If this is
2477   advanced from the start pointer supplied (i.e. if anything was
2478   successfully parsed), will leave an OP for the substring scanned
2479   in pl_yylval. Caller must intuit reason for not parsing further
2480   by looking at the next characters herself.
2481
2482   In patterns:
2483     backslashes:
2484       constants: \N{NAME} only
2485       case and quoting: \U \Q \E
2486     stops on @ and $, but not for $ as tail anchor
2487
2488   In transliterations:
2489     characters are VERY literal, except for - not at the start or end
2490     of the string, which indicates a range. If the range is in bytes,
2491     scan_const expands the range to the full set of intermediate
2492     characters. If the range is in utf8, the hyphen is replaced with
2493     a certain range mark which will be handled by pmtrans() in op.c.
2494
2495   In double-quoted strings:
2496     backslashes:
2497       double-quoted style: \r and \n
2498       constants: \x31, etc.
2499       deprecated backrefs: \1 (in substitution replacements)
2500       case and quoting: \U \Q \E
2501     stops on @ and $
2502
2503   scan_const does *not* construct ops to handle interpolated strings.
2504   It stops processing as soon as it finds an embedded $ or @ variable
2505   and leaves it to the caller to work out what's going on.
2506
2507   embedded arrays (whether in pattern or not) could be:
2508       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2509
2510   $ in double-quoted strings must be the symbol of an embedded scalar.
2511
2512   $ in pattern could be $foo or could be tail anchor.  Assumption:
2513   it's a tail anchor if $ is the last thing in the string, or if it's
2514   followed by one of "()| \r\n\t"
2515
2516   \1 (backreferences) are turned into $1
2517
2518   The structure of the code is
2519       while (there's a character to process) {
2520           handle transliteration ranges
2521           skip regexp comments /(?#comment)/ and codes /(?{code})/
2522           skip #-initiated comments in //x patterns
2523           check for embedded arrays
2524           check for embedded scalars
2525           if (backslash) {
2526               deprecate \1 in substitution replacements
2527               handle string-changing backslashes \l \U \Q \E, etc.
2528               switch (what was escaped) {
2529                   handle \- in a transliteration (becomes a literal -)
2530                   if a pattern and not \N{, go treat as regular character
2531                   handle \132 (octal characters)
2532                   handle \x15 and \x{1234} (hex characters)
2533                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2534                   handle \cV (control characters)
2535                   handle printf-style backslashes (\f, \r, \n, etc)
2536               } (end switch)
2537               continue
2538           } (end if backslash)
2539           handle regular character
2540     } (end while character to read)
2541
2542 */
2543
2544 STATIC char *
2545 S_scan_const(pTHX_ char *start)
2546 {
2547     dVAR;
2548     register char *send = PL_bufend;            /* end of the constant */
2549     SV *sv = newSV(send - start);               /* sv for the constant.  See
2550                                                    note below on sizing. */
2551     register char *s = start;                   /* start of the constant */
2552     register char *d = SvPVX(sv);               /* destination for copies */
2553     bool dorange = FALSE;                       /* are we in a translit range? */
2554     bool didrange = FALSE;                      /* did we just finish a range? */
2555     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2556     I32  this_utf8 = UTF;                       /* Is the source string assumed
2557                                                    to be UTF8?  But, this can
2558                                                    show as true when the source
2559                                                    isn't utf8, as for example
2560                                                    when it is entirely composed
2561                                                    of hex constants */
2562
2563     /* Note on sizing:  The scanned constant is placed into sv, which is
2564      * initialized by newSV() assuming one byte of output for every byte of
2565      * input.  This routine expects newSV() to allocate an extra byte for a
2566      * trailing NUL, which this routine will append if it gets to the end of
2567      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2568      * CAPITAL LETTER A}), or more output than input if the constant ends up
2569      * recoded to utf8, but each time a construct is found that might increase
2570      * the needed size, SvGROW() is called.  Its size parameter each time is
2571      * based on the best guess estimate at the time, namely the length used so
2572      * far, plus the length the current construct will occupy, plus room for
2573      * the trailing NUL, plus one byte for every input byte still unscanned */
2574
2575     UV uv;
2576 #ifdef EBCDIC
2577     UV literal_endpoint = 0;
2578     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2579 #endif
2580
2581     PERL_ARGS_ASSERT_SCAN_CONST;
2582
2583     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2584         /* If we are doing a trans and we know we want UTF8 set expectation */
2585         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2586         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2587     }
2588
2589
2590     while (s < send || dorange) {
2591
2592         /* get transliterations out of the way (they're most literal) */
2593         if (PL_lex_inwhat == OP_TRANS) {
2594             /* expand a range A-Z to the full set of characters.  AIE! */
2595             if (dorange) {
2596                 I32 i;                          /* current expanded character */
2597                 I32 min;                        /* first character in range */
2598                 I32 max;                        /* last character in range */
2599
2600 #ifdef EBCDIC
2601                 UV uvmax = 0;
2602 #endif
2603
2604                 if (has_utf8
2605 #ifdef EBCDIC
2606                     && !native_range
2607 #endif
2608                     ) {
2609                     char * const c = (char*)utf8_hop((U8*)d, -1);
2610                     char *e = d++;
2611                     while (e-- > c)
2612                         *(e + 1) = *e;
2613                     *c = (char)UTF_TO_NATIVE(0xff);
2614                     /* mark the range as done, and continue */
2615                     dorange = FALSE;
2616                     didrange = TRUE;
2617                     continue;
2618                 }
2619
2620                 i = d - SvPVX_const(sv);                /* remember current offset */
2621 #ifdef EBCDIC
2622                 SvGROW(sv,
2623                        SvLEN(sv) + (has_utf8 ?
2624                                     (512 - UTF_CONTINUATION_MARK +
2625                                      UNISKIP(0x100))
2626                                     : 256));
2627                 /* How many two-byte within 0..255: 128 in UTF-8,
2628                  * 96 in UTF-8-mod. */
2629 #else
2630                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2631 #endif
2632                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2633 #ifdef EBCDIC
2634                 if (has_utf8) {
2635                     int j;
2636                     for (j = 0; j <= 1; j++) {
2637                         char * const c = (char*)utf8_hop((U8*)d, -1);
2638                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2639                         if (j)
2640                             min = (U8)uv;
2641                         else if (uv < 256)
2642                             max = (U8)uv;
2643                         else {
2644                             max = (U8)0xff; /* only to \xff */
2645                             uvmax = uv; /* \x{100} to uvmax */
2646                         }
2647                         d = c; /* eat endpoint chars */
2648                      }
2649                 }
2650                else {
2651 #endif
2652                    d -= 2;              /* eat the first char and the - */
2653                    min = (U8)*d;        /* first char in range */
2654                    max = (U8)d[1];      /* last char in range  */
2655 #ifdef EBCDIC
2656                }
2657 #endif
2658
2659                 if (min > max) {
2660                     Perl_croak(aTHX_
2661                                "Invalid range \"%c-%c\" in transliteration operator",
2662                                (char)min, (char)max);
2663                 }
2664
2665 #ifdef EBCDIC
2666                 if (literal_endpoint == 2 &&
2667                     ((isLOWER(min) && isLOWER(max)) ||
2668                      (isUPPER(min) && isUPPER(max)))) {
2669                     if (isLOWER(min)) {
2670                         for (i = min; i <= max; i++)
2671                             if (isLOWER(i))
2672                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2673                     } else {
2674                         for (i = min; i <= max; i++)
2675                             if (isUPPER(i))
2676                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2677                     }
2678                 }
2679                 else
2680 #endif
2681                     for (i = min; i <= max; i++)
2682 #ifdef EBCDIC
2683                         if (has_utf8) {
2684                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2685                             if (UNI_IS_INVARIANT(ch))
2686                                 *d++ = (U8)i;
2687                             else {
2688                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2689                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2690                             }
2691                         }
2692                         else
2693 #endif
2694                             *d++ = (char)i;
2695
2696 #ifdef EBCDIC
2697                 if (uvmax) {
2698                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2699                     if (uvmax > 0x101)
2700                         *d++ = (char)UTF_TO_NATIVE(0xff);
2701                     if (uvmax > 0x100)
2702                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2703                 }
2704 #endif
2705
2706                 /* mark the range as done, and continue */
2707                 dorange = FALSE;
2708                 didrange = TRUE;
2709 #ifdef EBCDIC
2710                 literal_endpoint = 0;
2711 #endif
2712                 continue;
2713             }
2714
2715             /* range begins (ignore - as first or last char) */
2716             else if (*s == '-' && s+1 < send  && s != start) {
2717                 if (didrange) {
2718                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2719                 }
2720                 if (has_utf8
2721 #ifdef EBCDIC
2722                     && !native_range
2723 #endif
2724                     ) {
2725                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2726                     s++;
2727                     continue;
2728                 }
2729                 dorange = TRUE;
2730                 s++;
2731             }
2732             else {
2733                 didrange = FALSE;
2734 #ifdef EBCDIC
2735                 literal_endpoint = 0;
2736                 native_range = TRUE;
2737 #endif
2738             }
2739         }
2740
2741         /* if we get here, we're not doing a transliteration */
2742
2743         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2744            except for the last char, which will be done separately. */
2745         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2746             if (s[2] == '#') {
2747                 while (s+1 < send && *s != ')')
2748                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2749             }
2750             else if (s[2] == '{' /* This should match regcomp.c */
2751                     || (s[2] == '?' && s[3] == '{'))
2752             {
2753                 I32 count = 1;
2754                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2755                 char c;
2756
2757                 while (count && (c = *regparse)) {
2758                     if (c == '\\' && regparse[1])
2759                         regparse++;
2760                     else if (c == '{')
2761                         count++;
2762                     else if (c == '}')
2763                         count--;
2764                     regparse++;
2765                 }
2766                 if (*regparse != ')')
2767                     regparse--;         /* Leave one char for continuation. */
2768                 while (s < regparse)
2769                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2770             }
2771         }
2772
2773         /* likewise skip #-initiated comments in //x patterns */
2774         else if (*s == '#' && PL_lex_inpat &&
2775           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2776             while (s+1 < send && *s != '\n')
2777                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2778         }
2779
2780         /* check for embedded arrays
2781            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2782            */
2783         else if (*s == '@' && s[1]) {
2784             if (isALNUM_lazy_if(s+1,UTF))
2785                 break;
2786             if (strchr(":'{$", s[1]))
2787                 break;
2788             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2789                 break; /* in regexp, neither @+ nor @- are interpolated */
2790         }
2791
2792         /* check for embedded scalars.  only stop if we're sure it's a
2793            variable.
2794         */
2795         else if (*s == '$') {
2796             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2797                 break;
2798             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2799                 if (s[1] == '\\') {
2800                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2801                                    "Possible unintended interpolation of $\\ in regex");
2802                 }
2803                 break;          /* in regexp, $ might be tail anchor */
2804             }
2805         }
2806
2807         /* End of else if chain - OP_TRANS rejoin rest */
2808
2809         /* backslashes */
2810         if (*s == '\\' && s+1 < send) {
2811             char* e;    /* Can be used for ending '}', etc. */
2812
2813             s++;
2814
2815             /* warn on \1 - \9 in substitution replacements, but note that \11
2816              * is an octal; and \19 is \1 followed by '9' */
2817             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2818                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2819             {
2820                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2821                 *--s = '$';
2822                 break;
2823             }
2824
2825             /* string-change backslash escapes */
2826             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2827                 --s;
2828                 break;
2829             }
2830             /* In a pattern, process \N, but skip any other backslash escapes.
2831              * This is because we don't want to translate an escape sequence
2832              * into a meta symbol and have the regex compiler use the meta
2833              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2834              * in spite of this, we do have to process \N here while the proper
2835              * charnames handler is in scope.  See bugs #56444 and #62056.
2836              * There is a complication because \N in a pattern may also stand
2837              * for 'match a non-nl', and not mean a charname, in which case its
2838              * processing should be deferred to the regex compiler.  To be a
2839              * charname it must be followed immediately by a '{', and not look
2840              * like \N followed by a curly quantifier, i.e., not something like
2841              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2842              * quantifier */
2843             else if (PL_lex_inpat
2844                     && (*s != 'N'
2845                         || s[1] != '{'
2846                         || regcurly(s + 1)))
2847             {
2848                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2849                 goto default_action;
2850             }
2851
2852             switch (*s) {
2853
2854             /* quoted - in transliterations */
2855             case '-':
2856                 if (PL_lex_inwhat == OP_TRANS) {
2857                     *d++ = *s++;
2858                     continue;
2859                 }
2860                 /* FALL THROUGH */
2861             default:
2862                 {
2863                     if ((isALPHA(*s) || isDIGIT(*s)))
2864                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2865                                        "Unrecognized escape \\%c passed through",
2866                                        *s);
2867                     /* default action is to copy the quoted character */
2868                     goto default_action;
2869                 }
2870
2871             /* eg. \132 indicates the octal constant 0132 */
2872             case '0': case '1': case '2': case '3':
2873             case '4': case '5': case '6': case '7':
2874                 {
2875                     I32 flags = 0;
2876                     STRLEN len = 3;
2877                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2878                     s += len;
2879                 }
2880                 goto NUM_ESCAPE_INSERT;
2881
2882             /* eg. \o{24} indicates the octal constant \024 */
2883             case 'o':
2884                 {
2885                     STRLEN len;
2886                     const char* error;
2887
2888                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2889                     s += len;
2890                     if (! valid) {
2891                         yyerror(error);
2892                         continue;
2893                     }
2894                     goto NUM_ESCAPE_INSERT;
2895                 }
2896
2897             /* eg. \x24 indicates the hex constant 0x24 */
2898             case 'x':
2899                 ++s;
2900                 if (*s == '{') {
2901                     char* const e = strchr(s, '}');
2902                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2903                       PERL_SCAN_DISALLOW_PREFIX;
2904                     STRLEN len;
2905
2906                     ++s;
2907                     if (!e) {
2908                         yyerror("Missing right brace on \\x{}");
2909                         continue;
2910                     }
2911                     len = e - s;
2912                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2913                     s = e + 1;
2914                 }
2915                 else {
2916                     {
2917                         STRLEN len = 2;
2918                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2919                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2920                         s += len;
2921                     }
2922                 }
2923
2924               NUM_ESCAPE_INSERT:
2925                 /* Insert oct or hex escaped character.  There will always be
2926                  * enough room in sv since such escapes will be longer than any
2927                  * UTF-8 sequence they can end up as, except if they force us
2928                  * to recode the rest of the string into utf8 */
2929
2930                 /* Here uv is the ordinal of the next character being added in
2931                  * unicode (converted from native). */
2932                 if (!UNI_IS_INVARIANT(uv)) {
2933                     if (!has_utf8 && uv > 255) {
2934                         /* Might need to recode whatever we have accumulated so
2935                          * far if it contains any chars variant in utf8 or
2936                          * utf-ebcdic. */
2937
2938                         SvCUR_set(sv, d - SvPVX_const(sv));
2939                         SvPOK_on(sv);
2940                         *d = '\0';
2941                         /* See Note on sizing above.  */
2942                         sv_utf8_upgrade_flags_grow(sv,
2943                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2944                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2945                         d = SvPVX(sv) + SvCUR(sv);
2946                         has_utf8 = TRUE;
2947                     }
2948
2949                     if (has_utf8) {
2950                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2951                         if (PL_lex_inwhat == OP_TRANS &&
2952                             PL_sublex_info.sub_op) {
2953                             PL_sublex_info.sub_op->op_private |=
2954                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2955                                              : OPpTRANS_TO_UTF);
2956                         }
2957 #ifdef EBCDIC
2958                         if (uv > 255 && !dorange)
2959                             native_range = FALSE;
2960 #endif
2961                     }
2962                     else {
2963                         *d++ = (char)uv;
2964                     }
2965                 }
2966                 else {
2967                     *d++ = (char) uv;
2968                 }
2969                 continue;
2970
2971             case 'N':
2972                 /* In a non-pattern \N must be a named character, like \N{LATIN
2973                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
2974                  * mean to match a non-newline.  For non-patterns, named
2975                  * characters are converted to their string equivalents. In
2976                  * patterns, named characters are not converted to their
2977                  * ultimate forms for the same reasons that other escapes
2978                  * aren't.  Instead, they are converted to the \N{U+...} form
2979                  * to get the value from the charnames that is in effect right
2980                  * now, while preserving the fact that it was a named character
2981                  * so that the regex compiler knows this */
2982
2983                 /* This section of code doesn't generally use the
2984                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
2985                  * a close examination of this macro and determined it is a
2986                  * no-op except on utfebcdic variant characters.  Every
2987                  * character generated by this that would normally need to be
2988                  * enclosed by this macro is invariant, so the macro is not
2989                  * needed, and would complicate use of copy(). There are other
2990                  * parts of this file where the macro is used inconsistently,
2991                  * but are saved by it being a no-op */
2992
2993                 /* The structure of this section of code (besides checking for
2994                  * errors and upgrading to utf8) is:
2995                  *  Further disambiguate between the two meanings of \N, and if
2996                  *      not a charname, go process it elsewhere
2997                  *  If of form \N{U+...}, pass it through if a pattern;
2998                  *      otherwise convert to utf8
2999                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3000                  *  pattern; otherwise convert to utf8 */
3001
3002                 /* Here, s points to the 'N'; the test below is guaranteed to
3003                  * succeed if we are being called on a pattern as we already
3004                  * know from a test above that the next character is a '{'.
3005                  * On a non-pattern \N must mean 'named sequence, which
3006                  * requires braces */
3007                 s++;
3008                 if (*s != '{') {
3009                     yyerror("Missing braces on \\N{}");
3010                     continue;
3011                 }
3012                 s++;
3013
3014                 /* If there is no matching '}', it is an error. */
3015                 if (! (e = strchr(s, '}'))) {
3016                     if (! PL_lex_inpat) {
3017                         yyerror("Missing right brace on \\N{}");
3018                     } else {
3019                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3020                     }
3021                     continue;
3022                 }
3023
3024                 /* Here it looks like a named character */
3025
3026                 if (PL_lex_inpat) {
3027
3028                     /* XXX This block is temporary code.  \N{} implies that the
3029                      * pattern is to have Unicode semantics, and therefore
3030                      * currently has to be encoded in utf8.  By putting it in
3031                      * utf8 now, we save a whole pass in the regular expression
3032                      * compiler.  Once that code is changed so Unicode
3033                      * semantics doesn't necessarily have to be in utf8, this
3034                      * block should be removed */
3035                     if (!has_utf8) {
3036                         SvCUR_set(sv, d - SvPVX_const(sv));
3037                         SvPOK_on(sv);
3038                         *d = '\0';
3039                         /* See Note on sizing above.  */
3040                         sv_utf8_upgrade_flags_grow(sv,
3041                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3042                                         /* 5 = '\N{' + cur char + NUL */
3043                                         (STRLEN)(send - s) + 5);
3044                         d = SvPVX(sv) + SvCUR(sv);
3045                         has_utf8 = TRUE;
3046                     }
3047                 }
3048
3049                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3050                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3051                                 | PERL_SCAN_DISALLOW_PREFIX;
3052                     STRLEN len;
3053
3054                     /* For \N{U+...}, the '...' is a unicode value even on
3055                      * EBCDIC machines */
3056                     s += 2;         /* Skip to next char after the 'U+' */
3057                     len = e - s;
3058                     uv = grok_hex(s, &len, &flags, NULL);
3059                     if (len == 0 || len != (STRLEN)(e - s)) {
3060                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3061                         s = e + 1;
3062                         continue;
3063                     }
3064
3065                     if (PL_lex_inpat) {
3066
3067                         /* Pass through to the regex compiler unchanged.  The
3068                          * reason we evaluated the number above is to make sure
3069                          * there wasn't a syntax error. */
3070                         s -= 5;     /* Include the '\N{U+' */
3071                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3072                         d += e - s + 1;
3073                     }
3074                     else {  /* Not a pattern: convert the hex to string */
3075
3076                          /* If destination is not in utf8, unconditionally
3077                           * recode it to be so.  This is because \N{} implies
3078                           * Unicode semantics, and scalars have to be in utf8
3079                           * to guarantee those semantics */
3080                         if (! has_utf8) {
3081                             SvCUR_set(sv, d - SvPVX_const(sv));
3082                             SvPOK_on(sv);
3083                             *d = '\0';
3084                             /* See Note on sizing above.  */
3085                             sv_utf8_upgrade_flags_grow(
3086                                         sv,
3087                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3088                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3089                             d = SvPVX(sv) + SvCUR(sv);
3090                             has_utf8 = TRUE;
3091                         }
3092
3093                         /* Add the string to the output */
3094                         if (UNI_IS_INVARIANT(uv)) {
3095                             *d++ = (char) uv;
3096                         }
3097                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3098                     }
3099                 }
3100                 else { /* Here is \N{NAME} but not \N{U+...}. */
3101
3102                     SV *res;            /* result from charnames */
3103                     const char *str;    /* the string in 'res' */
3104                     STRLEN len;         /* its length */
3105
3106                     /* Get the value for NAME */
3107                     res = newSVpvn(s, e - s);
3108                     res = new_constant( NULL, 0, "charnames",
3109                                         /* includes all of: \N{...} */
3110                                         res, NULL, s - 3, e - s + 4 );
3111
3112                     /* Most likely res will be in utf8 already since the
3113                      * standard charnames uses pack U, but a custom translator
3114                      * can leave it otherwise, so make sure.  XXX This can be
3115                      * revisited to not have charnames use utf8 for characters
3116                      * that don't need it when regexes don't have to be in utf8
3117                      * for Unicode semantics.  If doing so, remember EBCDIC */
3118                     sv_utf8_upgrade(res);
3119                     str = SvPV_const(res, len);
3120
3121                     /* Don't accept malformed input */
3122                     if (! is_utf8_string((U8 *) str, len)) {
3123                         yyerror("Malformed UTF-8 returned by \\N");
3124                     }
3125                     else if (PL_lex_inpat) {
3126
3127                         if (! len) { /* The name resolved to an empty string */
3128                             Copy("\\N{}", d, 4, char);
3129                             d += 4;
3130                         }
3131                         else {
3132                             /* In order to not lose information for the regex
3133                             * compiler, pass the result in the specially made
3134                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3135                             * the code points in hex of each character
3136                             * returned by charnames */
3137
3138                             const char *str_end = str + len;
3139                             STRLEN char_length;     /* cur char's byte length */
3140                             STRLEN output_length;   /* and the number of bytes
3141                                                        after this is translated
3142                                                        into hex digits */
3143                             const STRLEN off = d - SvPVX_const(sv);
3144
3145                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3146                              * max('U+', '.'); and 1 for NUL */
3147                             char hex_string[2 * UTF8_MAXBYTES + 5];
3148
3149                             /* Get the first character of the result. */
3150                             U32 uv = utf8n_to_uvuni((U8 *) str,
3151                                                     len,
3152                                                     &char_length,
3153                                                     UTF8_ALLOW_ANYUV);
3154
3155                             /* The call to is_utf8_string() above hopefully
3156                              * guarantees that there won't be an error.  But
3157                              * it's easy here to make sure.  The function just
3158                              * above warns and returns 0 if invalid utf8, but
3159                              * it can also return 0 if the input is validly a
3160                              * NUL. Disambiguate */
3161                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3162                                 uv = UNICODE_REPLACEMENT;
3163                             }
3164
3165                             /* Convert first code point to hex, including the
3166                              * boiler plate before it */
3167                             sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3168                             output_length = strlen(hex_string);
3169
3170                             /* Make sure there is enough space to hold it */
3171                             d = off + SvGROW(sv, off
3172                                                  + output_length
3173                                                  + (STRLEN)(send - e)
3174                                                  + 2);  /* '}' + NUL */
3175                             /* And output it */
3176                             Copy(hex_string, d, output_length, char);
3177                             d += output_length;
3178
3179                             /* For each subsequent character, append dot and
3180                              * its ordinal in hex */
3181                             while ((str += char_length) < str_end) {
3182                                 const STRLEN off = d - SvPVX_const(sv);
3183                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3184                                                         str_end - str,
3185                                                         &char_length,
3186                                                         UTF8_ALLOW_ANYUV);
3187                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3188                                     uv = UNICODE_REPLACEMENT;
3189                                 }
3190
3191                                 sprintf(hex_string, ".%X", (unsigned int) uv);
3192                                 output_length = strlen(hex_string);
3193
3194                                 d = off + SvGROW(sv, off
3195                                                      + output_length
3196                                                      + (STRLEN)(send - e)
3197                                                      + 2);      /* '}' +  NUL */
3198                                 Copy(hex_string, d, output_length, char);
3199                                 d += output_length;
3200                             }
3201
3202                             *d++ = '}'; /* Done.  Add the trailing brace */
3203                         }
3204                     }
3205                     else { /* Here, not in a pattern.  Convert the name to a
3206                             * string. */
3207
3208                          /* If destination is not in utf8, unconditionally
3209                           * recode it to be so.  This is because \N{} implies
3210                           * Unicode semantics, and scalars have to be in utf8
3211                           * to guarantee those semantics */
3212                         if (! has_utf8) {
3213                             SvCUR_set(sv, d - SvPVX_const(sv));
3214                             SvPOK_on(sv);
3215                             *d = '\0';
3216                             /* See Note on sizing above.  */
3217                             sv_utf8_upgrade_flags_grow(sv,
3218                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3219                                                 len + (STRLEN)(send - s) + 1);
3220                             d = SvPVX(sv) + SvCUR(sv);
3221                             has_utf8 = TRUE;
3222                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3223
3224                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3225                              * set correctly here). */
3226                             const STRLEN off = d - SvPVX_const(sv);
3227                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3228                         }
3229                         Copy(str, d, len, char);
3230                         d += len;
3231                     }
3232                     SvREFCNT_dec(res);
3233
3234                     /* Deprecate non-approved name syntax */
3235                     if (ckWARN_d(WARN_DEPRECATED)) {
3236                         bool problematic = FALSE;
3237                         char* i = s;
3238
3239                         /* For non-ut8 input, look to see that the first
3240                          * character is an alpha, then loop through the rest
3241                          * checking that each is a continuation */
3242                         if (! this_utf8) {
3243                             if (! isALPHAU(*i)) problematic = TRUE;
3244                             else for (i = s + 1; i < e; i++) {
3245                                 if (isCHARNAME_CONT(*i)) continue;
3246                                 problematic = TRUE;
3247                                 break;
3248                             }
3249                         }
3250                         else {
3251                             /* Similarly for utf8.  For invariants can check
3252                              * directly.  We accept anything above the latin1
3253                              * range because it is immaterial to Perl if it is
3254                              * correct or not, and is expensive to check.  But
3255                              * it is fairly easy in the latin1 range to convert
3256                              * the variants into a single character and check
3257                              * those */
3258                             if (UTF8_IS_INVARIANT(*i)) {
3259                                 if (! isALPHAU(*i)) problematic = TRUE;
3260                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3261                                 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3262                                                                             *(i+1)))))
3263                                 {
3264                                     problematic = TRUE;
3265                                 }
3266                             }
3267                             if (! problematic) for (i = s + UTF8SKIP(s);
3268                                                     i < e;
3269                                                     i+= UTF8SKIP(i))
3270                             {
3271                                 if (UTF8_IS_INVARIANT(*i)) {
3272                                     if (isCHARNAME_CONT(*i)) continue;
3273                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3274                                     continue;
3275                                 } else if (isCHARNAME_CONT(
3276                                             UNI_TO_NATIVE(
3277                                             UTF8_ACCUMULATE(*i, *(i+1)))))
3278                                 {
3279                                     continue;
3280                                 }
3281                                 problematic = TRUE;
3282                                 break;
3283                             }
3284                         }
3285                         if (problematic) {
3286                             /* The e-i passed to the final %.*s makes sure that
3287                              * should the trailing NUL be missing that this
3288                              * print won't run off the end of the string */
3289                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3290                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3291                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3292                         }
3293                     }
3294                 } /* End \N{NAME} */
3295 #ifdef EBCDIC
3296                 if (!dorange)
3297                     native_range = FALSE; /* \N{} is defined to be Unicode */
3298 #endif
3299                 s = e + 1;  /* Point to just after the '}' */
3300                 continue;
3301
3302             /* \c is a control character */
3303             case 'c':
3304                 s++;
3305                 if (s < send) {
3306                     *d++ = grok_bslash_c(*s++, 1);
3307                 }
3308                 else {
3309                     yyerror("Missing control char name in \\c");
3310                 }
3311                 continue;
3312
3313             /* printf-style backslashes, formfeeds, newlines, etc */
3314             case 'b':
3315                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3316                 break;
3317             case 'n':
3318                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3319                 break;
3320             case 'r':
3321                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3322                 break;
3323             case 'f':
3324                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3325                 break;
3326             case 't':
3327                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3328                 break;
3329             case 'e':
3330                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3331                 break;
3332             case 'a':
3333                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3334                 break;
3335             } /* end switch */
3336
3337             s++;
3338             continue;
3339         } /* end if (backslash) */
3340 #ifdef EBCDIC
3341         else
3342             literal_endpoint++;
3343 #endif
3344
3345     default_action:
3346         /* If we started with encoded form, or already know we want it,
3347            then encode the next character */
3348         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3349             STRLEN len  = 1;
3350
3351
3352             /* One might think that it is wasted effort in the case of the
3353              * source being utf8 (this_utf8 == TRUE) to take the next character
3354              * in the source, convert it to an unsigned value, and then convert
3355              * it back again.  But the source has not been validated here.  The
3356              * routine that does the conversion checks for errors like
3357              * malformed utf8 */
3358
3359             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3360             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3361             if (!has_utf8) {
3362                 SvCUR_set(sv, d - SvPVX_const(sv));
3363                 SvPOK_on(sv);
3364                 *d = '\0';
3365                 /* See Note on sizing above.  */
3366                 sv_utf8_upgrade_flags_grow(sv,
3367                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3368                                         need + (STRLEN)(send - s) + 1);
3369                 d = SvPVX(sv) + SvCUR(sv);
3370                 has_utf8 = TRUE;
3371             } else if (need > len) {
3372                 /* encoded value larger than old, may need extra space (NOTE:
3373                  * SvCUR() is not set correctly here).   See Note on sizing
3374                  * above.  */
3375                 const STRLEN off = d - SvPVX_const(sv);
3376                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3377             }
3378             s += len;
3379
3380             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3381 #ifdef EBCDIC
3382             if (uv > 255 && !dorange)
3383                 native_range = FALSE;
3384 #endif
3385         }
3386         else {
3387             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3388         }
3389     } /* while loop to process each character */
3390
3391     /* terminate the string and set up the sv */
3392     *d = '\0';
3393     SvCUR_set(sv, d - SvPVX_const(sv));
3394     if (SvCUR(sv) >= SvLEN(sv))
3395         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3396
3397     SvPOK_on(sv);
3398     if (PL_encoding && !has_utf8) {
3399         sv_recode_to_utf8(sv, PL_encoding);
3400         if (SvUTF8(sv))
3401             has_utf8 = TRUE;
3402     }
3403     if (has_utf8) {
3404         SvUTF8_on(sv);
3405         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3406             PL_sublex_info.sub_op->op_private |=
3407                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3408         }
3409     }
3410
3411     /* shrink the sv if we allocated more than we used */
3412     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3413         SvPV_shrink_to_cur(sv);
3414     }
3415
3416     /* return the substring (via pl_yylval) only if we parsed anything */
3417     if (s > PL_bufptr) {
3418         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3419             const char *const key = PL_lex_inpat ? "qr" : "q";
3420             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3421             const char *type;
3422             STRLEN typelen;
3423
3424             if (PL_lex_inwhat == OP_TRANS) {
3425                 type = "tr";
3426                 typelen = 2;
3427             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3428                 type = "s";
3429                 typelen = 1;
3430             } else  {
3431                 type = "qq";
3432                 typelen = 2;
3433             }
3434
3435             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3436                                 type, typelen);
3437         }
3438         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3439     } else
3440         SvREFCNT_dec(sv);
3441     return s;
3442 }
3443
3444 /* S_intuit_more
3445  * Returns TRUE if there's more to the expression (e.g., a subscript),
3446  * FALSE otherwise.
3447  *
3448  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3449  *
3450  * ->[ and ->{ return TRUE
3451  * { and [ outside a pattern are always subscripts, so return TRUE
3452  * if we're outside a pattern and it's not { or [, then return FALSE
3453  * if we're in a pattern and the first char is a {
3454  *   {4,5} (any digits around the comma) returns FALSE
3455  * if we're in a pattern and the first char is a [
3456  *   [] returns FALSE
3457  *   [SOMETHING] has a funky algorithm to decide whether it's a
3458  *      character class or not.  It has to deal with things like
3459  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3460  * anything else returns TRUE
3461  */
3462
3463 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3464
3465 STATIC int
3466 S_intuit_more(pTHX_ register char *s)
3467 {
3468     dVAR;
3469
3470     PERL_ARGS_ASSERT_INTUIT_MORE;
3471
3472     if (PL_lex_brackets)
3473         return TRUE;
3474     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3475         return TRUE;
3476     if (*s != '{' && *s != '[')
3477         return FALSE;
3478     if (!PL_lex_inpat)
3479         return TRUE;
3480
3481     /* In a pattern, so maybe we have {n,m}. */
3482     if (*s == '{') {
3483         s++;
3484         if (!isDIGIT(*s))
3485             return TRUE;
3486         while (isDIGIT(*s))
3487             s++;
3488         if (*s == ',')
3489             s++;
3490         while (isDIGIT(*s))
3491             s++;
3492         if (*s == '}')
3493             return FALSE;
3494         return TRUE;
3495
3496     }
3497
3498     /* On the other hand, maybe we have a character class */
3499
3500     s++;
3501     if (*s == ']' || *s == '^')
3502         return FALSE;
3503     else {
3504         /* this is terrifying, and it works */
3505         int weight = 2;         /* let's weigh the evidence */
3506         char seen[256];
3507         unsigned char un_char = 255, last_un_char;
3508         const char * const send = strchr(s,']');
3509         char tmpbuf[sizeof PL_tokenbuf * 4];
3510
3511         if (!send)              /* has to be an expression */
3512             return TRUE;
3513
3514         Zero(seen,256,char);
3515         if (*s == '$')
3516             weight -= 3;
3517         else if (isDIGIT(*s)) {
3518             if (s[1] != ']') {
3519                 if (isDIGIT(s[1]) && s[2] == ']')
3520                     weight -= 10;
3521             }
3522             else
3523                 weight -= 100;
3524         }
3525         for (; s < send; s++) {
3526             last_un_char = un_char;
3527             un_char = (unsigned char)*s;
3528             switch (*s) {
3529             case '@':
3530             case '&':
3531             case '$':
3532                 weight -= seen[un_char] * 10;
3533                 if (isALNUM_lazy_if(s+1,UTF)) {
3534                     int len;
3535                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3536                     len = (int)strlen(tmpbuf);
3537                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3538                         weight -= 100;
3539                     else
3540                         weight -= 10;
3541                 }
3542                 else if (*s == '$' && s[1] &&
3543                   strchr("[#!%*<>()-=",s[1])) {
3544                     if (/*{*/ strchr("])} =",s[2]))
3545                         weight -= 10;
3546                     else
3547                         weight -= 1;
3548                 }
3549                 break;
3550             case '\\':
3551                 un_char = 254;
3552                 if (s[1]) {
3553                     if (strchr("wds]",s[1]))
3554                         weight += 100;
3555                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3556                         weight += 1;
3557                     else if (strchr("rnftbxcav",s[1]))
3558                         weight += 40;
3559                     else if (isDIGIT(s[1])) {
3560                         weight += 40;
3561                         while (s[1] && isDIGIT(s[1]))
3562                             s++;
3563                     }
3564                 }
3565                 else
3566                     weight += 100;
3567                 break;
3568             case '-':
3569                 if (s[1] == '\\')
3570                     weight += 50;
3571                 if (strchr("aA01! ",last_un_char))
3572                     weight += 30;
3573                 if (strchr("zZ79~",s[1]))
3574                     weight += 30;
3575                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3576                     weight -= 5;        /* cope with negative subscript */
3577                 break;
3578             default:
3579                 if (!isALNUM(last_un_char)
3580                     && !(last_un_char == '$' || last_un_char == '@'
3581                          || last_un_char == '&')
3582                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3583                     char *d = tmpbuf;
3584                     while (isALPHA(*s))
3585                         *d++ = *s++;
3586                     *d = '\0';
3587                     if (keyword(tmpbuf, d - tmpbuf, 0))
3588                         weight -= 150;
3589                 }
3590                 if (un_char == last_un_char + 1)
3591                     weight += 5;
3592                 weight -= seen[un_char];
3593                 break;
3594             }
3595             seen[un_char]++;
3596         }
3597         if (weight >= 0)        /* probably a character class */
3598             return FALSE;
3599     }
3600
3601     return TRUE;
3602 }
3603
3604 /*
3605  * S_intuit_method
3606  *
3607  * Does all the checking to disambiguate
3608  *   foo bar
3609  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3610  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3611  *
3612  * First argument is the stuff after the first token, e.g. "bar".
3613  *
3614  * Not a method if bar is a filehandle.
3615  * Not a method if foo is a subroutine prototyped to take a filehandle.
3616  * Not a method if it's really "Foo $bar"
3617  * Method if it's "foo $bar"
3618  * Not a method if it's really "print foo $bar"
3619  * Method if it's really "foo package::" (interpreted as package->foo)
3620  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3621  * Not a method if bar is a filehandle or package, but is quoted with
3622  *   =>
3623  */
3624
3625 STATIC int
3626 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3627 {
3628     dVAR;
3629     char *s = start + (*start == '$');
3630     char tmpbuf[sizeof PL_tokenbuf];
3631     STRLEN len;
3632     GV* indirgv;
3633 #ifdef PERL_MAD
3634     int soff;
3635 #endif
3636
3637     PERL_ARGS_ASSERT_INTUIT_METHOD;
3638
3639     if (gv) {
3640         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3641             return 0;
3642         if (cv) {
3643             if (SvPOK(cv)) {
3644                 const char *proto = SvPVX_const(cv);
3645                 if (proto) {
3646                     if (*proto == ';')
3647                         proto++;
3648                     if (*proto == '*')
3649                         return 0;
3650                 }
3651             }
3652         } else
3653             gv = NULL;
3654     }
3655     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3656     /* start is the beginning of the possible filehandle/object,
3657      * and s is the end of it
3658      * tmpbuf is a copy of it
3659      */
3660
3661     if (*start == '$') {
3662         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3663                 isUPPER(*PL_tokenbuf))
3664             return 0;
3665 #ifdef PERL_MAD
3666         len = start - SvPVX(PL_linestr);
3667 #endif
3668         s = PEEKSPACE(s);
3669 #ifdef PERL_MAD
3670         start = SvPVX(PL_linestr) + len;
3671 #endif
3672         PL_bufptr = start;
3673         PL_expect = XREF;
3674         return *s == '(' ? FUNCMETH : METHOD;
3675     }
3676     if (!keyword(tmpbuf, len, 0)) {
3677         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3678             len -= 2;
3679             tmpbuf[len] = '\0';
3680 #ifdef PERL_MAD
3681             soff = s - SvPVX(PL_linestr);
3682 #endif
3683             goto bare_package;
3684         }
3685         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3686         if (indirgv && GvCVu(indirgv))
3687             return 0;
3688         /* filehandle or package name makes it a method */
3689         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3690 #ifdef PERL_MAD
3691             soff = s - SvPVX(PL_linestr);
3692 #endif
3693             s = PEEKSPACE(s);
3694             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3695                 return 0;       /* no assumptions -- "=>" quotes bearword */
3696       bare_package:
3697             start_force(PL_curforce);
3698             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3699                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3700             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3701             if (PL_madskills)
3702                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3703             PL_expect = XTERM;
3704             force_next(WORD);
3705             PL_bufptr = s;
3706 #ifdef PERL_MAD
3707             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3708 #endif
3709             return *s == '(' ? FUNCMETH : METHOD;
3710         }
3711     }
3712     return 0;
3713 }
3714
3715 /* Encoded script support. filter_add() effectively inserts a
3716  * 'pre-processing' function into the current source input stream.
3717  * Note that the filter function only applies to the current source file
3718  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3719  *
3720  * The datasv parameter (which may be NULL) can be used to pass
3721  * private data to this instance of the filter. The filter function
3722  * can recover the SV using the FILTER_DATA macro and use it to
3723  * store private buffers and state information.
3724  *
3725  * The supplied datasv parameter is upgraded to a PVIO type
3726  * and the IoDIRP/IoANY field is used to store the function pointer,
3727  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3728  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3729  * private use must be set using malloc'd pointers.
3730  */
3731
3732 SV *
3733 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3734 {
3735     dVAR;
3736     if (!funcp)
3737         return NULL;
3738
3739     if (!PL_parser)
3740         return NULL;
3741
3742     if (!PL_rsfp_filters)
3743         PL_rsfp_filters = newAV();
3744     if (!datasv)
3745         datasv = newSV(0);
3746     SvUPGRADE(datasv, SVt_PVIO);
3747     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3748     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3749     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3750                           FPTR2DPTR(void *, IoANY(datasv)),
3751                           SvPV_nolen(datasv)));
3752     av_unshift(PL_rsfp_filters, 1);
3753     av_store(PL_rsfp_filters, 0, datasv) ;
3754     return(datasv);
3755 }
3756
3757
3758 /* Delete most recently added instance of this filter function. */
3759 void
3760 Perl_filter_del(pTHX_ filter_t funcp)
3761 {
3762     dVAR;
3763     SV *datasv;
3764
3765     PERL_ARGS_ASSERT_FILTER_DEL;
3766
3767 #ifdef DEBUGGING
3768     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3769                           FPTR2DPTR(void*, funcp)));
3770 #endif
3771     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3772         return;
3773     /* if filter is on top of stack (usual case) just pop it off */
3774     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3775     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3776         sv_free(av_pop(PL_rsfp_filters));
3777
3778         return;
3779     }
3780     /* we need to search for the correct entry and clear it     */
3781     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3782 }
3783
3784
3785 /* Invoke the idxth filter function for the current rsfp.        */
3786 /* maxlen 0 = read one text line */
3787 I32
3788 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3789 {
3790     dVAR;
3791     filter_t funcp;
3792     SV *datasv = NULL;
3793     /* This API is bad. It should have been using unsigned int for maxlen.
3794        Not sure if we want to change the API, but if not we should sanity
3795        check the value here.  */
3796     const unsigned int correct_length
3797         = maxlen < 0 ?
3798 #ifdef PERL_MICRO
3799         0x7FFFFFFF
3800 #else
3801         INT_MAX
3802 #endif
3803         : maxlen;
3804
3805     PERL_ARGS_ASSERT_FILTER_READ;
3806
3807     if (!PL_parser || !PL_rsfp_filters)
3808         return -1;
3809     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3810         /* Provide a default input filter to make life easy.    */
3811         /* Note that we append to the line. This is handy.      */
3812         DEBUG_P(PerlIO_printf(Perl_debug_log,
3813                               "filter_read %d: from rsfp\n", idx));
3814         if (correct_length) {
3815             /* Want a block */
3816             int len ;
3817             const int old_len = SvCUR(buf_sv);
3818
3819             /* ensure buf_sv is large enough */
3820             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3821             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3822                                    correct_length)) <= 0) {
3823                 if (PerlIO_error(PL_rsfp))
3824                     return -1;          /* error */
3825                 else
3826                     return 0 ;          /* end of file */
3827             }
3828             SvCUR_set(buf_sv, old_len + len) ;
3829             SvPVX(buf_sv)[old_len + len] = '\0';
3830         } else {
3831             /* Want a line */
3832             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3833                 if (PerlIO_error(PL_rsfp))
3834                     return -1;          /* error */
3835                 else
3836                     return 0 ;          /* end of file */
3837             }
3838         }
3839         return SvCUR(buf_sv);
3840     }
3841     /* Skip this filter slot if filter has been deleted */
3842     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3843         DEBUG_P(PerlIO_printf(Perl_debug_log,
3844                               "filter_read %d: skipped (filter deleted)\n",
3845                               idx));
3846         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3847     }
3848     /* Get function pointer hidden within datasv        */
3849     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3850     DEBUG_P(PerlIO_printf(Perl_debug_log,
3851                           "filter_read %d: via function %p (%s)\n",
3852                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3853     /* Call function. The function is expected to       */
3854     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3855     /* Return: <0:error, =0:eof, >0:not eof             */
3856     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3857 }
3858
3859 STATIC char *
3860 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3861 {
3862     dVAR;
3863
3864     PERL_ARGS_ASSERT_FILTER_GETS;
3865
3866 #ifdef PERL_CR_FILTER
3867     if (!PL_rsfp_filters) {
3868         filter_add(S_cr_textfilter,NULL);
3869     }
3870 #endif
3871     if (PL_rsfp_filters) {
3872         if (!append)
3873             SvCUR_set(sv, 0);   /* start with empty line        */
3874         if (FILTER_READ(0, sv, 0) > 0)
3875             return ( SvPVX(sv) ) ;
3876         else
3877             return NULL ;
3878     }
3879     else
3880         return (sv_gets(sv, PL_rsfp, append));
3881 }
3882
3883 STATIC HV *
3884 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3885 {
3886     dVAR;
3887     GV *gv;
3888
3889     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3890
3891     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3892         return PL_curstash;
3893
3894     if (len > 2 &&
3895         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3896         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3897     {
3898         return GvHV(gv);                        /* Foo:: */
3899     }
3900
3901     /* use constant CLASS => 'MyClass' */
3902     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3903     if (gv && GvCV(gv)) {
3904         SV * const sv = cv_const_sv(GvCV(gv));
3905         if (sv)
3906             pkgname = SvPV_const(sv, len);
3907     }
3908
3909     return gv_stashpvn(pkgname, len, 0);
3910 }
3911
3912 /*
3913  * S_readpipe_override
3914  * Check whether readpipe() is overriden, and generates the appropriate
3915  * optree, provided sublex_start() is called afterwards.
3916  */
3917 STATIC void
3918 S_readpipe_override(pTHX)
3919 {
3920     GV **gvp;
3921     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3922     pl_yylval.ival = OP_BACKTICK;
3923     if ((gv_readpipe
3924                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3925             ||
3926             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3927              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3928              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3929     {
3930         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3931             append_elem(OP_LIST,
3932                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3933                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3934     }
3935 }
3936
3937 #ifdef PERL_MAD
3938  /*
3939  * Perl_madlex
3940  * The intent of this yylex wrapper is to minimize the changes to the
3941  * tokener when we aren't interested in collecting madprops.  It remains
3942  * to be seen how successful this strategy will be...
3943  */
3944
3945 int
3946 Perl_madlex(pTHX)
3947 {
3948     int optype;
3949     char *s = PL_bufptr;
3950
3951     /* make sure PL_thiswhite is initialized */
3952     PL_thiswhite = 0;
3953     PL_thismad = 0;
3954
3955     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3956     if (PL_pending_ident)
3957         return S_pending_ident(aTHX);
3958
3959     /* previous token ate up our whitespace? */
3960     if (!PL_lasttoke && PL_nextwhite) {
3961         PL_thiswhite = PL_nextwhite;
3962         PL_nextwhite = 0;
3963     }
3964
3965     /* isolate the token, and figure out where it is without whitespace */
3966     PL_realtokenstart = -1;
3967     PL_thistoken = 0;
3968     optype = yylex();
3969     s = PL_bufptr;
3970     assert(PL_curforce < 0);
3971
3972     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3973         if (!PL_thistoken) {
3974             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3975                 PL_thistoken = newSVpvs("");
3976             else {
3977                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3978                 PL_thistoken = newSVpvn(tstart, s - tstart);
3979             }
3980         }
3981         if (PL_thismad) /* install head */
3982             CURMAD('X', PL_thistoken);
3983     }
3984
3985     /* last whitespace of a sublex? */
3986     if (optype == ')' && PL_endwhite) {
3987         CURMAD('X', PL_endwhite);
3988     }
3989
3990     if (!PL_thismad) {
3991
3992         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3993         if (!PL_thiswhite && !PL_endwhite && !optype) {
3994             sv_free(PL_thistoken);
3995             PL_thistoken = 0;
3996             return 0;
3997         }
3998
3999         /* put off final whitespace till peg */
4000         if (optype == ';' && !PL_rsfp) {
4001             PL_nextwhite = PL_thiswhite;
4002             PL_thiswhite = 0;
4003         }
4004         else if (PL_thisopen) {
4005             CURMAD('q', PL_thisopen);
4006             if (PL_thistoken)
4007                 sv_free(PL_thistoken);
4008             PL_thistoken = 0;
4009         }
4010         else {
4011             /* Store actual token text as madprop X */
4012             CURMAD('X', PL_thistoken);
4013         }
4014
4015         if (PL_thiswhite) {
4016             /* add preceding whitespace as madprop _ */
4017             CURMAD('_', PL_thiswhite);
4018         }
4019
4020         if (PL_thisstuff) {
4021             /* add quoted material as madprop = */
4022             CURMAD('=', PL_thisstuff);
4023         }
4024
4025         if (PL_thisclose) {
4026             /* add terminating quote as madprop Q */
4027             CURMAD('Q', PL_thisclose);
4028         }
4029     }
4030
4031     /* special processing based on optype */
4032
4033     switch (optype) {
4034
4035     /* opval doesn't need a TOKEN since it can already store mp */
4036     case WORD:
4037     case METHOD:
4038     case FUNCMETH:
4039     case THING:
4040     case PMFUNC:
4041     case PRIVATEREF:
4042     case FUNC0SUB:
4043     case UNIOPSUB:
4044     case LSTOPSUB:
4045         if (pl_yylval.opval)
4046             append_madprops(PL_thismad, pl_yylval.opval, 0);
4047         PL_thismad = 0;
4048         return optype;
4049
4050     /* fake EOF */
4051     case 0:
4052         optype = PEG;
4053         if (PL_endwhite) {
4054             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4055             PL_endwhite = 0;
4056         }
4057         break;
4058
4059     case ']':
4060     case '}':
4061         if (PL_faketokens)
4062             break;
4063         /* remember any fake bracket that lexer is about to discard */
4064         if (PL_lex_brackets == 1 &&
4065             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4066         {
4067             s = PL_bufptr;
4068             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4069                 s++;
4070             if (*s == '}') {
4071                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4072                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4073                 PL_thiswhite = 0;
4074                 PL_bufptr = s - 1;
4075                 break;  /* don't bother looking for trailing comment */
4076             }
4077             else
4078                 s = PL_bufptr;
4079         }
4080         if (optype == ']')
4081             break;
4082         /* FALLTHROUGH */
4083
4084     /* attach a trailing comment to its statement instead of next token */
4085     case ';':
4086         if (PL_faketokens)
4087             break;
4088         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4089             s = PL_bufptr;
4090             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4091                 s++;
4092             if (*s == '\n' || *s == '#') {
4093                 while (s < PL_bufend && *s != '\n')
4094                     s++;
4095                 if (s < PL_bufend)
4096                     s++;
4097                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4098                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4099                 PL_thiswhite = 0;
4100                 PL_bufptr = s;
4101             }
4102         }
4103         break;
4104
4105     /* pval */
4106     case LABEL:
4107         break;
4108
4109     /* ival */
4110     default:
4111         break;
4112
4113     }
4114
4115     /* Create new token struct.  Note: opvals return early above. */
4116     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4117     PL_thismad = 0;
4118     return optype;
4119 }
4120 #endif
4121
4122 STATIC char *
4123 S_tokenize_use(pTHX_ int is_use, char *s) {
4124     dVAR;
4125
4126     PERL_ARGS_ASSERT_TOKENIZE_USE;
4127
4128     if (PL_expect != XSTATE)
4129         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4130                     is_use ? "use" : "no"));
4131     s = SKIPSPACE1(s);
4132     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4133         s = force_version(s, TRUE);
4134         if (*s == ';' || *s == '}'
4135                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4136             start_force(PL_curforce);
4137             NEXTVAL_NEXTTOKE.opval = NULL;
4138             force_next(WORD);
4139         }
4140         else if (*s == 'v') {
4141             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4142             s = force_version(s, FALSE);
4143         }
4144     }
4145     else {
4146         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4147         s = force_version(s, FALSE);
4148     }
4149     pl_yylval.ival = is_use;
4150     return s;
4151 }
4152 #ifdef DEBUGGING
4153     static const char* const exp_name[] =
4154         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4155           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4156         };
4157 #endif
4158
4159 /*
4160   yylex
4161
4162   Works out what to call the token just pulled out of the input
4163   stream.  The yacc parser takes care of taking the ops we return and
4164   stitching them into a tree.
4165
4166   Returns:
4167     PRIVATEREF
4168
4169   Structure:
4170       if read an identifier
4171           if we're in a my declaration
4172               croak if they tried to say my($foo::bar)
4173               build the ops for a my() declaration
4174           if it's an access to a my() variable
4175               are we in a sort block?
4176                   croak if my($a); $a <=> $b
4177               build ops for access to a my() variable
4178           if in a dq string, and they've said @foo and we can't find @foo
4179               croak
4180           build ops for a bareword
4181       if we already built the token before, use it.
4182 */
4183
4184
4185 #ifdef __SC__
4186 #pragma segment Perl_yylex
4187 #endif
4188 int
4189 Perl_yylex(pTHX)
4190 {
4191     dVAR;
4192     register char *s = PL_bufptr;
4193     register char *d;
4194     STRLEN len;
4195     bool bof = FALSE;
4196     U32 fake_eof = 0;
4197
4198     /* orig_keyword, gvp, and gv are initialized here because
4199      * jump to the label just_a_word_zero can bypass their
4200      * initialization later. */
4201     I32 orig_keyword = 0;
4202     GV *gv = NULL;
4203     GV **gvp = NULL;
4204
4205     DEBUG_T( {
4206         SV* tmp = newSVpvs("");
4207         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4208             (IV)CopLINE(PL_curcop),
4209             lex_state_names[PL_lex_state],
4210             exp_name[PL_expect],
4211             pv_display(tmp, s, strlen(s), 0, 60));
4212         SvREFCNT_dec(tmp);
4213     } );
4214     /* check if there's an identifier for us to look at */
4215     if (PL_pending_ident)
4216         return REPORT(S_pending_ident(aTHX));
4217
4218     /* no identifier pending identification */
4219
4220     switch (PL_lex_state) {
4221 #ifdef COMMENTARY
4222     case LEX_NORMAL:            /* Some compilers will produce faster */
4223     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4224         break;
4225 #endif
4226
4227     /* when we've already built the next token, just pull it out of the queue */
4228     case LEX_KNOWNEXT:
4229 #ifdef PERL_MAD
4230         PL_lasttoke--;
4231         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4232         if (PL_madskills) {
4233             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4234             PL_nexttoke[PL_lasttoke].next_mad = 0;
4235             if (PL_thismad && PL_thismad->mad_key == '_') {
4236                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4237                 PL_thismad->mad_val = 0;
4238                 mad_free(PL_thismad);
4239                 PL_thismad = 0;
4240             }
4241         }
4242         if (!PL_lasttoke) {
4243             PL_lex_state = PL_lex_defer;
4244             PL_expect = PL_lex_expect;
4245             PL_lex_defer = LEX_NORMAL;
4246             if (!PL_nexttoke[PL_lasttoke].next_type)
4247                 return yylex();
4248         }
4249 #else
4250         PL_nexttoke--;
4251         pl_yylval = PL_nextval[PL_nexttoke];
4252         if (!PL_nexttoke) {
4253             PL_lex_state = PL_lex_defer;
4254             PL_expect = PL_lex_expect;
4255             PL_lex_defer = LEX_NORMAL;
4256         }
4257 #endif
4258 #ifdef PERL_MAD
4259         /* FIXME - can these be merged?  */
4260         return(PL_nexttoke[PL_lasttoke].next_type);
4261 #else
4262         return REPORT(PL_nexttype[PL_nexttoke]);
4263 #endif
4264
4265     /* interpolated case modifiers like \L \U, including \Q and \E.
4266        when we get here, PL_bufptr is at the \
4267     */
4268     case LEX_INTERPCASEMOD:
4269 #ifdef DEBUGGING
4270         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4271             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4272 #endif
4273         /* handle \E or end of string */
4274         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4275             /* if at a \E */
4276             if (PL_lex_casemods) {
4277                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4278                 PL_lex_casestack[PL_lex_casemods] = '\0';
4279
4280                 if (PL_bufptr != PL_bufend
4281                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4282                     PL_bufptr += 2;
4283                     PL_lex_state = LEX_INTERPCONCAT;
4284 #ifdef PERL_MAD
4285                     if (PL_madskills)
4286                         PL_thistoken = newSVpvs("\\E");
4287 #endif
4288                 }
4289                 return REPORT(')');
4290             }
4291 #ifdef PERL_MAD
4292             while (PL_bufptr != PL_bufend &&
4293               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4294                 if (!PL_thiswhite)
4295                     PL_thiswhite = newSVpvs("");
4296                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4297                 PL_bufptr += 2;
4298             }
4299 #else
4300             if (PL_bufptr != PL_bufend)
4301                 PL_bufptr += 2;
4302 #endif
4303             PL_lex_state = LEX_INTERPCONCAT;
4304             return yylex();
4305         }
4306         else {
4307             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4308               "### Saw case modifier\n"); });
4309             s = PL_bufptr + 1;
4310             if (s[1] == '\\' && s[2] == 'E') {
4311 #ifdef PERL_MAD
4312                 if (!PL_thiswhite)
4313                     PL_thiswhite = newSVpvs("");
4314                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4315 #endif
4316                 PL_bufptr = s + 3;
4317                 PL_lex_state = LEX_INTERPCONCAT;
4318                 return yylex();
4319             }
4320             else {
4321                 I32 tmp;
4322                 if (!PL_madskills) /* when just compiling don't need correct */
4323                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4324                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4325                 if ((*s == 'L' || *s == 'U') &&
4326                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4327                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4328                     return REPORT(')');
4329                 }
4330                 if (PL_lex_casemods > 10)
4331                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4332                 PL_lex_casestack[PL_lex_casemods++] = *s;
4333                 PL_lex_casestack[PL_lex_casemods] = '\0';
4334                 PL_lex_state = LEX_INTERPCONCAT;
4335                 start_force(PL_curforce);
4336                 NEXTVAL_NEXTTOKE.ival = 0;
4337                 force_next('(');
4338                 start_force(PL_curforce);
4339                 if (*s == 'l')
4340                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4341                 else if (*s == 'u')
4342                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4343                 else if (*s == 'L')
4344                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4345                 else if (*s == 'U')
4346                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4347                 else if (*s == 'Q')
4348                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4349                 else
4350                     Perl_croak(aTHX_ "panic: yylex");
4351                 if (PL_madskills) {
4352                     SV* const tmpsv = newSVpvs("\\ ");
4353                     /* replace the space with the character we want to escape
4354                      */
4355                     SvPVX(tmpsv)[1] = *s;
4356                     curmad('_', tmpsv);
4357                 }
4358                 PL_bufptr = s + 1;
4359             }
4360             force_next(FUNC);
4361             if (PL_lex_starts) {
4362                 s = PL_bufptr;
4363                 PL_lex_starts = 0;
4364 #ifdef PERL_MAD
4365                 if (PL_madskills) {
4366                     if (PL_thistoken)
4367                         sv_free(PL_thistoken);
4368                     PL_thistoken = newSVpvs("");
4369                 }
4370 #endif
4371                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4372                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4373                     OPERATOR(',');
4374                 else
4375                     Aop(OP_CONCAT);
4376             }
4377             else
4378                 return yylex();
4379         }
4380
4381     case LEX_INTERPPUSH:
4382         return REPORT(sublex_push());
4383
4384     case LEX_INTERPSTART:
4385         if (PL_bufptr == PL_bufend)
4386             return REPORT(sublex_done());
4387         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4388               "### Interpolated variable\n"); });
4389         PL_expect = XTERM;
4390         PL_lex_dojoin = (*PL_bufptr == '@');
4391         PL_lex_state = LEX_INTERPNORMAL;
4392         if (PL_lex_dojoin) {
4393             start_force(PL_curforce);
4394             NEXTVAL_NEXTTOKE.ival = 0;
4395             force_next(',');
4396             start_force(PL_curforce);
4397             force_ident("\"", '$');
4398             start_force(PL_curforce);
4399             NEXTVAL_NEXTTOKE.ival = 0;
4400             force_next('$');
4401             start_force(PL_curforce);
4402             NEXTVAL_NEXTTOKE.ival = 0;
4403             force_next('(');
4404             start_force(PL_curforce);
4405             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4406             force_next(FUNC);
4407         }
4408         if (PL_lex_starts++) {
4409             s = PL_bufptr;
4410 #ifdef PERL_MAD
4411             if (PL_madskills) {
4412                 if (PL_thistoken)
4413                     sv_free(PL_thistoken);
4414                 PL_thistoken = newSVpvs("");
4415             }
4416 #endif
4417             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4418             if (!PL_lex_casemods && PL_lex_inpat)
4419                 OPERATOR(',');
4420             else
4421                 Aop(OP_CONCAT);
4422         }
4423         return yylex();
4424
4425     case LEX_INTERPENDMAYBE:
4426         if (intuit_more(PL_bufptr)) {
4427             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4428             break;
4429         }
4430         /* FALL THROUGH */
4431
4432     case LEX_INTERPEND:
4433         if (PL_lex_dojoin) {
4434             PL_lex_dojoin = FALSE;
4435             PL_lex_state = LEX_INTERPCONCAT;
4436 #ifdef PERL_MAD
4437             if (PL_madskills) {
4438                 if (PL_thistoken)
4439                     sv_free(PL_thistoken);
4440                 PL_thistoken = newSVpvs("");
4441             }
4442 #endif
4443             return REPORT(')');
4444         }
4445         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4446             && SvEVALED(PL_lex_repl))
4447         {
4448             if (PL_bufptr != PL_bufend)
4449                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4450             PL_lex_repl = NULL;
4451         }
4452         /* FALLTHROUGH */
4453     case LEX_INTERPCONCAT:
4454 #ifdef DEBUGGING
4455         if (PL_lex_brackets)
4456             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4457 #endif
4458         if (PL_bufptr == PL_bufend)
4459             return REPORT(sublex_done());
4460
4461         if (SvIVX(PL_linestr) == '\'') {
4462             SV *sv = newSVsv(PL_linestr);
4463             if (!PL_lex_inpat)
4464                 sv = tokeq(sv);
4465             else if ( PL_hints & HINT_NEW_RE )
4466                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4467             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4468             s = PL_bufend;
4469         }
4470         else {
4471             s = scan_const(PL_bufptr);
4472             if (*s == '\\')
4473                 PL_lex_state = LEX_INTERPCASEMOD;
4474             else
4475                 PL_lex_state = LEX_INTERPSTART;
4476         }
4477
4478         if (s != PL_bufptr) {
4479             start_force(PL_curforce);
4480             if (PL_madskills) {
4481                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4482             }
4483             NEXTVAL_NEXTTOKE = pl_yylval;
4484             PL_expect = XTERM;
4485             force_next(THING);
4486             if (PL_lex_starts++) {
4487 #ifdef PERL_MAD
4488                 if (PL_madskills) {
4489                     if (PL_thistoken)
4490                         sv_free(PL_thistoken);
4491                     PL_thistoken = newSVpvs("");
4492                 }
4493 #endif
4494                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4495                 if (!PL_lex_casemods && PL_lex_inpat)
4496                     OPERATOR(',');
4497                 else
4498                     Aop(OP_CONCAT);
4499             }
4500             else {
4501                 PL_bufptr = s;
4502                 return yylex();
4503             }
4504         }
4505
4506         return yylex();
4507     case LEX_FORMLINE:
4508         PL_lex_state = LEX_NORMAL;
4509         s = scan_formline(PL_bufptr);
4510         if (!PL_lex_formbrack)
4511             goto rightbracket;
4512         OPERATOR(';');
4513     }
4514
4515     s = PL_bufptr;
4516     PL_oldoldbufptr = PL_oldbufptr;
4517     PL_oldbufptr = s;
4518
4519   retry:
4520 #ifdef PERL_MAD
4521     if (PL_thistoken) {
4522         sv_free(PL_thistoken);
4523         PL_thistoken = 0;
4524     }
4525     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4526 #endif
4527     switch (*s) {
4528     default:
4529         if (isIDFIRST_lazy_if(s,UTF))
4530             goto keylookup;
4531         {
4532         unsigned char c = *s;
4533         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4534         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4535             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4536         } else {
4537             d = PL_linestart;
4538         }
4539         *s = '\0';
4540         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4541     }
4542     case 4:
4543     case 26:
4544         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4545     case 0:
4546 #ifdef PERL_MAD
4547         if (PL_madskills)
4548             PL_faketokens = 0;
4549 #endif
4550         if (!PL_rsfp) {
4551             PL_last_uni = 0;
4552             PL_last_lop = 0;
4553             if (PL_lex_brackets) {
4554                 yyerror((const char *)
4555                         (PL_lex_formbrack
4556                          ? "Format not terminated"
4557                          : "Missing right curly or square bracket"));
4558             }
4559             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4560                         "### Tokener got EOF\n");
4561             } );
4562             TOKEN(0);
4563         }
4564         if (s++ < PL_bufend)
4565             goto retry;                 /* ignore stray nulls */
4566         PL_last_uni = 0;
4567         PL_last_lop = 0;
4568         if (!PL_in_eval && !PL_preambled) {
4569             PL_preambled = TRUE;
4570 #ifdef PERL_MAD
4571             if (PL_madskills)
4572                 PL_faketokens = 1;
4573 #endif
4574             if (PL_perldb) {
4575                 /* Generate a string of Perl code to load the debugger.
4576                  * If PERL5DB is set, it will return the contents of that,
4577                  * otherwise a compile-time require of perl5db.pl.  */
4578
4579                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4580
4581                 if (pdb) {
4582                     sv_setpv(PL_linestr, pdb);
4583                     sv_catpvs(PL_linestr,";");
4584                 } else {
4585                     SETERRNO(0,SS_NORMAL);
4586                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4587                 }
4588             } else
4589                 sv_setpvs(PL_linestr,"");
4590             if (PL_preambleav) {
4591                 SV **svp = AvARRAY(PL_preambleav);
4592                 SV **const end = svp + AvFILLp(PL_preambleav);
4593                 while(svp <= end) {
4594                     sv_catsv(PL_linestr, *svp);
4595                     ++svp;
4596                     sv_catpvs(PL_linestr, ";");
4597                 }
4598                 sv_free(MUTABLE_SV(PL_preambleav));
4599                 PL_preambleav = NULL;
4600             }
4601             if (PL_minus_E)
4602                 sv_catpvs(PL_linestr,
4603                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4604             if (PL_minus_n || PL_minus_p) {
4605                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4606                 if (PL_minus_l)
4607                     sv_catpvs(PL_linestr,"chomp;");
4608                 if (PL_minus_a) {
4609                     if (PL_minus_F) {
4610                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4611                              || *PL_splitstr == '"')
4612                               && strchr(PL_splitstr + 1, *PL_splitstr))
4613                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4614                         else {
4615                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4616                                bytes can be used as quoting characters.  :-) */
4617                             const char *splits = PL_splitstr;
4618                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4619                             do {
4620                                 /* Need to \ \s  */
4621                                 if (*splits == '\\')
4622                                     sv_catpvn(PL_linestr, splits, 1);
4623                                 sv_catpvn(PL_linestr, splits, 1);
4624                             } while (*splits++);
4625                             /* This loop will embed the trailing NUL of
4626                                PL_linestr as the last thing it does before
4627                                terminating.  */
4628                             sv_catpvs(PL_linestr, ");");
4629                         }
4630                     }
4631                     else
4632                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4633                 }
4634             }
4635             sv_catpvs(PL_linestr, "\n");
4636             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4637             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4638             PL_last_lop = PL_last_uni = NULL;
4639             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4640                 update_debugger_info(PL_linestr, NULL, 0);
4641             goto retry;
4642         }
4643         do {
4644             fake_eof = 0;
4645             bof = PL_rsfp ? TRUE : FALSE;
4646             if (0) {
4647               fake_eof:
4648                 fake_eof = LEX_FAKE_EOF;
4649             }
4650             PL_bufptr = PL_bufend;
4651             CopLINE_inc(PL_curcop);
4652             if (!lex_next_chunk(fake_eof)) {
4653                 CopLINE_dec(PL_curcop);
4654                 s = PL_bufptr;
4655                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4656             }
4657             CopLINE_dec(PL_curcop);
4658 #ifdef PERL_MAD
4659             if (!PL_rsfp)
4660                 PL_realtokenstart = -1;
4661 #endif
4662             s = PL_bufptr;
4663             /* If it looks like the start of a BOM or raw UTF-16,
4664              * check if it in fact is. */
4665             if (bof && PL_rsfp &&
4666                      (*s == 0 ||
4667                       *(U8*)s == 0xEF ||
4668                       *(U8*)s >= 0xFE ||
4669                       s[1] == 0)) {
4670                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4671                 if (bof) {
4672                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4673                     s = swallow_bom((U8*)s);
4674                 }
4675             }
4676             if (PL_doextract) {
4677                 /* Incest with pod. */
4678 #ifdef PERL_MAD
4679                 if (PL_madskills)
4680                     sv_catsv(PL_thiswhite, PL_linestr);
4681 #endif
4682                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4683                     sv_setpvs(PL_linestr, "");
4684                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4685                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4686                     PL_last_lop = PL_last_uni = NULL;
4687                     PL_doextract = FALSE;
4688                 }
4689             }
4690             if (PL_rsfp)
4691                 incline(s);
4692         } while (PL_doextract);
4693         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4694         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4695         PL_last_lop = PL_last_uni = NULL;
4696         if (CopLINE(PL_curcop) == 1) {
4697             while (s < PL_bufend && isSPACE(*s))
4698                 s++;
4699             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4700                 s++;
4701 #ifdef PERL_MAD
4702             if (PL_madskills)
4703                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4704 #endif
4705             d = NULL;
4706             if (!PL_in_eval) {
4707                 if (*s == '#' && *(s+1) == '!')
4708                     d = s + 2;
4709 #ifdef ALTERNATE_SHEBANG
4710                 else {
4711                     static char const as[] = ALTERNATE_SHEBANG;
4712                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4713                         d = s + (sizeof(as) - 1);
4714                 }
4715 #endif /* ALTERNATE_SHEBANG */
4716             }
4717             if (d) {
4718                 char *ipath;
4719                 char *ipathend;
4720
4721                 while (isSPACE(*d))
4722                     d++;
4723                 ipath = d;
4724                 while (*d && !isSPACE(*d))
4725                     d++;
4726                 ipathend = d;
4727
4728 #ifdef ARG_ZERO_IS_SCRIPT
4729                 if (ipathend > ipath) {
4730                     /*
4731                      * HP-UX (at least) sets argv[0] to the script name,
4732                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4733                      * at least, set argv[0] to the basename of the Perl
4734                      * interpreter. So, having found "#!", we'll set it right.
4735                      */
4736                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4737                                                     SVt_PV)); /* $^X */
4738                     assert(SvPOK(x) || SvGMAGICAL(x));
4739                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4740                         sv_setpvn(x, ipath, ipathend - ipath);
4741                         SvSETMAGIC(x);
4742                     }
4743                     else {
4744                         STRLEN blen;
4745                         STRLEN llen;
4746                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4747                         const char * const lstart = SvPV_const(x,llen);
4748                         if (llen < blen) {
4749                             bstart += blen - llen;
4750                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4751                                 sv_setpvn(x, ipath, ipathend - ipath);
4752                                 SvSETMAGIC(x);
4753                             }
4754                         }
4755                     }
4756                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4757                 }
4758 #endif /* ARG_ZERO_IS_SCRIPT */
4759
4760                 /*
4761                  * Look for options.
4762                  */
4763                 d = instr(s,"perl -");
4764                 if (!d) {
4765                     d = instr(s,"perl");
4766 #if defined(DOSISH)
4767                     /* avoid getting into infinite loops when shebang
4768                      * line contains "Perl" rather than "perl" */
4769                     if (!d) {
4770                         for (d = ipathend-4; d >= ipath; --d) {
4771                             if ((*d == 'p' || *d == 'P')
4772                                 && !ibcmp(d, "perl", 4))
4773                             {
4774                                 break;
4775                             }
4776                         }
4777                         if (d < ipath)
4778                             d = NULL;
4779                     }
4780 #endif
4781                 }
4782 #ifdef ALTERNATE_SHEBANG
4783                 /*
4784                  * If the ALTERNATE_SHEBANG on this system starts with a
4785                  * character that can be part of a Perl expression, then if
4786                  * we see it but not "perl", we're probably looking at the
4787                  * start of Perl code, not a request to hand off to some
4788                  * other interpreter.  Similarly, if "perl" is there, but
4789                  * not in the first 'word' of the line, we assume the line
4790                  * contains the start of the Perl program.
4791                  */
4792                 if (d && *s != '#') {
4793                     const char *c = ipath;
4794                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4795                         c++;
4796                     if (c < d)
4797                         d = NULL;       /* "perl" not in first word; ignore */
4798                     else
4799                         *s = '#';       /* Don't try to parse shebang line */
4800                 }
4801 #endif /* ALTERNATE_SHEBANG */
4802                 if (!d &&
4803                     *s == '#' &&
4804                     ipathend > ipath &&
4805                     !PL_minus_c &&
4806                     !instr(s,"indir") &&
4807                     instr(PL_origargv[0],"perl"))
4808                 {
4809                     dVAR;
4810                     char **newargv;
4811
4812                     *ipathend = '\0';
4813                     s = ipathend + 1;
4814                     while (s < PL_bufend && isSPACE(*s))
4815                         s++;
4816                     if (s < PL_bufend) {
4817                         Newx(newargv,PL_origargc+3,char*);
4818                         newargv[1] = s;
4819                         while (s < PL_bufend && !isSPACE(*s))
4820                             s++;
4821                         *s = '\0';
4822                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4823                     }
4824                     else
4825                         newargv = PL_origargv;
4826                     newargv[0] = ipath;
4827                     PERL_FPU_PRE_EXEC
4828                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4829                     PERL_FPU_POST_EXEC
4830                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4831                 }
4832                 if (d) {
4833                     while (*d && !isSPACE(*d))
4834                         d++;
4835                     while (SPACE_OR_TAB(*d))
4836                         d++;
4837
4838                     if (*d++ == '-') {
4839                         const bool switches_done = PL_doswitches;
4840                         const U32 oldpdb = PL_perldb;
4841                         const bool oldn = PL_minus_n;
4842                         const bool oldp = PL_minus_p;
4843                         const char *d1 = d;
4844
4845                         do {
4846                             bool baduni = FALSE;
4847                             if (*d1 == 'C') {
4848                                 const char *d2 = d1 + 1;
4849                                 if (parse_unicode_opts((const char **)&d2)
4850                                     != PL_unicode)
4851                                     baduni = TRUE;
4852                             }
4853                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4854                                 const char * const m = d1;
4855                                 while (*d1 && !isSPACE(*d1))
4856                                     d1++;
4857                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4858                                       (int)(d1 - m), m);
4859                             }
4860                             d1 = moreswitches(d1);
4861                         } while (d1);
4862                         if (PL_doswitches && !switches_done) {
4863                             int argc = PL_origargc;
4864                             char **argv = PL_origargv;
4865                             do {
4866                                 argc--,argv++;
4867                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4868                             init_argv_symbols(argc,argv);
4869                         }
4870                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4871                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4872                               /* if we have already added "LINE: while (<>) {",
4873                                  we must not do it again */
4874                         {
4875                             sv_setpvs(PL_linestr, "");
4876                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4877                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4878                             PL_last_lop = PL_last_uni = NULL;
4879                             PL_preambled = FALSE;
4880                             if (PERLDB_LINE || PERLDB_SAVESRC)
4881                                 (void)gv_fetchfile(PL_origfilename);
4882                             goto retry;
4883                         }
4884                     }
4885                 }
4886             }
4887         }
4888         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4889             PL_bufptr = s;
4890             PL_lex_state = LEX_FORMLINE;
4891             return yylex();
4892         }
4893         goto retry;
4894     case '\r':
4895 #ifdef PERL_STRICT_CR
4896         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4897         Perl_croak(aTHX_
4898       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4899 #endif
4900     case ' ': case '\t': case '\f': case 013:
4901 #ifdef PERL_MAD
4902         PL_realtokenstart = -1;
4903         if (!PL_thiswhite)
4904             PL_thiswhite = newSVpvs("");
4905         sv_catpvn(PL_thiswhite, s, 1);
4906 #endif
4907         s++;
4908         goto retry;
4909     case '#':
4910     case '\n':
4911 #ifdef PERL_MAD
4912         PL_realtokenstart = -1;
4913         if (PL_madskills)
4914             PL_faketokens = 0;
4915 #endif
4916         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4917             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4918                 /* handle eval qq[#line 1 "foo"\n ...] */
4919                 CopLINE_dec(PL_curcop);
4920                 incline(s);
4921             }
4922             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4923                 s = SKIPSPACE0(s);
4924                 if (!PL_in_eval || PL_rsfp)
4925                     incline(s);
4926             }
4927             else {
4928                 d = s;
4929                 while (d < PL_bufend && *d != '\n')
4930                     d++;
4931                 if (d < PL_bufend)
4932                     d++;
4933                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4934                   Perl_croak(aTHX_ "panic: input overflow");
4935 #ifdef PERL_MAD
4936                 if (PL_madskills)
4937                     PL_thiswhite = newSVpvn(s, d - s);
4938 #endif
4939                 s = d;
4940                 incline(s);
4941             }
4942             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4943                 PL_bufptr = s;
4944                 PL_lex_state = LEX_FORMLINE;
4945                 return yylex();
4946             }
4947         }
4948         else {
4949 #ifdef PERL_MAD
4950             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4951                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4952                     PL_faketokens = 0;
4953                     s = SKIPSPACE0(s);
4954                     TOKEN(PEG); /* make sure any #! line is accessible */
4955                 }
4956                 s = SKIPSPACE0(s);
4957             }
4958             else {
4959 /*              if (PL_madskills && PL_lex_formbrack) { */
4960                     d = s;
4961                     while (d < PL_bufend && *d != '\n')
4962                         d++;
4963                     if (d < PL_bufend)
4964                         d++;
4965                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4966                       Perl_croak(aTHX_ "panic: input overflow");
4967                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4968                         if (!PL_thiswhite)
4969                             PL_thiswhite = newSVpvs("");
4970                         if (CopLINE(PL_curcop) == 1) {
4971                             sv_setpvs(PL_thiswhite, "");
4972                             PL_faketokens = 0;
4973                         }
4974                         sv_catpvn(PL_thiswhite, s, d - s);
4975                     }
4976                     s = d;
4977 /*              }
4978                 *s = '\0';
4979                 PL_bufend = s; */
4980             }
4981 #else
4982             *s = '\0';
4983             PL_bufend = s;
4984 #endif
4985         }
4986         goto retry;
4987     case '-':
4988         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4989             I32 ftst = 0;
4990             char tmp;
4991
4992             s++;
4993             PL_bufptr = s;
4994             tmp = *s++;
4995
4996             while (s < PL_bufend && SPACE_OR_TAB(*s))
4997                 s++;
4998
4999             if (strnEQ(s,"=>",2)) {
5000                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5001                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5002                 OPERATOR('-');          /* unary minus */
5003             }
5004             PL_last_uni = PL_oldbufptr;
5005             switch (tmp) {
5006             case 'r': ftst = OP_FTEREAD;        break;
5007             case 'w': ftst = OP_FTEWRITE;       break;
5008             case 'x': ftst = OP_FTEEXEC;        break;
5009             case 'o': ftst = OP_FTEOWNED;       break;
5010             case 'R': ftst = OP_FTRREAD;        break;
5011             case 'W': ftst = OP_FTRWRITE;       break;
5012             case 'X': ftst = OP_FTREXEC;        break;
5013             case 'O': ftst = OP_FTROWNED;       break;
5014             case 'e': ftst = OP_FTIS;           break;
5015             case 'z': ftst = OP_FTZERO;         break;
5016             case 's': ftst = OP_FTSIZE;         break;
5017             case 'f': ftst = OP_FTFILE;         break;
5018             case 'd': ftst = OP_FTDIR;          break;
5019             case 'l': ftst = OP_FTLINK;         break;
5020             case 'p': ftst = OP_FTPIPE;         break;
5021             case 'S': ftst = OP_FTSOCK;         break;
5022             case 'u': ftst = OP_FTSUID;         break;
5023             case 'g': ftst = OP_FTSGID;         break;
5024             case 'k': ftst = OP_FTSVTX;         break;
5025             case 'b': ftst = OP_FTBLK;          break;
5026             case 'c': ftst = OP_FTCHR;          break;
5027             case 't': ftst = OP_FTTTY;          break;
5028             case 'T': ftst = OP_FTTEXT;         break;
5029             case 'B': ftst = OP_FTBINARY;       break;
5030             case 'M': case 'A': case 'C':
5031                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5032                 switch (tmp) {
5033                 case 'M': ftst = OP_FTMTIME;    break;
5034                 case 'A': ftst = OP_FTATIME;    break;
5035                 case 'C': ftst = OP_FTCTIME;    break;
5036                 default:                        break;
5037                 }
5038                 break;
5039             default:
5040                 break;
5041             }
5042             if (ftst) {
5043                 PL_last_lop_op = (OPCODE)ftst;
5044                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5045                         "### Saw file test %c\n", (int)tmp);
5046                 } );
5047                 FTST(ftst);
5048             }
5049             else {
5050                 /* Assume it was a minus followed by a one-letter named
5051                  * subroutine call (or a -bareword), then. */
5052                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5053                         "### '-%c' looked like a file test but was not\n",
5054                         (int) tmp);
5055                 } );
5056                 s = --PL_bufptr;
5057             }
5058         }
5059         {
5060             const char tmp = *s++;
5061             if (*s == tmp) {
5062                 s++;
5063                 if (PL_expect == XOPERATOR)
5064                     TERM(POSTDEC);
5065                 else
5066                     OPERATOR(PREDEC);
5067             }
5068             else if (*s == '>') {
5069                 s++;
5070                 s = SKIPSPACE1(s);
5071                 if (isIDFIRST_lazy_if(s,UTF)) {
5072                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5073                     TOKEN(ARROW);
5074                 }
5075                 else if (*s == '$')
5076                     OPERATOR(ARROW);
5077                 else
5078                     TERM(ARROW);
5079             }
5080             if (PL_expect == XOPERATOR)
5081                 Aop(OP_SUBTRACT);
5082             else {
5083                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5084                     check_uni();
5085                 OPERATOR('-');          /* unary minus */
5086             }
5087         }
5088
5089     case '+':
5090         {
5091             const char tmp = *s++;
5092             if (*s == tmp) {
5093                 s++;
5094                 if (PL_expect == XOPERATOR)
5095                     TERM(POSTINC);
5096                 else
5097                     OPERATOR(PREINC);
5098             }
5099             if (PL_expect == XOPERATOR)
5100                 Aop(OP_ADD);
5101             else {
5102                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5103                     check_uni();
5104                 OPERATOR('+');
5105             }
5106         }
5107
5108     case '*':
5109         if (PL_expect != XOPERATOR) {
5110             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5111             PL_expect = XOPERATOR;
5112             force_ident(PL_tokenbuf, '*');
5113             if (!*PL_tokenbuf)
5114                 PREREF('*');
5115             TERM('*');
5116         }
5117         s++;
5118         if (*s == '*') {
5119             s++;
5120             PWop(OP_POW);
5121         }
5122         Mop(OP_MULTIPLY);
5123
5124     case '%':
5125         if (PL_expect == XOPERATOR) {
5126             ++s;
5127             Mop(OP_MODULO);
5128         }
5129         PL_tokenbuf[0] = '%';
5130         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5131                 sizeof PL_tokenbuf - 1, FALSE);
5132         if (!PL_tokenbuf[1]) {
5133             PREREF('%');
5134         }
5135         PL_pending_ident = '%';
5136         TERM('%');
5137
5138     case '^':
5139         s++;
5140         BOop(OP_BIT_XOR);
5141     case '[':
5142         PL_lex_brackets++;
5143         {
5144             const char tmp = *s++;
5145             OPERATOR(tmp);
5146         }
5147     case '~':
5148         if (s[1] == '~'
5149             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5150         {
5151             s += 2;
5152             Eop(OP_SMARTMATCH);
5153         }
5154     case ',':
5155         {
5156             const char tmp = *s++;
5157             OPERATOR(tmp);
5158         }
5159     case ':':
5160         if (s[1] == ':') {
5161             len = 0;
5162             goto just_a_word_zero_gv;
5163         }
5164         s++;
5165         switch (PL_expect) {
5166             OP *attrs;
5167 #ifdef PERL_MAD
5168             I32 stuffstart;
5169 #endif
5170         case XOPERATOR:
5171             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5172                 break;
5173             PL_bufptr = s;      /* update in case we back off */
5174             if (*s == '=') {
5175                 deprecate(":= for an empty attribute list");
5176             }
5177             goto grabattrs;
5178         case XATTRBLOCK:
5179             PL_expect = XBLOCK;
5180             goto grabattrs;
5181         case XATTRTERM:
5182             PL_expect = XTERMBLOCK;
5183          grabattrs:
5184 #ifdef PERL_MAD
5185             stuffstart = s - SvPVX(PL_linestr) - 1;
5186 #endif
5187             s = PEEKSPACE(s);
5188             attrs = NULL;
5189             while (isIDFIRST_lazy_if(s,UTF)) {
5190                 I32 tmp;
5191                 SV *sv;
5192                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5193                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5194                     if (tmp < 0) tmp = -tmp;
5195                     switch (tmp) {
5196                     case KEY_or:
5197                     case KEY_and:
5198                     case KEY_for:
5199                     case KEY_foreach:
5200                     case KEY_unless:
5201                     case KEY_if:
5202                     case KEY_while:
5203                     case KEY_until:
5204                         goto got_attrs;
5205                     default:
5206                         break;
5207                     }
5208                 }
5209                 sv = newSVpvn(s, len);
5210                 if (*d == '(') {
5211                     d = scan_str(d,TRUE,TRUE);
5212                     if (!d) {
5213                         /* MUST advance bufptr here to avoid bogus
5214                            "at end of line" context messages from yyerror().
5215                          */
5216                         PL_bufptr = s + len;
5217                         yyerror("Unterminated attribute parameter in attribute list");
5218                         if (attrs)
5219                             op_free(attrs);
5220                         sv_free(sv);
5221                         return REPORT(0);       /* EOF indicator */
5222                     }
5223                 }
5224                 if (PL_lex_stuff) {
5225                     sv_catsv(sv, PL_lex_stuff);
5226                     attrs = append_elem(OP_LIST, attrs,
5227                                         newSVOP(OP_CONST, 0, sv));
5228                     SvREFCNT_dec(PL_lex_stuff);
5229                     PL_lex_stuff = NULL;
5230                 }
5231                 else {
5232                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5233                         sv_free(sv);
5234                         if (PL_in_my == KEY_our) {
5235                             deprecate(":unique");
5236                         }
5237                         else
5238                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5239                     }
5240
5241                     /* NOTE: any CV attrs applied here need to be part of
5242                        the CVf_BUILTIN_ATTRS define in cv.h! */
5243                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5244                         sv_free(sv);
5245                         CvLVALUE_on(PL_compcv);
5246                     }
5247                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5248                         sv_free(sv);
5249                         deprecate(":locked");
5250                     }
5251                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5252                         sv_free(sv);
5253                         CvMETHOD_on(PL_compcv);
5254                     }
5255                     /* After we've set the flags, it could be argued that
5256                        we don't need to do the attributes.pm-based setting
5257                        process, and shouldn't bother appending recognized
5258                        flags.  To experiment with that, uncomment the
5259                        following "else".  (Note that's already been
5260                        uncommented.  That keeps the above-applied built-in
5261                        attributes from being intercepted (and possibly
5262                        rejected) by a package's attribute routines, but is
5263                        justified by the performance win for the common case
5264                        of applying only built-in attributes.) */
5265                     else
5266                         attrs = append_elem(OP_LIST, attrs,
5267                                             newSVOP(OP_CONST, 0,
5268                                                     sv));
5269                 }
5270                 s = PEEKSPACE(d);
5271                 if (*s == ':' && s[1] != ':')
5272                     s = PEEKSPACE(s+1);
5273                 else if (s == d)
5274                     break;      /* require real whitespace or :'s */
5275                 /* XXX losing whitespace on sequential attributes here */
5276             }
5277             {
5278                 const char tmp
5279                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5280                 if (*s != ';' && *s != '}' && *s != tmp
5281                     && (tmp != '=' || *s != ')')) {
5282                     const char q = ((*s == '\'') ? '"' : '\'');
5283                     /* If here for an expression, and parsed no attrs, back
5284                        off. */
5285                     if (tmp == '=' && !attrs) {
5286                         s = PL_bufptr;
5287                         break;
5288                     }
5289                     /* MUST advance bufptr here to avoid bogus "at end of line"
5290                        context messages from yyerror().
5291                     */
5292                     PL_bufptr = s;
5293                     yyerror( (const char *)
5294                              (*s
5295                               ? Perl_form(aTHX_ "Invalid separator character "
5296                                           "%c%c%c in attribute list", q, *s, q)
5297                               : "Unterminated attribute list" ) );
5298                     if (attrs)
5299                         op_free(attrs);
5300                     OPERATOR(':');
5301                 }
5302             }
5303         got_attrs:
5304             if (attrs) {
5305                 start_force(PL_curforce);
5306                 NEXTVAL_NEXTTOKE.opval = attrs;
5307                 CURMAD('_', PL_nextwhite);
5308                 force_next(THING);
5309             }
5310 #ifdef PERL_MAD
5311             if (PL_madskills) {
5312                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5313                                      (s - SvPVX(PL_linestr)) - stuffstart);
5314             }
5315 #endif
5316             TOKEN(COLONATTR);
5317         }
5318         OPERATOR(':');
5319     case '(':
5320         s++;
5321         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5322             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5323         else
5324             PL_expect = XTERM;
5325         s = SKIPSPACE1(s);
5326         TOKEN('(');
5327     case ';':
5328         CLINE;
5329         {
5330             const char tmp = *s++;
5331             OPERATOR(tmp);
5332         }
5333     case ')':
5334         {
5335             const char tmp = *s++;
5336             s = SKIPSPACE1(s);
5337             if (*s == '{')
5338                 PREBLOCK(tmp);
5339             TERM(tmp);
5340         }
5341     case ']':
5342         s++;
5343         if (PL_lex_brackets <= 0)
5344             yyerror("Unmatched right square bracket");
5345         else
5346             --PL_lex_brackets;
5347         if (PL_lex_state == LEX_INTERPNORMAL) {
5348             if (PL_lex_brackets == 0) {
5349                 if (*s == '-' && s[1] == '>')
5350                     PL_lex_state = LEX_INTERPENDMAYBE;
5351                 else if (*s != '[' && *s != '{')
5352                     PL_lex_state = LEX_INTERPEND;
5353             }
5354         }
5355         TERM(']');
5356     case '{':
5357       leftbracket:
5358         s++;
5359         if (PL_lex_brackets > 100) {
5360             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5361         }
5362         switch (PL_expect) {
5363         case XTERM:
5364             if (PL_lex_formbrack) {
5365                 s--;
5366                 PRETERMBLOCK(DO);
5367             }
5368             if (PL_oldoldbufptr == PL_last_lop)
5369                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5370             else
5371                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5372             OPERATOR(HASHBRACK);
5373         case XOPERATOR:
5374             while (s < PL_bufend && SPACE_OR_TAB(*s))
5375                 s++;
5376             d = s;
5377             PL_tokenbuf[0] = '\0';
5378             if (d < PL_bufend && *d == '-') {
5379                 PL_tokenbuf[0] = '-';
5380                 d++;
5381                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5382                     d++;
5383             }
5384             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5385                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5386                               FALSE, &len);
5387                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5388                     d++;
5389                 if (*d == '}') {
5390                     const char minus = (PL_tokenbuf[0] == '-');
5391                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5392                     if (minus)
5393                         force_next('-');
5394                 }
5395             }
5396             /* FALL THROUGH */
5397         case XATTRBLOCK:
5398         case XBLOCK:
5399             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5400             PL_expect = XSTATE;
5401             break;
5402         case XATTRTERM:
5403         case XTERMBLOCK:
5404             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5405             PL_expect = XSTATE;
5406             break;
5407         default: {
5408                 const char *t;
5409                 if (PL_oldoldbufptr == PL_last_lop)
5410                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5411                 else
5412                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5413                 s = SKIPSPACE1(s);
5414                 if (*s == '}') {
5415                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5416                         PL_expect = XTERM;
5417                         /* This hack is to get the ${} in the message. */
5418                         PL_bufptr = s+1;
5419                         yyerror("syntax error");
5420                         break;
5421                     }
5422                     OPERATOR(HASHBRACK);
5423                 }
5424                 /* This hack serves to disambiguate a pair of curlies
5425                  * as being a block or an anon hash.  Normally, expectation
5426                  * determines that, but in cases where we're not in a
5427                  * position to expect anything in particular (like inside
5428                  * eval"") we have to resolve the ambiguity.  This code
5429                  * covers the case where the first term in the curlies is a
5430                  * quoted string.  Most other cases need to be explicitly
5431                  * disambiguated by prepending a "+" before the opening
5432                  * curly in order to force resolution as an anon hash.
5433                  *
5434                  * XXX should probably propagate the outer expectation
5435                  * into eval"" to rely less on this hack, but that could
5436                  * potentially break current behavior of eval"".
5437                  * GSAR 97-07-21
5438                  */
5439                 t = s;
5440                 if (*s == '\'' || *s == '"' || *s == '`') {
5441                     /* common case: get past first string, handling escapes */
5442                     for (t++; t < PL_bufend && *t != *s;)
5443                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5444                             t++;
5445                     t++;
5446                 }
5447                 else if (*s == 'q') {
5448                     if (++t < PL_bufend
5449                         && (!isALNUM(*t)
5450                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5451                                 && !isALNUM(*t))))
5452                     {
5453                         /* skip q//-like construct */
5454                         const char *tmps;
5455                         char open, close, term;
5456                         I32 brackets = 1;
5457
5458                         while (t < PL_bufend && isSPACE(*t))
5459                             t++;
5460                         /* check for q => */
5461                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5462                             OPERATOR(HASHBRACK);
5463                         }
5464                         term = *t;
5465                         open = term;
5466                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5467                             term = tmps[5];
5468                         close = term;
5469                         if (open == close)
5470                             for (t++; t < PL_bufend; t++) {
5471                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5472                                     t++;
5473                                 else if (*t == open)
5474                                     break;
5475                             }
5476                         else {
5477                             for (t++; t < PL_bufend; t++) {
5478                                 if (*t == '\\' && t+1 < PL_bufend)
5479                                     t++;
5480                                 else if (*t == close && --brackets <= 0)
5481                                     break;
5482                                 else if (*t == open)
5483                                     brackets++;
5484                             }
5485                         }
5486                         t++;
5487                     }
5488                     else
5489                         /* skip plain q word */
5490                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5491                              t += UTF8SKIP(t);
5492                 }
5493                 else if (isALNUM_lazy_if(t,UTF)) {
5494                     t += UTF8SKIP(t);
5495                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5496                          t += UTF8SKIP(t);
5497                 }
5498                 while (t < PL_bufend && isSPACE(*t))
5499                     t++;
5500                 /* if comma follows first term, call it an anon hash */
5501                 /* XXX it could be a comma expression with loop modifiers */
5502                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5503                                    || (*t == '=' && t[1] == '>')))
5504                     OPERATOR(HASHBRACK);
5505                 if (PL_expect == XREF)
5506                     PL_expect = XTERM;
5507                 else {
5508                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5509                     PL_expect = XSTATE;
5510                 }
5511             }
5512             break;
5513         }
5514         pl_yylval.ival = CopLINE(PL_curcop);
5515         if (isSPACE(*s) || *s == '#')
5516             PL_copline = NOLINE;   /* invalidate current command line number */
5517         TOKEN('{');
5518     case '}':
5519       rightbracket:
5520         s++;
5521         if (PL_lex_brackets <= 0)
5522             yyerror("Unmatched right curly bracket");
5523         else
5524             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5525         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5526             PL_lex_formbrack = 0;
5527         if (PL_lex_state == LEX_INTERPNORMAL) {
5528             if (PL_lex_brackets == 0) {
5529                 if (PL_expect & XFAKEBRACK) {
5530                     PL_expect &= XENUMMASK;
5531                     PL_lex_state = LEX_INTERPEND;
5532                     PL_bufptr = s;
5533 #if 0
5534                     if (PL_madskills) {
5535                         if (!PL_thiswhite)
5536                             PL_thiswhite = newSVpvs("");
5537                         sv_catpvs(PL_thiswhite,"}");
5538                     }
5539 #endif
5540                     return yylex();     /* ignore fake brackets */
5541                 }
5542                 if (*s == '-' && s[1] == '>')
5543                     PL_lex_state = LEX_INTERPENDMAYBE;
5544                 else if (*s != '[' && *s != '{')
5545                     PL_lex_state = LEX_INTERPEND;
5546             }
5547         }
5548         if (PL_expect & XFAKEBRACK) {
5549             PL_expect &= XENUMMASK;
5550             PL_bufptr = s;
5551             return yylex();             /* ignore fake brackets */
5552         }
5553         start_force(PL_curforce);
5554         if (PL_madskills) {
5555             curmad('X', newSVpvn(s-1,1));
5556             CURMAD('_', PL_thiswhite);
5557         }
5558         force_next('}');
5559 #ifdef PERL_MAD
5560         if (!PL_thistoken)
5561             PL_thistoken = newSVpvs("");
5562 #endif
5563         TOKEN(';');
5564     case '&':
5565         s++;
5566         if (*s++ == '&')
5567             AOPERATOR(ANDAND);
5568         s--;
5569         if (PL_expect == XOPERATOR) {
5570             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5571                 && isIDFIRST_lazy_if(s,UTF))
5572             {
5573                 CopLINE_dec(PL_curcop);
5574                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5575                 CopLINE_inc(PL_curcop);
5576             }
5577             BAop(OP_BIT_AND);
5578         }
5579
5580         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5581         if (*PL_tokenbuf) {
5582             PL_expect = XOPERATOR;
5583             force_ident(PL_tokenbuf, '&');
5584         }
5585         else
5586             PREREF('&');
5587         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5588         TERM('&');
5589
5590     case '|':
5591         s++;
5592         if (*s++ == '|')
5593             AOPERATOR(OROR);
5594         s--;
5595         BOop(OP_BIT_OR);
5596     case '=':
5597         s++;
5598         {
5599             const char tmp = *s++;
5600             if (tmp == '=')
5601                 Eop(OP_EQ);
5602             if (tmp == '>')
5603                 OPERATOR(',');
5604             if (tmp == '~')
5605                 PMop(OP_MATCH);
5606             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5607                 && strchr("+-*/%.^&|<",tmp))
5608                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5609                             "Reversed %c= operator",(int)tmp);
5610             s--;
5611             if (PL_expect == XSTATE && isALPHA(tmp) &&
5612                 (s == PL_linestart+1 || s[-2] == '\n') )
5613                 {
5614                     if (PL_in_eval && !PL_rsfp) {
5615                         d = PL_bufend;
5616                         while (s < d) {
5617                             if (*s++ == '\n') {
5618                                 incline(s);
5619                                 if (strnEQ(s,"=cut",4)) {
5620                                     s = strchr(s,'\n');
5621                                     if (s)
5622                                         s++;
5623                                     else
5624                                         s = d;
5625                                     incline(s);
5626                                     goto retry;
5627                                 }
5628                             }
5629                         }
5630                         goto retry;
5631                     }
5632 #ifdef PERL_MAD
5633                     if (PL_madskills) {
5634                         if (!PL_thiswhite)
5635                             PL_thiswhite = newSVpvs("");
5636                         sv_catpvn(PL_thiswhite, PL_linestart,
5637                                   PL_bufend - PL_linestart);
5638                     }
5639 #endif
5640                     s = PL_bufend;
5641                     PL_doextract = TRUE;
5642                     goto retry;
5643                 }
5644         }
5645         if (PL_lex_brackets < PL_lex_formbrack) {
5646             const char *t = s;
5647 #ifdef PERL_STRICT_CR
5648             while (SPACE_OR_TAB(*t))
5649 #else
5650             while (SPACE_OR_TAB(*t) || *t == '\r')
5651 #endif
5652                 t++;
5653             if (*t == '\n' || *t == '#') {
5654                 s--;
5655                 PL_expect = XBLOCK;
5656                 goto leftbracket;
5657             }
5658         }
5659         pl_yylval.ival = 0;
5660         OPERATOR(ASSIGNOP);
5661     case '!':
5662         s++;
5663         {
5664             const char tmp = *s++;
5665             if (tmp == '=') {
5666                 /* was this !=~ where !~ was meant?
5667                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5668
5669                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5670                     const char *t = s+1;
5671
5672                     while (t < PL_bufend && isSPACE(*t))
5673                         ++t;
5674
5675                     if (*t == '/' || *t == '?' ||
5676                         ((*t == 'm' || *t == 's' || *t == 'y')
5677                          && !isALNUM(t[1])) ||
5678                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5679                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5680                                     "!=~ should be !~");
5681                 }
5682                 Eop(OP_NE);
5683             }
5684             if (tmp == '~')
5685                 PMop(OP_NOT);
5686         }
5687         s--;
5688         OPERATOR('!');
5689     case '<':
5690         if (PL_expect != XOPERATOR) {
5691             if (s[1] != '<' && !strchr(s,'>'))
5692                 check_uni();
5693             if (s[1] == '<')
5694                 s = scan_heredoc(s);
5695             else
5696                 s = scan_inputsymbol(s);
5697             TERM(sublex_start());
5698         }
5699         s++;
5700         {
5701             char tmp = *s++;
5702             if (tmp == '<')
5703                 SHop(OP_LEFT_SHIFT);
5704             if (tmp == '=') {
5705                 tmp = *s++;
5706                 if (tmp == '>')
5707                     Eop(OP_NCMP);
5708                 s--;
5709                 Rop(OP_LE);
5710             }
5711         }
5712         s--;
5713         Rop(OP_LT);
5714     case '>':
5715         s++;
5716         {
5717             const char tmp = *s++;
5718             if (tmp == '>')
5719                 SHop(OP_RIGHT_SHIFT);
5720             else if (tmp == '=')
5721                 Rop(OP_GE);
5722         }
5723         s--;
5724         Rop(OP_GT);
5725
5726     case '$':
5727         CLINE;
5728
5729         if (PL_expect == XOPERATOR) {
5730             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5731                 return deprecate_commaless_var_list();
5732             }
5733         }
5734
5735         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5736             PL_tokenbuf[0] = '@';
5737             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5738                            sizeof PL_tokenbuf - 1, FALSE);
5739             if (PL_expect == XOPERATOR)
5740                 no_op("Array length", s);
5741             if (!PL_tokenbuf[1])
5742                 PREREF(DOLSHARP);
5743             PL_expect = XOPERATOR;
5744             PL_pending_ident = '#';
5745             TOKEN(DOLSHARP);
5746         }
5747
5748         PL_tokenbuf[0] = '$';
5749         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5750                        sizeof PL_tokenbuf - 1, FALSE);
5751         if (PL_expect == XOPERATOR)
5752             no_op("Scalar", s);
5753         if (!PL_tokenbuf[1]) {
5754             if (s == PL_bufend)
5755                 yyerror("Final $ should be \\$ or $name");
5756             PREREF('$');
5757         }
5758
5759         /* This kludge not intended to be bulletproof. */
5760         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5761             pl_yylval.opval = newSVOP(OP_CONST, 0,
5762                                    newSViv(CopARYBASE_get(&PL_compiling)));
5763             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5764             TERM(THING);
5765         }
5766
5767         d = s;
5768         {
5769             const char tmp = *s;
5770             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5771                 s = SKIPSPACE1(s);
5772
5773             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5774                 && intuit_more(s)) {
5775                 if (*s == '[') {
5776                     PL_tokenbuf[0] = '@';
5777                     if (ckWARN(WARN_SYNTAX)) {
5778                         char *t = s+1;
5779
5780                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5781                             t++;
5782                         if (*t++ == ',') {
5783                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5784                             while (t < PL_bufend && *t != ']')
5785                                 t++;
5786                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5787                                         "Multidimensional syntax %.*s not supported",
5788                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5789                         }
5790                     }
5791                 }
5792                 else if (*s == '{') {
5793                     char *t;
5794                     PL_tokenbuf[0] = '%';
5795                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5796                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5797                         {
5798                             char tmpbuf[sizeof PL_tokenbuf];
5799                             do {
5800                                 t++;
5801                             } while (isSPACE(*t));
5802                             if (isIDFIRST_lazy_if(t,UTF)) {
5803                                 STRLEN len;
5804                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5805                                               &len);
5806                                 while (isSPACE(*t))
5807                                     t++;
5808                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5809                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5810                                                 "You need to quote \"%s\"",
5811                                                 tmpbuf);
5812                             }
5813                         }
5814                 }
5815             }
5816
5817             PL_expect = XOPERATOR;
5818             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5819                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5820                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5821                     PL_expect = XOPERATOR;
5822                 else if (strchr("$@\"'`q", *s))
5823                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5824                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5825                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5826                 else if (isIDFIRST_lazy_if(s,UTF)) {
5827                     char tmpbuf[sizeof PL_tokenbuf];
5828                     int t2;
5829                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5830                     if ((t2 = keyword(tmpbuf, len, 0))) {
5831                         /* binary operators exclude handle interpretations */
5832                         switch (t2) {
5833                         case -KEY_x:
5834                         case -KEY_eq:
5835                         case -KEY_ne:
5836                         case -KEY_gt:
5837                         case -KEY_lt:
5838                         case -KEY_ge:
5839                         case -KEY_le:
5840                         case -KEY_cmp:
5841                             break;
5842                         default:
5843                             PL_expect = XTERM;  /* e.g. print $fh length() */
5844                             break;
5845                         }
5846                     }
5847                     else {
5848                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5849                     }
5850                 }
5851                 else if (isDIGIT(*s))
5852                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5853                 else if (*s == '.' && isDIGIT(s[1]))
5854                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5855                 else if ((*s == '?' || *s == '-' || *s == '+')
5856                          && !isSPACE(s[1]) && s[1] != '=')
5857                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5858                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5859                          && s[1] != '/')
5860                     PL_expect = XTERM;          /* e.g. print $fh /.../
5861                                                    XXX except DORDOR operator
5862                                                 */
5863                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5864                          && s[2] != '=')
5865                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5866             }
5867         }
5868         PL_pending_ident = '$';
5869         TOKEN('$');
5870
5871     case '@':
5872         if (PL_expect == XOPERATOR)
5873             no_op("Array", s);
5874         PL_tokenbuf[0] = '@';
5875         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5876         if (!PL_tokenbuf[1]) {
5877             PREREF('@');
5878         }
5879         if (PL_lex_state == LEX_NORMAL)
5880             s = SKIPSPACE1(s);
5881         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5882             if (*s == '{')
5883                 PL_tokenbuf[0] = '%';
5884
5885             /* Warn about @ where they meant $. */
5886             if (*s == '[' || *s == '{') {
5887                 if (ckWARN(WARN_SYNTAX)) {
5888                     const char *t = s + 1;
5889                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5890                         t++;
5891                     if (*t == '}' || *t == ']') {
5892                         t++;
5893                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5894                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5895                             "Scalar value %.*s better written as $%.*s",
5896                             (int)(t-PL_bufptr), PL_bufptr,
5897                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5898                     }
5899                 }
5900             }
5901         }
5902         PL_pending_ident = '@';
5903         TERM('@');
5904
5905      case '/':                  /* may be division, defined-or, or pattern */
5906         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5907             s += 2;
5908             AOPERATOR(DORDOR);
5909         }
5910      case '?':                  /* may either be conditional or pattern */
5911         if (PL_expect == XOPERATOR) {
5912              char tmp = *s++;
5913              if(tmp == '?') {
5914                 OPERATOR('?');
5915              }
5916              else {
5917                  tmp = *s++;
5918                  if(tmp == '/') {
5919                      /* A // operator. */
5920                     AOPERATOR(DORDOR);
5921                  }
5922                  else {
5923                      s--;
5924                      Mop(OP_DIVIDE);
5925                  }
5926              }
5927          }
5928          else {
5929              /* Disable warning on "study /blah/" */
5930              if (PL_oldoldbufptr == PL_last_uni
5931               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5932                   || memNE(PL_last_uni, "study", 5)
5933                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5934               ))
5935                  check_uni();
5936              s = scan_pat(s,OP_MATCH);
5937              TERM(sublex_start());
5938          }
5939
5940     case '.':
5941         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5942 #ifdef PERL_STRICT_CR
5943             && s[1] == '\n'
5944 #else
5945             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5946 #endif
5947             && (s == PL_linestart || s[-1] == '\n') )
5948         {
5949             PL_lex_formbrack = 0;
5950             PL_expect = XSTATE;
5951             goto rightbracket;
5952         }
5953         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5954             s += 3;
5955             OPERATOR(YADAYADA);
5956         }
5957         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5958             char tmp = *s++;
5959             if (*s == tmp) {
5960                 s++;
5961                 if (*s == tmp) {
5962                     s++;
5963                     pl_yylval.ival = OPf_SPECIAL;
5964                 }
5965                 else
5966                     pl_yylval.ival = 0;
5967                 OPERATOR(DOTDOT);
5968             }
5969             Aop(OP_CONCAT);
5970         }
5971         /* FALL THROUGH */
5972     case '0': case '1': case '2': case '3': case '4':
5973     case '5': case '6': case '7': case '8': case '9':
5974         s = scan_num(s, &pl_yylval);
5975         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5976         if (PL_expect == XOPERATOR)
5977             no_op("Number",s);
5978         TERM(THING);
5979
5980     case '\'':
5981         s = scan_str(s,!!PL_madskills,FALSE);
5982         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5983         if (PL_expect == XOPERATOR) {
5984             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5985                 return deprecate_commaless_var_list();
5986             }
5987             else
5988                 no_op("String",s);
5989         }
5990         if (!s)
5991             missingterm(NULL);
5992         pl_yylval.ival = OP_CONST;
5993         TERM(sublex_start());
5994
5995     case '"':
5996         s = scan_str(s,!!PL_madskills,FALSE);
5997         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5998         if (PL_expect == XOPERATOR) {
5999             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6000                 return deprecate_commaless_var_list();
6001             }
6002             else
6003                 no_op("String",s);
6004         }
6005         if (!s)
6006             missingterm(NULL);
6007         pl_yylval.ival = OP_CONST;
6008         /* FIXME. I think that this can be const if char *d is replaced by
6009            more localised variables.  */
6010         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6011             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6012                 pl_yylval.ival = OP_STRINGIFY;
6013                 break;
6014             }
6015         }
6016         TERM(sublex_start());
6017
6018     case '`':
6019         s = scan_str(s,!!PL_madskills,FALSE);
6020         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6021         if (PL_expect == XOPERATOR)
6022             no_op("Backticks",s);
6023         if (!s)
6024             missingterm(NULL);
6025         readpipe_override();
6026         TERM(sublex_start());
6027
6028     case '\\':
6029         s++;
6030         if (PL_lex_inwhat && isDIGIT(*s))
6031             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6032                            *s, *s);
6033         if (PL_expect == XOPERATOR)
6034             no_op("Backslash",s);
6035         OPERATOR(REFGEN);
6036
6037     case 'v':
6038         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6039             char *start = s + 2;
6040             while (isDIGIT(*start) || *start == '_')
6041                 start++;
6042             if (*start == '.' && isDIGIT(start[1])) {
6043                 s = scan_num(s, &pl_yylval);
6044                 TERM(THING);
6045             }
6046             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6047             else if (!isALPHA(*start) && (PL_expect == XTERM
6048                         || PL_expect == XREF || PL_expect == XSTATE
6049                         || PL_expect == XTERMORDORDOR)) {
6050                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6051                 if (!gv) {
6052                     s = scan_num(s, &pl_yylval);
6053                     TERM(THING);
6054                 }
6055             }
6056         }
6057         goto keylookup;
6058     case 'x':
6059         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6060             s++;
6061             Mop(OP_REPEAT);
6062         }
6063         goto keylookup;
6064
6065     case '_':
6066     case 'a': case 'A':
6067     case 'b': case 'B':
6068     case 'c': case 'C':
6069     case 'd': case 'D':
6070     case 'e': case 'E':
6071     case 'f': case 'F':
6072     case 'g': case 'G':
6073     case 'h': case 'H':
6074     case 'i': case 'I':
6075     case 'j': case 'J':
6076     case 'k': case 'K':
6077     case 'l': case 'L':
6078     case 'm': case 'M':
6079     case 'n': case 'N':
6080     case 'o': case 'O':
6081     case 'p': case 'P':
6082     case 'q': case 'Q':
6083     case 'r': case 'R':
6084     case 's': case 'S':
6085     case 't': case 'T':
6086     case 'u': case 'U':
6087               case 'V':
6088     case 'w': case 'W':
6089               case 'X':
6090     case 'y': case 'Y':
6091     case 'z': case 'Z':
6092
6093       keylookup: {
6094         bool anydelim;
6095         I32 tmp;
6096
6097         orig_keyword = 0;
6098         gv = NULL;
6099         gvp = NULL;
6100
6101         PL_bufptr = s;
6102         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6103
6104         /* Some keywords can be followed by any delimiter, including ':' */
6105         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6106                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6107                              (PL_tokenbuf[0] == 'q' &&
6108                               strchr("qwxr", PL_tokenbuf[1])))));
6109
6110         /* x::* is just a word, unless x is "CORE" */
6111         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6112             goto just_a_word;
6113
6114         d = s;
6115         while (d < PL_bufend && isSPACE(*d))
6116                 d++;    /* no comments skipped here, or s### is misparsed */
6117
6118         /* Is this a word before a => operator? */
6119         if (*d == '=' && d[1] == '>') {
6120             CLINE;
6121             pl_yylval.opval
6122                 = (OP*)newSVOP(OP_CONST, 0,
6123                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6124             pl_yylval.opval->op_private = OPpCONST_BARE;
6125             TERM(WORD);
6126         }
6127
6128         /* Check for plugged-in keyword */
6129         {
6130             OP *o;
6131             int result;
6132             char *saved_bufptr = PL_bufptr;
6133             PL_bufptr = s;
6134             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6135             s = PL_bufptr;
6136             if (result == KEYWORD_PLUGIN_DECLINE) {
6137                 /* not a plugged-in keyword */
6138                 PL_bufptr = saved_bufptr;
6139             } else if (result == KEYWORD_PLUGIN_STMT) {
6140                 pl_yylval.opval = o;
6141                 CLINE;
6142                 PL_expect = XSTATE;
6143                 return REPORT(PLUGSTMT);
6144             } else if (result == KEYWORD_PLUGIN_EXPR) {
6145                 pl_yylval.opval = o;
6146                 CLINE;
6147                 PL_expect = XOPERATOR;
6148                 return REPORT(PLUGEXPR);
6149             } else {
6150                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6151                                         PL_tokenbuf);
6152             }
6153         }
6154
6155         /* Check for built-in keyword */
6156         tmp = keyword(PL_tokenbuf, len, 0);
6157
6158         /* Is this a label? */
6159         if (!anydelim && PL_expect == XSTATE
6160               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6161             s = d + 1;
6162             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6163             CLINE;
6164             TOKEN(LABEL);
6165         }
6166
6167         if (tmp < 0) {                  /* second-class keyword? */
6168             GV *ogv = NULL;     /* override (winner) */
6169             GV *hgv = NULL;     /* hidden (loser) */
6170             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6171                 CV *cv;
6172                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6173                     (cv = GvCVu(gv)))
6174                 {
6175                     if (GvIMPORTED_CV(gv))
6176                         ogv = gv;
6177                     else if (! CvMETHOD(cv))
6178                         hgv = gv;
6179                 }
6180                 if (!ogv &&
6181                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6182                     (gv = *gvp) && isGV_with_GP(gv) &&
6183                     GvCVu(gv) && GvIMPORTED_CV(gv))
6184                 {
6185                     ogv = gv;
6186                 }
6187             }
6188             if (ogv) {
6189                 orig_keyword = tmp;
6190                 tmp = 0;                /* overridden by import or by GLOBAL */
6191             }
6192             else if (gv && !gvp
6193                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6194                      && GvCVu(gv))
6195             {
6196                 tmp = 0;                /* any sub overrides "weak" keyword */
6197             }
6198             else {                      /* no override */
6199                 tmp = -tmp;
6200                 if (tmp == KEY_dump) {
6201                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6202                                    "dump() better written as CORE::dump()");
6203                 }
6204                 gv = NULL;
6205                 gvp = 0;
6206                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6207                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6208                                    "Ambiguous call resolved as CORE::%s(), "
6209                                    "qualify as such or use &",
6210                                    GvENAME(hgv));
6211             }
6212         }
6213
6214       reserved_word:
6215         switch (tmp) {
6216
6217         default:                        /* not a keyword */
6218             /* Trade off - by using this evil construction we can pull the
6219                variable gv into the block labelled keylookup. If not, then
6220                we have to give it function scope so that the goto from the
6221                earlier ':' case doesn't bypass the initialisation.  */
6222             if (0) {
6223             just_a_word_zero_gv:
6224                 gv = NULL;
6225                 gvp = NULL;
6226                 orig_keyword = 0;
6227             }
6228           just_a_word: {
6229                 SV *sv;
6230                 int pkgname = 0;
6231                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6232                 OP *rv2cv_op;
6233                 CV *cv;
6234 #ifdef PERL_MAD
6235                 SV *nextPL_nextwhite = 0;
6236 #endif
6237
6238
6239                 /* Get the rest if it looks like a package qualifier */
6240
6241                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6242                     STRLEN morelen;
6243                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6244                                   TRUE, &morelen);
6245                     if (!morelen)
6246                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6247                                 *s == '\'' ? "'" : "::");
6248                     len += morelen;
6249                     pkgname = 1;
6250                 }
6251
6252                 if (PL_expect == XOPERATOR) {
6253                     if (PL_bufptr == PL_linestart) {
6254                         CopLINE_dec(PL_curcop);
6255                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6256                         CopLINE_inc(PL_curcop);
6257                     }
6258                     else
6259                         no_op("Bareword",s);
6260                 }
6261
6262                 /* Look for a subroutine with this name in current package,
6263                    unless name is "Foo::", in which case Foo is a bearword
6264                    (and a package name). */
6265
6266                 if (len > 2 && !PL_madskills &&
6267                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6268                 {
6269                     if (ckWARN(WARN_BAREWORD)
6270                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6271                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6272                             "Bareword \"%s\" refers to nonexistent package",
6273                              PL_tokenbuf);
6274                     len -= 2;
6275                     PL_tokenbuf[len] = '\0';
6276                     gv = NULL;
6277                     gvp = 0;
6278                 }
6279                 else {
6280                     if (!gv) {
6281                         /* Mustn't actually add anything to a symbol table.
6282                            But also don't want to "initialise" any placeholder
6283                            constants that might already be there into full
6284                            blown PVGVs with attached PVCV.  */
6285                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6286                                                GV_NOADD_NOINIT, SVt_PVCV);
6287                     }
6288                     len = 0;
6289                 }
6290
6291                 /* if we saw a global override before, get the right name */
6292
6293                 if (gvp) {
6294                     sv = newSVpvs("CORE::GLOBAL::");
6295                     sv_catpv(sv,PL_tokenbuf);
6296                 }
6297                 else {
6298                     /* If len is 0, newSVpv does strlen(), which is correct.
6299                        If len is non-zero, then it will be the true length,
6300                        and so the scalar will be created correctly.  */
6301                     sv = newSVpv(PL_tokenbuf,len);
6302                 }
6303 #ifdef PERL_MAD
6304                 if (PL_madskills && !PL_thistoken) {
6305                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6306                     PL_thistoken = newSVpvn(start,s - start);
6307                     PL_realtokenstart = s - SvPVX(PL_linestr);
6308                 }
6309 #endif
6310
6311                 /* Presume this is going to be a bareword of some sort. */
6312
6313                 CLINE;
6314                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6315                 pl_yylval.opval->op_private = OPpCONST_BARE;
6316                 /* UTF-8 package name? */
6317                 if (UTF && !IN_BYTES &&
6318                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
6319                     SvUTF8_on(sv);
6320
6321                 /* And if "Foo::", then that's what it certainly is. */
6322
6323                 if (len)
6324                     goto safe_bareword;
6325
6326                 cv = NULL;
6327                 {
6328                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6329                     const_op->op_private = OPpCONST_BARE;
6330                     rv2cv_op = newCVREF(0, const_op);
6331                 }
6332                 if (rv2cv_op->op_type == OP_RV2CV &&
6333                         (rv2cv_op->op_flags & OPf_KIDS)) {
6334                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6335                     switch (rv_op->op_type) {
6336                         case OP_CONST: {
6337                             SV *sv = cSVOPx_sv(rv_op);
6338                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6339                                 cv = (CV*)SvRV(sv);
6340                         } break;
6341                         case OP_GV: {
6342                             GV *gv = cGVOPx_gv(rv_op);
6343                             CV *maybe_cv = GvCVu(gv);
6344                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6345                                 cv = maybe_cv;
6346                         } break;
6347                     }
6348                 }
6349
6350                 /* See if it's the indirect object for a list operator. */
6351
6352                 if (PL_oldoldbufptr &&
6353                     PL_oldoldbufptr < PL_bufptr &&
6354                     (PL_oldoldbufptr == PL_last_lop
6355                      || PL_oldoldbufptr == PL_last_uni) &&
6356                     /* NO SKIPSPACE BEFORE HERE! */
6357                     (PL_expect == XREF ||
6358                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6359                 {
6360                     bool immediate_paren = *s == '(';
6361
6362                     /* (Now we can afford to cross potential line boundary.) */
6363                     s = SKIPSPACE2(s,nextPL_nextwhite);
6364 #ifdef PERL_MAD
6365                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6366 #endif
6367
6368                     /* Two barewords in a row may indicate method call. */
6369
6370                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6371                         (tmp = intuit_method(s, gv, cv))) {
6372                         op_free(rv2cv_op);
6373                         return REPORT(tmp);
6374                     }
6375
6376                     /* If not a declared subroutine, it's an indirect object. */
6377                     /* (But it's an indir obj regardless for sort.) */
6378                     /* Also, if "_" follows a filetest operator, it's a bareword */
6379
6380                     if (
6381                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6382                          (!cv &&
6383                         (PL_last_lop_op != OP_MAPSTART &&
6384                          PL_last_lop_op != OP_GREPSTART))))
6385                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6386                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6387                        )
6388                     {
6389                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6390                         goto bareword;
6391                     }
6392                 }
6393
6394                 PL_expect = XOPERATOR;
6395 #ifdef PERL_MAD
6396                 if (isSPACE(*s))
6397                     s = SKIPSPACE2(s,nextPL_nextwhite);
6398                 PL_nextwhite = nextPL_nextwhite;
6399 #else
6400                 s = skipspace(s);
6401 #endif
6402
6403                 /* Is this a word before a => operator? */
6404                 if (*s == '=' && s[1] == '>' && !pkgname) {
6405                     op_free(rv2cv_op);
6406                     CLINE;
6407                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6408                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6409                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6410                     TERM(WORD);
6411                 }
6412
6413                 /* If followed by a paren, it's certainly a subroutine. */
6414                 if (*s == '(') {
6415                     CLINE;
6416                     if (cv) {
6417                         d = s + 1;
6418                         while (SPACE_OR_TAB(*d))
6419                             d++;
6420                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6421                             s = d + 1;
6422                             goto its_constant;
6423                         }
6424                     }
6425 #ifdef PERL_MAD
6426                     if (PL_madskills) {
6427                         PL_nextwhite = PL_thiswhite;
6428                         PL_thiswhite = 0;
6429                     }
6430                     start_force(PL_curforce);
6431 #endif
6432                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6433                     PL_expect = XOPERATOR;
6434 #ifdef PERL_MAD
6435                     if (PL_madskills) {
6436                         PL_nextwhite = nextPL_nextwhite;
6437                         curmad('X', PL_thistoken);
6438                         PL_thistoken = newSVpvs("");
6439                     }
6440 #endif
6441                     op_free(rv2cv_op);
6442                     force_next(WORD);
6443                     pl_yylval.ival = 0;
6444                     TOKEN('&');
6445                 }
6446
6447                 /* If followed by var or block, call it a method (unless sub) */
6448
6449                 if ((*s == '$' || *s == '{') && !cv) {
6450                     op_free(rv2cv_op);
6451                     PL_last_lop = PL_oldbufptr;
6452                     PL_last_lop_op = OP_METHOD;
6453                     PREBLOCK(METHOD);
6454                 }
6455
6456                 /* If followed by a bareword, see if it looks like indir obj. */
6457
6458                 if (!orig_keyword
6459                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6460                         && (tmp = intuit_method(s, gv, cv))) {
6461                     op_free(rv2cv_op);
6462                     return REPORT(tmp);
6463                 }
6464
6465                 /* Not a method, so call it a subroutine (if defined) */
6466
6467                 if (cv) {
6468                     if (lastchar == '-')
6469                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6470                                          "Ambiguous use of -%s resolved as -&%s()",
6471                                          PL_tokenbuf, PL_tokenbuf);
6472                     /* Check for a constant sub */
6473                     if ((sv = cv_const_sv(cv))) {
6474                   its_constant:
6475                         op_free(rv2cv_op);
6476                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6477                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6478                         pl_yylval.opval->op_private = 0;
6479                         TOKEN(WORD);
6480                     }
6481
6482                     op_free(pl_yylval.opval);
6483                     pl_yylval.opval = rv2cv_op;
6484                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6485                     PL_last_lop = PL_oldbufptr;
6486                     PL_last_lop_op = OP_ENTERSUB;
6487                     /* Is there a prototype? */
6488                     if (
6489 #ifdef PERL_MAD
6490                         cv &&
6491 #endif
6492                         SvPOK(cv))
6493                     {
6494                         STRLEN protolen;
6495                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6496                         if (!protolen)
6497                             TERM(FUNC0SUB);
6498                         while (*proto == ';')
6499                             proto++;
6500                         if (
6501                             (
6502                                 (
6503                                     *proto == '$' || *proto == '_'
6504                                  || *proto == '*'
6505                                 )
6506                              && proto[1] == '\0'
6507                             )
6508                          || (
6509                              *proto == '\\' && proto[1] && proto[2] == '\0'
6510                             )
6511                         )
6512                             OPERATOR(UNIOPSUB);
6513                         if (*proto == '\\' && proto[1] == '[') {
6514                             const char *p = proto + 2;
6515                             while(*p && *p != ']')
6516                                 ++p;
6517                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6518                         }
6519                         if (*proto == '&' && *s == '{') {
6520                             if (PL_curstash)
6521                                 sv_setpvs(PL_subname, "__ANON__");
6522                             else
6523                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6524                             PREBLOCK(LSTOPSUB);
6525                         }
6526                     }
6527 #ifdef PERL_MAD
6528                     {
6529                         if (PL_madskills) {
6530                             PL_nextwhite = PL_thiswhite;
6531                             PL_thiswhite = 0;
6532                         }
6533                         start_force(PL_curforce);
6534                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6535                         PL_expect = XTERM;
6536                         if (PL_madskills) {
6537                             PL_nextwhite = nextPL_nextwhite;
6538                             curmad('X', PL_thistoken);
6539                             PL_thistoken = newSVpvs("");
6540                         }
6541                         force_next(WORD);
6542                         TOKEN(NOAMP);
6543                     }
6544                 }
6545
6546                 /* Guess harder when madskills require "best effort". */
6547                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6548                     int probable_sub = 0;
6549                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6550                         probable_sub = 1;
6551                     else if (isALPHA(*s)) {
6552                         char tmpbuf[1024];
6553                         STRLEN tmplen;
6554                         d = s;
6555                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6556                         if (!keyword(tmpbuf, tmplen, 0))
6557                             probable_sub = 1;
6558                         else {
6559                             while (d < PL_bufend && isSPACE(*d))
6560                                 d++;
6561                             if (*d == '=' && d[1] == '>')
6562                                 probable_sub = 1;
6563                         }
6564                     }
6565                     if (probable_sub) {
6566                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6567                         op_free(pl_yylval.opval);
6568                         pl_yylval.opval = rv2cv_op;
6569                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6570                         PL_last_lop = PL_oldbufptr;
6571                         PL_last_lop_op = OP_ENTERSUB;
6572                         PL_nextwhite = PL_thiswhite;
6573                         PL_thiswhite = 0;
6574                         start_force(PL_curforce);
6575                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6576                         PL_expect = XTERM;
6577                         PL_nextwhite = nextPL_nextwhite;
6578                         curmad('X', PL_thistoken);
6579                         PL_thistoken = newSVpvs("");
6580                         force_next(WORD);
6581                         TOKEN(NOAMP);
6582                     }
6583 #else
6584                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6585                     PL_expect = XTERM;
6586                     force_next(WORD);
6587                     TOKEN(NOAMP);
6588 #endif
6589                 }
6590
6591                 /* Call it a bare word */
6592
6593                 if (PL_hints & HINT_STRICT_SUBS)
6594                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6595                 else {
6596                 bareword:
6597                     /* after "print" and similar functions (corresponding to
6598                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6599                      * a filehandle should be subject to "strict subs".
6600                      * Likewise for the optional indirect-object argument to system
6601                      * or exec, which can't be a bareword */
6602                     if ((PL_last_lop_op == OP_PRINT
6603                             || PL_last_lop_op == OP_PRTF
6604                             || PL_last_lop_op == OP_SAY
6605                             || PL_last_lop_op == OP_SYSTEM
6606                             || PL_last_lop_op == OP_EXEC)
6607                             && (PL_hints & HINT_STRICT_SUBS))
6608                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6609                     if (lastchar != '-') {
6610                         if (ckWARN(WARN_RESERVED)) {
6611                             d = PL_tokenbuf;
6612                             while (isLOWER(*d))
6613                                 d++;
6614                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6615                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6616                                        PL_tokenbuf);
6617                         }
6618                     }
6619                 }
6620                 op_free(rv2cv_op);
6621
6622             safe_bareword:
6623                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6624                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6625                                      "Operator or semicolon missing before %c%s",
6626                                      lastchar, PL_tokenbuf);
6627                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6628                                      "Ambiguous use of %c resolved as operator %c",
6629                                      lastchar, lastchar);
6630                 }
6631                 TOKEN(WORD);
6632             }
6633
6634         case KEY___FILE__:
6635             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6636                                         newSVpv(CopFILE(PL_curcop),0));
6637             TERM(THING);
6638
6639         case KEY___LINE__:
6640             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6641                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6642             TERM(THING);
6643
6644         case KEY___PACKAGE__:
6645             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6646                                         (PL_curstash
6647                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6648                                          : &PL_sv_undef));
6649             TERM(THING);
6650
6651         case KEY___DATA__:
6652         case KEY___END__: {
6653             GV *gv;
6654             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6655                 const char *pname = "main";
6656                 if (PL_tokenbuf[2] == 'D')
6657                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6658                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6659                                 SVt_PVIO);
6660                 GvMULTI_on(gv);
6661                 if (!GvIO(gv))
6662                     GvIOp(gv) = newIO();
6663                 IoIFP(GvIOp(gv)) = PL_rsfp;
6664 #if defined(HAS_FCNTL) && defined(F_SETFD)
6665                 {
6666                     const int fd = PerlIO_fileno(PL_rsfp);
6667                     fcntl(fd,F_SETFD,fd >= 3);
6668                 }
6669 #endif
6670                 /* Mark this internal pseudo-handle as clean */
6671                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6672                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6673                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6674                 else
6675                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6676 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6677                 /* if the script was opened in binmode, we need to revert
6678                  * it to text mode for compatibility; but only iff it has CRs
6679                  * XXX this is a questionable hack at best. */
6680                 if (PL_bufend-PL_bufptr > 2
6681                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6682                 {
6683                     Off_t loc = 0;
6684                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6685                         loc = PerlIO_tell(PL_rsfp);
6686                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6687                     }
6688 #ifdef NETWARE
6689                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6690 #else
6691                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6692 #endif  /* NETWARE */
6693 #ifdef PERLIO_IS_STDIO /* really? */
6694 #  if defined(__BORLANDC__)
6695                         /* XXX see note in do_binmode() */
6696                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6697 #  endif
6698 #endif
6699                         if (loc > 0)
6700                             PerlIO_seek(PL_rsfp, loc, 0);
6701                     }
6702                 }
6703 #endif
6704 #ifdef PERLIO_LAYERS
6705                 if (!IN_BYTES) {
6706                     if (UTF)
6707                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6708                     else if (PL_encoding) {
6709                         SV *name;
6710                         dSP;
6711                         ENTER;
6712                         SAVETMPS;
6713                         PUSHMARK(sp);
6714                         EXTEND(SP, 1);
6715                         XPUSHs(PL_encoding);
6716                         PUTBACK;
6717                         call_method("name", G_SCALAR);
6718                         SPAGAIN;
6719                         name = POPs;
6720                         PUTBACK;
6721                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6722                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6723                                                       SVfARG(name)));
6724                         FREETMPS;
6725                         LEAVE;
6726                     }
6727                 }
6728 #endif
6729 #ifdef PERL_MAD
6730                 if (PL_madskills) {
6731                     if (PL_realtokenstart >= 0) {
6732                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6733                         if (!PL_endwhite)
6734                             PL_endwhite = newSVpvs("");
6735                         sv_catsv(PL_endwhite, PL_thiswhite);
6736                         PL_thiswhite = 0;
6737                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6738                         PL_realtokenstart = -1;
6739                     }
6740                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6741                            != NULL) ;
6742                 }
6743 #endif
6744                 PL_rsfp = NULL;
6745             }
6746             goto fake_eof;
6747         }
6748
6749         case KEY_AUTOLOAD:
6750         case KEY_DESTROY:
6751         case KEY_BEGIN:
6752         case KEY_UNITCHECK:
6753         case KEY_CHECK:
6754         case KEY_INIT:
6755         case KEY_END:
6756             if (PL_expect == XSTATE) {
6757                 s = PL_bufptr;
6758                 goto really_sub;
6759             }
6760             goto just_a_word;
6761
6762         case KEY_CORE:
6763             if (*s == ':' && s[1] == ':') {
6764                 s += 2;
6765                 d = s;
6766                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6767                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6768                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6769                 if (tmp < 0)
6770                     tmp = -tmp;
6771                 else if (tmp == KEY_require || tmp == KEY_do)
6772                     /* that's a way to remember we saw "CORE::" */
6773                     orig_keyword = tmp;
6774                 goto reserved_word;
6775             }
6776             goto just_a_word;
6777
6778         case KEY_abs:
6779             UNI(OP_ABS);
6780
6781         case KEY_alarm:
6782             UNI(OP_ALARM);
6783
6784         case KEY_accept:
6785             LOP(OP_ACCEPT,XTERM);
6786
6787         case KEY_and:
6788             OPERATOR(ANDOP);
6789
6790         case KEY_atan2:
6791             LOP(OP_ATAN2,XTERM);
6792
6793         case KEY_bind:
6794             LOP(OP_BIND,XTERM);
6795
6796         case KEY_binmode:
6797             LOP(OP_BINMODE,XTERM);
6798
6799         case KEY_bless:
6800             LOP(OP_BLESS,XTERM);
6801
6802         case KEY_break:
6803             FUN0(OP_BREAK);
6804
6805         case KEY_chop:
6806             UNI(OP_CHOP);
6807
6808         case KEY_continue:
6809             /* When 'use switch' is in effect, continue has a dual
6810                life as a control operator. */
6811             {
6812                 if (!FEATURE_IS_ENABLED("switch"))
6813                     PREBLOCK(CONTINUE);
6814                 else {
6815                     /* We have to disambiguate the two senses of
6816                       "continue". If the next token is a '{' then
6817                       treat it as the start of a continue block;
6818                       otherwise treat it as a control operator.
6819                      */
6820                     s = skipspace(s);
6821                     if (*s == '{')
6822             PREBLOCK(CONTINUE);
6823                     else
6824                         FUN0(OP_CONTINUE);
6825                 }
6826             }
6827
6828         case KEY_chdir:
6829             /* may use HOME */
6830             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6831             UNI(OP_CHDIR);
6832
6833         case KEY_close:
6834             UNI(OP_CLOSE);
6835
6836         case KEY_closedir:
6837             UNI(OP_CLOSEDIR);
6838
6839         case KEY_cmp:
6840             Eop(OP_SCMP);
6841
6842         case KEY_caller:
6843             UNI(OP_CALLER);
6844
6845         case KEY_crypt:
6846 #ifdef FCRYPT
6847             if (!PL_cryptseen) {
6848                 PL_cryptseen = TRUE;
6849                 init_des();
6850             }
6851 #endif
6852             LOP(OP_CRYPT,XTERM);
6853
6854         case KEY_chmod:
6855             LOP(OP_CHMOD,XTERM);
6856
6857         case KEY_chown:
6858             LOP(OP_CHOWN,XTERM);
6859
6860         case KEY_connect:
6861             LOP(OP_CONNECT,XTERM);
6862
6863         case KEY_chr:
6864             UNI(OP_CHR);
6865
6866         case KEY_cos:
6867             UNI(OP_COS);
6868
6869         case KEY_chroot:
6870             UNI(OP_CHROOT);
6871
6872         case KEY_default:
6873             PREBLOCK(DEFAULT);
6874
6875         case KEY_do:
6876             s = SKIPSPACE1(s);
6877             if (*s == '{')
6878                 PRETERMBLOCK(DO);
6879             if (*s != '\'')
6880                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6881             if (orig_keyword == KEY_do) {
6882                 orig_keyword = 0;
6883                 pl_yylval.ival = 1;
6884             }
6885             else
6886                 pl_yylval.ival = 0;
6887             OPERATOR(DO);
6888
6889         case KEY_die:
6890             PL_hints |= HINT_BLOCK_SCOPE;
6891             LOP(OP_DIE,XTERM);
6892
6893         case KEY_defined:
6894             UNI(OP_DEFINED);
6895
6896         case KEY_delete:
6897             UNI(OP_DELETE);
6898
6899         case KEY_dbmopen:
6900             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6901             LOP(OP_DBMOPEN,XTERM);
6902
6903         case KEY_dbmclose:
6904             UNI(OP_DBMCLOSE);
6905
6906         case KEY_dump:
6907             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6908             LOOPX(OP_DUMP);
6909
6910         case KEY_else:
6911             PREBLOCK(ELSE);
6912
6913         case KEY_elsif:
6914             pl_yylval.ival = CopLINE(PL_curcop);
6915             OPERATOR(ELSIF);
6916
6917         case KEY_eq:
6918             Eop(OP_SEQ);
6919
6920         case KEY_exists:
6921             UNI(OP_EXISTS);
6922
6923         case KEY_exit:
6924             if (PL_madskills)
6925                 UNI(OP_INT);
6926             UNI(OP_EXIT);
6927
6928         case KEY_eval:
6929             s = SKIPSPACE1(s);
6930             if (*s == '{') { /* block eval */
6931                 PL_expect = XTERMBLOCK;
6932                 UNIBRACK(OP_ENTERTRY);
6933             }
6934             else { /* string eval */
6935                 PL_expect = XTERM;
6936                 UNIBRACK(OP_ENTEREVAL);
6937             }
6938
6939         case KEY_eof:
6940             UNI(OP_EOF);
6941
6942         case KEY_exp:
6943             UNI(OP_EXP);
6944
6945         case KEY_each:
6946             UNI(OP_EACH);
6947
6948         case KEY_exec:
6949             LOP(OP_EXEC,XREF);
6950
6951         case KEY_endhostent:
6952             FUN0(OP_EHOSTENT);
6953
6954         case KEY_endnetent:
6955             FUN0(OP_ENETENT);
6956
6957         case KEY_endservent:
6958             FUN0(OP_ESERVENT);
6959
6960         case KEY_endprotoent:
6961             FUN0(OP_EPROTOENT);
6962
6963         case KEY_endpwent:
6964             FUN0(OP_EPWENT);
6965
6966         case KEY_endgrent:
6967             FUN0(OP_EGRENT);
6968
6969         case KEY_for:
6970         case KEY_foreach:
6971             pl_yylval.ival = CopLINE(PL_curcop);
6972             s = SKIPSPACE1(s);
6973             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6974                 char *p = s;
6975 #ifdef PERL_MAD
6976                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6977 #endif
6978
6979                 if ((PL_bufend - p) >= 3 &&
6980                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6981                     p += 2;
6982                 else if ((PL_bufend - p) >= 4 &&
6983                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6984                     p += 3;
6985                 p = PEEKSPACE(p);
6986                 if (isIDFIRST_lazy_if(p,UTF)) {
6987                     p = scan_ident(p, PL_bufend,
6988                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6989                     p = PEEKSPACE(p);
6990                 }
6991                 if (*p != '$')
6992                     Perl_croak(aTHX_ "Missing $ on loop variable");
6993 #ifdef PERL_MAD
6994                 s = SvPVX(PL_linestr) + soff;
6995 #endif
6996             }
6997             OPERATOR(FOR);
6998
6999         case KEY_formline:
7000             LOP(OP_FORMLINE,XTERM);
7001
7002         case KEY_fork:
7003             FUN0(OP_FORK);
7004
7005         case KEY_fcntl:
7006             LOP(OP_FCNTL,XTERM);
7007
7008         case KEY_fileno:
7009             UNI(OP_FILENO);
7010
7011         case KEY_flock:
7012             LOP(OP_FLOCK,XTERM);
7013
7014         case KEY_gt:
7015             Rop(OP_SGT);
7016
7017         case KEY_ge:
7018             Rop(OP_SGE);
7019
7020         case KEY_grep:
7021             LOP(OP_GREPSTART, XREF);
7022
7023         case KEY_goto:
7024             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7025             LOOPX(OP_GOTO);
7026
7027         case KEY_gmtime:
7028             UNI(OP_GMTIME);
7029
7030         case KEY_getc:
7031             UNIDOR(OP_GETC);
7032
7033         case KEY_getppid:
7034             FUN0(OP_GETPPID);
7035
7036         case KEY_getpgrp:
7037             UNI(OP_GETPGRP);
7038
7039         case KEY_getpriority:
7040             LOP(OP_GETPRIORITY,XTERM);
7041
7042         case KEY_getprotobyname:
7043             UNI(OP_GPBYNAME);
7044
7045         case KEY_getprotobynumber:
7046             LOP(OP_GPBYNUMBER,XTERM);
7047
7048         case KEY_getprotoent:
7049             FUN0(OP_GPROTOENT);
7050
7051         case KEY_getpwent:
7052             FUN0(OP_GPWENT);
7053
7054         case KEY_getpwnam:
7055             UNI(OP_GPWNAM);
7056
7057         case KEY_getpwuid:
7058             UNI(OP_GPWUID);
7059
7060         case KEY_getpeername:
7061             UNI(OP_GETPEERNAME);
7062
7063         case KEY_gethostbyname:
7064             UNI(OP_GHBYNAME);
7065
7066         case KEY_gethostbyaddr:
7067             LOP(OP_GHBYADDR,XTERM);
7068
7069         case KEY_gethostent:
7070             FUN0(OP_GHOSTENT);
7071
7072         case KEY_getnetbyname:
7073             UNI(OP_GNBYNAME);
7074
7075         case KEY_getnetbyaddr:
7076             LOP(OP_GNBYADDR,XTERM);
7077
7078         case KEY_getnetent:
7079             FUN0(OP_GNETENT);
7080
7081         case KEY_getservbyname:
7082             LOP(OP_GSBYNAME,XTERM);
7083
7084         case KEY_getservbyport:
7085             LOP(OP_GSBYPORT,XTERM);
7086
7087         case KEY_getservent:
7088             FUN0(OP_GSERVENT);
7089
7090         case KEY_getsockname:
7091             UNI(OP_GETSOCKNAME);
7092
7093         case KEY_getsockopt:
7094             LOP(OP_GSOCKOPT,XTERM);
7095
7096         case KEY_getgrent:
7097             FUN0(OP_GGRENT);
7098
7099         case KEY_getgrnam:
7100             UNI(OP_GGRNAM);
7101
7102         case KEY_getgrgid:
7103             UNI(OP_GGRGID);
7104
7105         case KEY_getlogin:
7106             FUN0(OP_GETLOGIN);
7107
7108         case KEY_given:
7109             pl_yylval.ival = CopLINE(PL_curcop);
7110             OPERATOR(GIVEN);
7111
7112         case KEY_glob:
7113             LOP(OP_GLOB,XTERM);
7114
7115         case KEY_hex:
7116             UNI(OP_HEX);
7117
7118         case KEY_if:
7119             pl_yylval.ival = CopLINE(PL_curcop);
7120             OPERATOR(IF);
7121
7122         case KEY_index:
7123             LOP(OP_INDEX,XTERM);
7124
7125         case KEY_int:
7126             UNI(OP_INT);
7127
7128         case KEY_ioctl:
7129             LOP(OP_IOCTL,XTERM);
7130
7131         case KEY_join:
7132             LOP(OP_JOIN,XTERM);
7133
7134         case KEY_keys:
7135             UNI(OP_KEYS);
7136
7137         case KEY_kill:
7138             LOP(OP_KILL,XTERM);
7139
7140         case KEY_last:
7141             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7142             LOOPX(OP_LAST);
7143
7144         case KEY_lc:
7145             UNI(OP_LC);
7146
7147         case KEY_lcfirst:
7148             UNI(OP_LCFIRST);
7149
7150         case KEY_local:
7151             pl_yylval.ival = 0;
7152             OPERATOR(LOCAL);
7153
7154         case KEY_length:
7155             UNI(OP_LENGTH);
7156
7157         case KEY_lt:
7158             Rop(OP_SLT);
7159
7160         case KEY_le:
7161             Rop(OP_SLE);
7162
7163         case KEY_localtime:
7164             UNI(OP_LOCALTIME);
7165
7166         case KEY_log:
7167             UNI(OP_LOG);
7168
7169         case KEY_link:
7170             LOP(OP_LINK,XTERM);
7171
7172         case KEY_listen:
7173             LOP(OP_LISTEN,XTERM);
7174
7175         case KEY_lock:
7176             UNI(OP_LOCK);
7177
7178         case KEY_lstat:
7179             UNI(OP_LSTAT);
7180
7181         case KEY_m:
7182             s = scan_pat(s,OP_MATCH);
7183             TERM(sublex_start());
7184
7185         case KEY_map:
7186             LOP(OP_MAPSTART, XREF);
7187
7188         case KEY_mkdir:
7189             LOP(OP_MKDIR,XTERM);
7190
7191         case KEY_msgctl:
7192             LOP(OP_MSGCTL,XTERM);
7193
7194         case KEY_msgget:
7195             LOP(OP_MSGGET,XTERM);
7196
7197         case KEY_msgrcv:
7198             LOP(OP_MSGRCV,XTERM);
7199
7200         case KEY_msgsnd:
7201             LOP(OP_MSGSND,XTERM);
7202
7203         case KEY_our:
7204         case KEY_my:
7205         case KEY_state:
7206             PL_in_my = (U16)tmp;
7207             s = SKIPSPACE1(s);
7208             if (isIDFIRST_lazy_if(s,UTF)) {
7209 #ifdef PERL_MAD
7210                 char* start = s;
7211 #endif
7212                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7213                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7214                     goto really_sub;
7215                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7216                 if (!PL_in_my_stash) {
7217                     char tmpbuf[1024];
7218                     PL_bufptr = s;
7219                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7220                     yyerror(tmpbuf);
7221                 }
7222 #ifdef PERL_MAD
7223                 if (PL_madskills) {     /* just add type to declarator token */
7224                     sv_catsv(PL_thistoken, PL_nextwhite);
7225                     PL_nextwhite = 0;
7226                     sv_catpvn(PL_thistoken, start, s - start);
7227                 }
7228 #endif
7229             }
7230             pl_yylval.ival = 1;
7231             OPERATOR(MY);
7232
7233         case KEY_next:
7234             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7235             LOOPX(OP_NEXT);
7236
7237         case KEY_ne:
7238             Eop(OP_SNE);
7239
7240         case KEY_no:
7241             s = tokenize_use(0, s);
7242             OPERATOR(USE);
7243
7244         case KEY_not:
7245             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7246                 FUN1(OP_NOT);
7247             else
7248                 OPERATOR(NOTOP);
7249
7250         case KEY_open:
7251             s = SKIPSPACE1(s);
7252             if (isIDFIRST_lazy_if(s,UTF)) {
7253                 const char *t;
7254                 for (d = s; isALNUM_lazy_if(d,UTF);)
7255                     d++;
7256                 for (t=d; isSPACE(*t);)
7257                     t++;
7258                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7259                     /* [perl #16184] */
7260                     && !(t[0] == '=' && t[1] == '>')
7261                 ) {
7262                     int parms_len = (int)(d-s);
7263                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7264                            "Precedence problem: open %.*s should be open(%.*s)",
7265                             parms_len, s, parms_len, s);
7266                 }
7267             }
7268             LOP(OP_OPEN,XTERM);
7269
7270         case KEY_or:
7271             pl_yylval.ival = OP_OR;
7272             OPERATOR(OROP);
7273
7274         case KEY_ord:
7275             UNI(OP_ORD);
7276
7277         case KEY_oct:
7278             UNI(OP_OCT);
7279
7280         case KEY_opendir:
7281             LOP(OP_OPEN_DIR,XTERM);
7282
7283         case KEY_print:
7284             checkcomma(s,PL_tokenbuf,"filehandle");
7285             LOP(OP_PRINT,XREF);
7286
7287         case KEY_printf:
7288             checkcomma(s,PL_tokenbuf,"filehandle");
7289             LOP(OP_PRTF,XREF);
7290
7291         case KEY_prototype:
7292             UNI(OP_PROTOTYPE);
7293
7294         case KEY_push:
7295             LOP(OP_PUSH,XTERM);
7296
7297         case KEY_pop:
7298             UNIDOR(OP_POP);
7299
7300         case KEY_pos:
7301             UNIDOR(OP_POS);
7302
7303         case KEY_pack:
7304             LOP(OP_PACK,XTERM);
7305
7306         case KEY_package:
7307             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7308             s = SKIPSPACE1(s);
7309             s = force_strict_version(s);
7310             PL_lex_expect = XBLOCK;
7311             OPERATOR(PACKAGE);
7312
7313         case KEY_pipe:
7314             LOP(OP_PIPE_OP,XTERM);
7315
7316         case KEY_q:
7317             s = scan_str(s,!!PL_madskills,FALSE);
7318             if (!s)
7319                 missingterm(NULL);
7320             pl_yylval.ival = OP_CONST;
7321             TERM(sublex_start());
7322
7323         case KEY_quotemeta:
7324             UNI(OP_QUOTEMETA);
7325
7326         case KEY_qw:
7327             s = scan_str(s,!!PL_madskills,FALSE);
7328             if (!s)
7329                 missingterm(NULL);
7330             PL_expect = XOPERATOR;
7331             force_next(')');
7332             if (SvCUR(PL_lex_stuff)) {
7333                 OP *words = NULL;
7334                 int warned = 0;
7335                 d = SvPV_force(PL_lex_stuff, len);
7336                 while (len) {
7337                     for (; isSPACE(*d) && len; --len, ++d)
7338                         /**/;
7339                     if (len) {
7340                         SV *sv;
7341                         const char *b = d;
7342                         if (!warned && ckWARN(WARN_QW)) {
7343                             for (; !isSPACE(*d) && len; --len, ++d) {
7344                                 if (*d == ',') {
7345                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7346                                         "Possible attempt to separate words with commas");
7347                                     ++warned;
7348                                 }
7349                                 else if (*d == '#') {
7350                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7351                                         "Possible attempt to put comments in qw() list");
7352                                     ++warned;
7353                                 }
7354                             }
7355                         }
7356                         else {
7357                             for (; !isSPACE(*d) && len; --len, ++d)
7358                                 /**/;
7359                         }
7360                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7361                         words = append_elem(OP_LIST, words,
7362                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7363                     }
7364                 }
7365                 if (words) {
7366                     start_force(PL_curforce);
7367                     NEXTVAL_NEXTTOKE.opval = words;
7368                     force_next(THING);
7369                 }
7370             }
7371             if (PL_lex_stuff) {
7372                 SvREFCNT_dec(PL_lex_stuff);
7373                 PL_lex_stuff = NULL;
7374             }
7375             PL_expect = XTERM;
7376             TOKEN('(');
7377
7378         case KEY_qq:
7379             s = scan_str(s,!!PL_madskills,FALSE);
7380             if (!s)
7381                 missingterm(NULL);
7382             pl_yylval.ival = OP_STRINGIFY;
7383             if (SvIVX(PL_lex_stuff) == '\'')
7384                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7385             TERM(sublex_start());
7386
7387         case KEY_qr:
7388             s = scan_pat(s,OP_QR);
7389             TERM(sublex_start());
7390
7391         case KEY_qx:
7392             s = scan_str(s,!!PL_madskills,FALSE);
7393             if (!s)
7394                 missingterm(NULL);
7395             readpipe_override();
7396             TERM(sublex_start());
7397
7398         case KEY_return:
7399             OLDLOP(OP_RETURN);
7400
7401         case KEY_require:
7402             s = SKIPSPACE1(s);
7403             if (isDIGIT(*s)) {
7404                 s = force_version(s, FALSE);
7405             }
7406             else if (*s != 'v' || !isDIGIT(s[1])
7407                     || (s = force_version(s, TRUE), *s == 'v'))
7408             {
7409                 *PL_tokenbuf = '\0';
7410                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7411                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7412                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7413                 else if (*s == '<')
7414                     yyerror("<> should be quotes");
7415             }
7416             if (orig_keyword == KEY_require) {
7417                 orig_keyword = 0;
7418                 pl_yylval.ival = 1;
7419             }
7420             else
7421                 pl_yylval.ival = 0;
7422             PL_expect = XTERM;
7423             PL_bufptr = s;
7424             PL_last_uni = PL_oldbufptr;
7425             PL_last_lop_op = OP_REQUIRE;
7426             s = skipspace(s);
7427             return REPORT( (int)REQUIRE );
7428
7429         case KEY_reset:
7430             UNI(OP_RESET);
7431
7432         case KEY_redo:
7433             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7434             LOOPX(OP_REDO);
7435
7436         case KEY_rename:
7437             LOP(OP_RENAME,XTERM);
7438
7439         case KEY_rand:
7440             UNI(OP_RAND);
7441
7442         case KEY_rmdir:
7443             UNI(OP_RMDIR);
7444
7445         case KEY_rindex:
7446             LOP(OP_RINDEX,XTERM);
7447
7448         case KEY_read:
7449             LOP(OP_READ,XTERM);
7450
7451         case KEY_readdir:
7452             UNI(OP_READDIR);
7453
7454         case KEY_readline:
7455             UNIDOR(OP_READLINE);
7456
7457         case KEY_readpipe:
7458             UNIDOR(OP_BACKTICK);
7459
7460         case KEY_rewinddir:
7461             UNI(OP_REWINDDIR);
7462
7463         case KEY_recv:
7464             LOP(OP_RECV,XTERM);
7465
7466         case KEY_reverse:
7467             LOP(OP_REVERSE,XTERM);
7468
7469         case KEY_readlink:
7470             UNIDOR(OP_READLINK);
7471
7472         case KEY_ref:
7473             UNI(OP_REF);
7474
7475         case KEY_s:
7476             s = scan_subst(s);
7477             if (pl_yylval.opval)
7478                 TERM(sublex_start());
7479             else
7480                 TOKEN(1);       /* force error */
7481
7482         case KEY_say:
7483             checkcomma(s,PL_tokenbuf,"filehandle");
7484             LOP(OP_SAY,XREF);
7485
7486         case KEY_chomp:
7487             UNI(OP_CHOMP);
7488
7489         case KEY_scalar:
7490             UNI(OP_SCALAR);
7491
7492         case KEY_select:
7493             LOP(OP_SELECT,XTERM);
7494
7495         case KEY_seek:
7496             LOP(OP_SEEK,XTERM);
7497
7498         case KEY_semctl:
7499             LOP(OP_SEMCTL,XTERM);
7500
7501         case KEY_semget:
7502             LOP(OP_SEMGET,XTERM);
7503
7504         case KEY_semop:
7505             LOP(OP_SEMOP,XTERM);
7506
7507         case KEY_send:
7508             LOP(OP_SEND,XTERM);
7509
7510         case KEY_setpgrp:
7511             LOP(OP_SETPGRP,XTERM);
7512
7513         case KEY_setpriority:
7514             LOP(OP_SETPRIORITY,XTERM);
7515
7516         case KEY_sethostent:
7517             UNI(OP_SHOSTENT);
7518
7519         case KEY_setnetent:
7520             UNI(OP_SNETENT);
7521
7522         case KEY_setservent:
7523             UNI(OP_SSERVENT);
7524
7525         case KEY_setprotoent:
7526             UNI(OP_SPROTOENT);
7527
7528         case KEY_setpwent:
7529             FUN0(OP_SPWENT);
7530
7531         case KEY_setgrent:
7532             FUN0(OP_SGRENT);
7533
7534         case KEY_seekdir:
7535             LOP(OP_SEEKDIR,XTERM);
7536
7537         case KEY_setsockopt:
7538             LOP(OP_SSOCKOPT,XTERM);
7539
7540         case KEY_shift:
7541             UNIDOR(OP_SHIFT);
7542
7543         case KEY_shmctl:
7544             LOP(OP_SHMCTL,XTERM);
7545
7546         case KEY_shmget:
7547             LOP(OP_SHMGET,XTERM);
7548
7549         case KEY_shmread:
7550             LOP(OP_SHMREAD,XTERM);
7551
7552         case KEY_shmwrite:
7553             LOP(OP_SHMWRITE,XTERM);
7554
7555         case KEY_shutdown:
7556             LOP(OP_SHUTDOWN,XTERM);
7557
7558         case KEY_sin:
7559             UNI(OP_SIN);
7560
7561         case KEY_sleep:
7562             UNI(OP_SLEEP);
7563
7564         case KEY_socket:
7565             LOP(OP_SOCKET,XTERM);
7566
7567         case KEY_socketpair:
7568             LOP(OP_SOCKPAIR,XTERM);
7569
7570         case KEY_sort:
7571             checkcomma(s,PL_tokenbuf,"subroutine name");
7572             s = SKIPSPACE1(s);
7573             if (*s == ';' || *s == ')')         /* probably a close */
7574                 Perl_croak(aTHX_ "sort is now a reserved word");
7575             PL_expect = XTERM;
7576             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7577             LOP(OP_SORT,XREF);
7578
7579         case KEY_split:
7580             LOP(OP_SPLIT,XTERM);
7581
7582         case KEY_sprintf:
7583             LOP(OP_SPRINTF,XTERM);
7584
7585         case KEY_splice:
7586             LOP(OP_SPLICE,XTERM);
7587
7588         case KEY_sqrt:
7589             UNI(OP_SQRT);
7590
7591         case KEY_srand:
7592             UNI(OP_SRAND);
7593
7594         case KEY_stat:
7595             UNI(OP_STAT);
7596
7597         case KEY_study:
7598             UNI(OP_STUDY);
7599
7600         case KEY_substr:
7601             LOP(OP_SUBSTR,XTERM);
7602
7603         case KEY_format:
7604         case KEY_sub:
7605           really_sub:
7606             {
7607                 char tmpbuf[sizeof PL_tokenbuf];
7608                 SSize_t tboffset = 0;
7609                 expectation attrful;
7610                 bool have_name, have_proto;
7611                 const int key = tmp;
7612
7613 #ifdef PERL_MAD
7614                 SV *tmpwhite = 0;
7615
7616                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7617                 SV *subtoken = newSVpvn(tstart, s - tstart);
7618                 PL_thistoken = 0;
7619
7620                 d = s;
7621                 s = SKIPSPACE2(s,tmpwhite);
7622 #else
7623                 s = skipspace(s);
7624 #endif
7625
7626                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7627                     (*s == ':' && s[1] == ':'))
7628                 {
7629 #ifdef PERL_MAD
7630                     SV *nametoke = NULL;
7631 #endif
7632
7633                     PL_expect = XBLOCK;
7634                     attrful = XATTRBLOCK;
7635                     /* remember buffer pos'n for later force_word */
7636                     tboffset = s - PL_oldbufptr;
7637                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7638 #ifdef PERL_MAD
7639                     if (PL_madskills)
7640                         nametoke = newSVpvn(s, d - s);
7641 #endif
7642                     if (memchr(tmpbuf, ':', len))
7643                         sv_setpvn(PL_subname, tmpbuf, len);
7644                     else {
7645                         sv_setsv(PL_subname,PL_curstname);
7646                         sv_catpvs(PL_subname,"::");
7647                         sv_catpvn(PL_subname,tmpbuf,len);
7648                     }
7649                     have_name = TRUE;
7650
7651 #ifdef PERL_MAD
7652
7653                     start_force(0);
7654                     CURMAD('X', nametoke);
7655                     CURMAD('_', tmpwhite);
7656                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7657                                       FALSE, TRUE, TRUE);
7658
7659                     s = SKIPSPACE2(d,tmpwhite);
7660 #else
7661                     s = skipspace(d);
7662 #endif
7663                 }
7664                 else {
7665                     if (key == KEY_my)
7666                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7667                     PL_expect = XTERMBLOCK;
7668                     attrful = XATTRTERM;
7669                     sv_setpvs(PL_subname,"?");
7670                     have_name = FALSE;
7671                 }
7672
7673                 if (key == KEY_format) {
7674                     if (*s == '=')
7675                         PL_lex_formbrack = PL_lex_brackets + 1;
7676 #ifdef PERL_MAD
7677                     PL_thistoken = subtoken;
7678                     s = d;
7679 #else
7680                     if (have_name)
7681                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7682                                           FALSE, TRUE, TRUE);
7683 #endif
7684                     OPERATOR(FORMAT);
7685                 }
7686
7687                 /* Look for a prototype */
7688                 if (*s == '(') {
7689                     char *p;
7690                     bool bad_proto = FALSE;
7691                     bool in_brackets = FALSE;
7692                     char greedy_proto = ' ';
7693                     bool proto_after_greedy_proto = FALSE;
7694                     bool must_be_last = FALSE;
7695                     bool underscore = FALSE;
7696                     bool seen_underscore = FALSE;
7697                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7698
7699                     s = scan_str(s,!!PL_madskills,FALSE);
7700                     if (!s)
7701                         Perl_croak(aTHX_ "Prototype not terminated");
7702                     /* strip spaces and check for bad characters */
7703                     d = SvPVX(PL_lex_stuff);
7704                     tmp = 0;
7705                     for (p = d; *p; ++p) {
7706                         if (!isSPACE(*p)) {
7707                             d[tmp++] = *p;
7708
7709                             if (warnillegalproto) {
7710                                 if (must_be_last)
7711                                     proto_after_greedy_proto = TRUE;
7712                                 if (!strchr("$@%*;[]&\\_", *p)) {
7713                                     bad_proto = TRUE;
7714                                 }
7715                                 else {
7716                                     if ( underscore ) {
7717                                         if ( *p != ';' )
7718                                             bad_proto = TRUE;
7719                                         underscore = FALSE;
7720                                     }
7721                                     if ( *p == '[' ) {
7722                                         in_brackets = TRUE;
7723                                     }
7724                                     else if ( *p == ']' ) {
7725                                         in_brackets = FALSE;
7726                                     }
7727                                     else if ( (*p == '@' || *p == '%') &&
7728                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7729                                          !in_brackets ) {
7730                                         must_be_last = TRUE;
7731                                         greedy_proto = *p;
7732                                     }
7733                                     else if ( *p == '_' ) {
7734                                         underscore = seen_underscore = TRUE;
7735                                     }
7736                                 }
7737                             }
7738                         }
7739                     }
7740                     d[tmp] = '\0';
7741                     if (proto_after_greedy_proto)
7742                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7743                                     "Prototype after '%c' for %"SVf" : %s",
7744                                     greedy_proto, SVfARG(PL_subname), d);
7745                     if (bad_proto)
7746                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7747                                     "Illegal character %sin prototype for %"SVf" : %s",
7748                                     seen_underscore ? "after '_' " : "",
7749                                     SVfARG(PL_subname), d);
7750                     SvCUR_set(PL_lex_stuff, tmp);
7751                     have_proto = TRUE;
7752
7753 #ifdef PERL_MAD
7754                     start_force(0);
7755                     CURMAD('q', PL_thisopen);
7756                     CURMAD('_', tmpwhite);
7757                     CURMAD('=', PL_thisstuff);
7758                     CURMAD('Q', PL_thisclose);
7759                     NEXTVAL_NEXTTOKE.opval =
7760                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7761                     PL_lex_stuff = NULL;
7762                     force_next(THING);
7763
7764                     s = SKIPSPACE2(s,tmpwhite);
7765 #else
7766                     s = skipspace(s);
7767 #endif
7768                 }
7769                 else
7770                     have_proto = FALSE;
7771
7772                 if (*s == ':' && s[1] != ':')
7773                     PL_expect = attrful;
7774                 else if (*s != '{' && key == KEY_sub) {
7775                     if (!have_name)
7776                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7777                     else if (*s != ';' && *s != '}')
7778                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7779                 }
7780
7781 #ifdef PERL_MAD
7782                 start_force(0);
7783                 if (tmpwhite) {
7784                     if (PL_madskills)
7785                         curmad('^', newSVpvs(""));
7786                     CURMAD('_', tmpwhite);
7787                 }
7788                 force_next(0);
7789
7790                 PL_thistoken = subtoken;
7791 #else
7792                 if (have_proto) {
7793                     NEXTVAL_NEXTTOKE.opval =
7794                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7795                     PL_lex_stuff = NULL;
7796                     force_next(THING);
7797                 }
7798 #endif
7799                 if (!have_name) {
7800                     if (PL_curstash)
7801                         sv_setpvs(PL_subname, "__ANON__");
7802                     else
7803                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7804                     TOKEN(ANONSUB);
7805                 }
7806 #ifndef PERL_MAD
7807                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7808                                   FALSE, TRUE, TRUE);
7809 #endif
7810                 if (key == KEY_my)
7811                     TOKEN(MYSUB);
7812                 TOKEN(SUB);
7813             }
7814
7815         case KEY_system:
7816             LOP(OP_SYSTEM,XREF);
7817
7818         case KEY_symlink:
7819             LOP(OP_SYMLINK,XTERM);
7820
7821         case KEY_syscall:
7822             LOP(OP_SYSCALL,XTERM);
7823
7824         case KEY_sysopen:
7825             LOP(OP_SYSOPEN,XTERM);
7826
7827         case KEY_sysseek:
7828             LOP(OP_SYSSEEK,XTERM);
7829
7830         case KEY_sysread:
7831             LOP(OP_SYSREAD,XTERM);
7832
7833         case KEY_syswrite:
7834             LOP(OP_SYSWRITE,XTERM);
7835
7836         case KEY_tr:
7837             s = scan_trans(s);
7838             TERM(sublex_start());
7839
7840         case KEY_tell:
7841             UNI(OP_TELL);
7842
7843         case KEY_telldir:
7844             UNI(OP_TELLDIR);
7845
7846         case KEY_tie:
7847             LOP(OP_TIE,XTERM);
7848
7849         case KEY_tied:
7850             UNI(OP_TIED);
7851
7852         case KEY_time:
7853             FUN0(OP_TIME);
7854
7855         case KEY_times:
7856             FUN0(OP_TMS);
7857
7858         case KEY_truncate:
7859             LOP(OP_TRUNCATE,XTERM);
7860
7861         case KEY_uc:
7862             UNI(OP_UC);
7863
7864         case KEY_ucfirst:
7865             UNI(OP_UCFIRST);
7866
7867         case KEY_untie:
7868             UNI(OP_UNTIE);
7869
7870         case KEY_until:
7871             pl_yylval.ival = CopLINE(PL_curcop);
7872             OPERATOR(UNTIL);
7873
7874         case KEY_unless:
7875             pl_yylval.ival = CopLINE(PL_curcop);
7876             OPERATOR(UNLESS);
7877
7878         case KEY_unlink:
7879             LOP(OP_UNLINK,XTERM);
7880
7881         case KEY_undef:
7882             UNIDOR(OP_UNDEF);
7883
7884         case KEY_unpack:
7885             LOP(OP_UNPACK,XTERM);
7886
7887         case KEY_utime:
7888             LOP(OP_UTIME,XTERM);
7889
7890         case KEY_umask:
7891             UNIDOR(OP_UMASK);
7892
7893         case KEY_unshift:
7894             LOP(OP_UNSHIFT,XTERM);
7895
7896         case KEY_use:
7897             s = tokenize_use(1, s);
7898             OPERATOR(USE);
7899
7900         case KEY_values:
7901             UNI(OP_VALUES);
7902
7903         case KEY_vec:
7904             LOP(OP_VEC,XTERM);
7905
7906         case KEY_when:
7907             pl_yylval.ival = CopLINE(PL_curcop);
7908             OPERATOR(WHEN);
7909
7910         case KEY_while:
7911             pl_yylval.ival = CopLINE(PL_curcop);
7912             OPERATOR(WHILE);
7913
7914         case KEY_warn:
7915             PL_hints |= HINT_BLOCK_SCOPE;
7916             LOP(OP_WARN,XTERM);
7917
7918         case KEY_wait:
7919             FUN0(OP_WAIT);
7920
7921         case KEY_waitpid:
7922             LOP(OP_WAITPID,XTERM);
7923
7924         case KEY_wantarray:
7925             FUN0(OP_WANTARRAY);
7926
7927         case KEY_write:
7928 #ifdef EBCDIC
7929         {
7930             char ctl_l[2];
7931             ctl_l[0] = toCTRL('L');
7932             ctl_l[1] = '\0';
7933             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7934         }
7935 #else
7936             /* Make sure $^L is defined */
7937             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7938 #endif
7939             UNI(OP_ENTERWRITE);
7940
7941         case KEY_x:
7942             if (PL_expect == XOPERATOR)
7943                 Mop(OP_REPEAT);
7944             check_uni();
7945             goto just_a_word;
7946
7947         case KEY_xor:
7948             pl_yylval.ival = OP_XOR;
7949             OPERATOR(OROP);
7950
7951         case KEY_y:
7952             s = scan_trans(s);
7953             TERM(sublex_start());
7954         }
7955     }}
7956 }
7957 #ifdef __SC__
7958 #pragma segment Main
7959 #endif
7960
7961 static int
7962 S_pending_ident(pTHX)
7963 {
7964     dVAR;
7965     register char *d;
7966     PADOFFSET tmp = 0;
7967     /* pit holds the identifier we read and pending_ident is reset */
7968     char pit = PL_pending_ident;
7969     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7970     /* All routes through this function want to know if there is a colon.  */
7971     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7972     PL_pending_ident = 0;
7973
7974     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7975     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7976           "### Pending identifier '%s'\n", PL_tokenbuf); });
7977
7978     /* if we're in a my(), we can't allow dynamics here.
7979        $foo'bar has already been turned into $foo::bar, so
7980        just check for colons.
7981
7982        if it's a legal name, the OP is a PADANY.
7983     */
7984     if (PL_in_my) {
7985         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7986             if (has_colon)
7987                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7988                                   "variable %s in \"our\"",
7989                                   PL_tokenbuf));
7990             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7991         }
7992         else {
7993             if (has_colon)
7994                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7995                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7996
7997             pl_yylval.opval = newOP(OP_PADANY, 0);
7998             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7999             return PRIVATEREF;
8000         }
8001     }
8002
8003     /*
8004        build the ops for accesses to a my() variable.
8005
8006        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8007        then used in a comparison.  This catches most, but not
8008        all cases.  For instance, it catches
8009            sort { my($a); $a <=> $b }
8010        but not
8011            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8012        (although why you'd do that is anyone's guess).
8013     */
8014
8015     if (!has_colon) {
8016         if (!PL_in_my)
8017             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8018         if (tmp != NOT_IN_PAD) {
8019             /* might be an "our" variable" */
8020             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8021                 /* build ops for a bareword */
8022                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8023                 HEK * const stashname = HvNAME_HEK(stash);
8024                 SV *  const sym = newSVhek(stashname);
8025                 sv_catpvs(sym, "::");
8026                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8027                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8028                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8029                 gv_fetchsv(sym,
8030                     (PL_in_eval
8031                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8032                         : GV_ADDMULTI
8033                     ),
8034                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8035                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8036                      : SVt_PVHV));
8037                 return WORD;
8038             }
8039
8040             /* if it's a sort block and they're naming $a or $b */
8041             if (PL_last_lop_op == OP_SORT &&
8042                 PL_tokenbuf[0] == '$' &&
8043                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8044                 && !PL_tokenbuf[2])
8045             {
8046                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8047                      d < PL_bufend && *d != '\n';
8048                      d++)
8049                 {
8050                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8051                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8052                               PL_tokenbuf);
8053                     }
8054                 }
8055             }
8056
8057             pl_yylval.opval = newOP(OP_PADANY, 0);
8058             pl_yylval.opval->op_targ = tmp;
8059             return PRIVATEREF;
8060         }
8061     }
8062
8063     /*
8064        Whine if they've said @foo in a doublequoted string,
8065        and @foo isn't a variable we can find in the symbol
8066        table.
8067     */
8068     if (ckWARN(WARN_AMBIGUOUS) &&
8069         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8070         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8071                                          SVt_PVAV);
8072         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8073                 /* DO NOT warn for @- and @+ */
8074                 && !( PL_tokenbuf[2] == '\0' &&
8075                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8076            )
8077         {
8078             /* Downgraded from fatal to warning 20000522 mjd */
8079             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8080                         "Possible unintended interpolation of %s in string",
8081                         PL_tokenbuf);
8082         }
8083     }
8084
8085     /* build ops for a bareword */
8086     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8087                                                       tokenbuf_len - 1));
8088     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8089     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8090                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8091                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8092                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8093                       : SVt_PVHV));
8094     return WORD;
8095 }
8096
8097 /*
8098  *  The following code was generated by perl_keyword.pl.
8099  */
8100
8101 I32
8102 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8103 {
8104     dVAR;
8105
8106     PERL_ARGS_ASSERT_KEYWORD;
8107
8108   switch (len)
8109   {
8110     case 1: /* 5 tokens of length 1 */
8111       switch (name[0])
8112       {
8113         case 'm':
8114           {                                       /* m          */
8115             return KEY_m;
8116           }
8117
8118         case 'q':
8119           {                                       /* q          */
8120             return KEY_q;
8121           }
8122
8123         case 's':
8124           {                                       /* s          */
8125             return KEY_s;
8126           }
8127
8128         case 'x':
8129           {                                       /* x          */
8130             return -KEY_x;
8131           }
8132
8133         case 'y':
8134           {                                       /* y          */
8135             return KEY_y;
8136           }
8137
8138         default:
8139           goto unknown;
8140       }
8141
8142     case 2: /* 18 tokens of length 2 */
8143       switch (name[0])
8144       {
8145         case 'd':
8146           if (name[1] == 'o')
8147           {                                       /* do         */
8148             return KEY_do;
8149           }
8150
8151           goto unknown;
8152
8153         case 'e':
8154           if (name[1] == 'q')
8155           {                                       /* eq         */
8156             return -KEY_eq;
8157           }
8158
8159           goto unknown;
8160
8161         case 'g':
8162           switch (name[1])
8163           {
8164             case 'e':
8165               {                                   /* ge         */
8166                 return -KEY_ge;
8167               }
8168
8169             case 't':
8170               {                                   /* gt         */
8171                 return -KEY_gt;
8172               }
8173
8174             default:
8175               goto unknown;
8176           }
8177
8178         case 'i':
8179           if (name[1] == 'f')
8180           {                                       /* if         */
8181             return KEY_if;
8182           }
8183
8184           goto unknown;
8185
8186         case 'l':
8187           switch (name[1])
8188           {
8189             case 'c':
8190               {                                   /* lc         */
8191                 return -KEY_lc;
8192               }
8193
8194             case 'e':
8195               {                                   /* le         */
8196                 return -KEY_le;
8197               }
8198
8199             case 't':
8200               {                                   /* lt         */
8201                 return -KEY_lt;
8202               }
8203
8204             default:
8205               goto unknown;
8206           }
8207
8208         case 'm':
8209           if (name[1] == 'y')
8210           {                                       /* my         */
8211             return KEY_my;
8212           }
8213
8214           goto unknown;
8215
8216         case 'n':
8217           switch (name[1])
8218           {
8219             case 'e':
8220               {                                   /* ne         */
8221                 return -KEY_ne;
8222               }
8223
8224             case 'o':
8225               {                                   /* no         */
8226                 return KEY_no;
8227               }
8228
8229             default:
8230               goto unknown;
8231           }
8232
8233         case 'o':
8234           if (name[1] == 'r')
8235           {                                       /* or         */
8236             return -KEY_or;
8237           }
8238
8239           goto unknown;
8240
8241         case 'q':
8242           switch (name[1])
8243           {
8244             case 'q':
8245               {                                   /* qq         */
8246                 return KEY_qq;
8247               }
8248
8249             case 'r':
8250               {                                   /* qr         */
8251                 return KEY_qr;
8252               }
8253
8254             case 'w':
8255               {                                   /* qw         */
8256                 return KEY_qw;
8257               }
8258
8259             case 'x':
8260               {                                   /* qx         */
8261                 return KEY_qx;
8262               }
8263
8264             default:
8265               goto unknown;
8266           }
8267
8268         case 't':
8269           if (name[1] == 'r')
8270           {                                       /* tr         */
8271             return KEY_tr;
8272           }
8273
8274           goto unknown;
8275
8276         case 'u':
8277           if (name[1] == 'c')
8278           {                                       /* uc         */
8279             return -KEY_uc;
8280           }
8281
8282           goto unknown;
8283
8284         default:
8285           goto unknown;
8286       }
8287
8288     case 3: /* 29 tokens of length 3 */
8289       switch (name[0])
8290       {
8291         case 'E':
8292           if (name[1] == 'N' &&
8293               name[2] == 'D')
8294           {                                       /* END        */
8295             return KEY_END;
8296           }
8297
8298           goto unknown;
8299
8300         case 'a':
8301           switch (name[1])
8302           {
8303             case 'b':
8304               if (name[2] == 's')
8305               {                                   /* abs        */
8306                 return -KEY_abs;
8307               }
8308
8309               goto unknown;
8310
8311             case 'n':
8312               if (name[2] == 'd')
8313               {                                   /* and        */
8314                 return -KEY_and;
8315               }
8316
8317               goto unknown;
8318
8319             default:
8320               goto unknown;
8321           }
8322
8323         case 'c':
8324           switch (name[1])
8325           {
8326             case 'h':
8327               if (name[2] == 'r')
8328               {                                   /* chr        */
8329                 return -KEY_chr;
8330               }
8331
8332               goto unknown;
8333
8334             case 'm':
8335               if (name[2] == 'p')
8336               {                                   /* cmp        */
8337                 return -KEY_cmp;
8338               }
8339
8340               goto unknown;
8341
8342             case 'o':
8343               if (name[2] == 's')
8344               {                                   /* cos        */
8345                 return -KEY_cos;
8346               }
8347
8348               goto unknown;
8349
8350             default:
8351               goto unknown;
8352           }
8353
8354         case 'd':
8355           if (name[1] == 'i' &&
8356               name[2] == 'e')
8357           {                                       /* die        */
8358             return -KEY_die;
8359           }
8360
8361           goto unknown;
8362
8363         case 'e':
8364           switch (name[1])
8365           {
8366             case 'o':
8367               if (name[2] == 'f')
8368               {                                   /* eof        */
8369                 return -KEY_eof;
8370               }
8371
8372               goto unknown;
8373
8374             case 'x':
8375               if (name[2] == 'p')
8376               {                                   /* exp        */
8377                 return -KEY_exp;
8378               }
8379
8380               goto unknown;
8381
8382             default:
8383               goto unknown;
8384           }
8385
8386         case 'f':
8387           if (name[1] == 'o' &&
8388               name[2] == 'r')
8389           {                                       /* for        */
8390             return KEY_for;
8391           }
8392
8393           goto unknown;
8394
8395         case 'h':
8396           if (name[1] == 'e' &&
8397               name[2] == 'x')
8398           {                                       /* hex        */
8399             return -KEY_hex;
8400           }
8401
8402           goto unknown;
8403
8404         case 'i':
8405           if (name[1] == 'n' &&
8406               name[2] == 't')
8407           {                                       /* int        */
8408             return -KEY_int;
8409           }
8410
8411           goto unknown;
8412
8413         case 'l':
8414           if (name[1] == 'o' &&
8415               name[2] == 'g')
8416           {                                       /* log        */
8417             return -KEY_log;
8418           }
8419
8420           goto unknown;
8421
8422         case 'm':
8423           if (name[1] == 'a' &&
8424               name[2] == 'p')
8425           {                                       /* map        */
8426             return KEY_map;
8427           }
8428
8429           goto unknown;
8430
8431         case 'n':
8432           if (name[1] == 'o' &&
8433               name[2] == 't')
8434           {                                       /* not        */
8435             return -KEY_not;
8436           }
8437
8438           goto unknown;
8439
8440         case 'o':
8441           switch (name[1])
8442           {
8443             case 'c':
8444               if (name[2] == 't')
8445               {                                   /* oct        */
8446                 return -KEY_oct;
8447               }
8448
8449               goto unknown;
8450
8451             case 'r':
8452               if (name[2] == 'd')
8453               {                                   /* ord        */
8454                 return -KEY_ord;
8455               }
8456
8457               goto unknown;
8458
8459             case 'u':
8460               if (name[2] == 'r')
8461               {                                   /* our        */
8462                 return KEY_our;
8463               }
8464
8465               goto unknown;
8466
8467             default:
8468               goto unknown;
8469           }
8470
8471         case 'p':
8472           if (name[1] == 'o')
8473           {
8474             switch (name[2])
8475             {
8476               case 'p':
8477                 {                                 /* pop        */
8478                   return -KEY_pop;
8479                 }
8480
8481               case 's':
8482                 {                                 /* pos        */
8483                   return KEY_pos;
8484                 }
8485
8486               default:
8487                 goto unknown;
8488             }
8489           }
8490
8491           goto unknown;
8492
8493         case 'r':
8494           if (name[1] == 'e' &&
8495               name[2] == 'f')
8496           {                                       /* ref        */
8497             return -KEY_ref;
8498           }
8499
8500           goto unknown;
8501
8502         case 's':
8503           switch (name[1])
8504           {
8505             case 'a':
8506               if (name[2] == 'y')
8507               {                                   /* say        */
8508                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8509               }
8510
8511               goto unknown;
8512
8513             case 'i':
8514               if (name[2] == 'n')
8515               {                                   /* sin        */
8516                 return -KEY_sin;
8517               }
8518
8519               goto unknown;
8520
8521             case 'u':
8522               if (name[2] == 'b')
8523               {                                   /* sub        */
8524                 return KEY_sub;
8525               }
8526
8527               goto unknown;
8528
8529             default:
8530               goto unknown;
8531           }
8532
8533         case 't':
8534           if (name[1] == 'i' &&
8535               name[2] == 'e')
8536           {                                       /* tie        */
8537             return -KEY_tie;
8538           }
8539
8540           goto unknown;
8541
8542         case 'u':
8543           if (name[1] == 's' &&
8544               name[2] == 'e')
8545           {                                       /* use        */
8546             return KEY_use;
8547           }
8548
8549           goto unknown;
8550
8551         case 'v':
8552           if (name[1] == 'e' &&
8553               name[2] == 'c')
8554           {                                       /* vec        */
8555             return -KEY_vec;
8556           }
8557
8558           goto unknown;
8559
8560         case 'x':
8561           if (name[1] == 'o' &&
8562               name[2] == 'r')
8563           {                                       /* xor        */
8564             return -KEY_xor;
8565           }
8566
8567           goto unknown;
8568
8569         default:
8570           goto unknown;
8571       }
8572
8573     case 4: /* 41 tokens of length 4 */
8574       switch (name[0])
8575       {
8576         case 'C':
8577           if (name[1] == 'O' &&
8578               name[2] == 'R' &&
8579               name[3] == 'E')
8580           {                                       /* CORE       */
8581             return -KEY_CORE;
8582           }
8583
8584           goto unknown;
8585
8586         case 'I':
8587           if (name[1] == 'N' &&
8588               name[2] == 'I' &&
8589               name[3] == 'T')
8590           {                                       /* INIT       */
8591             return KEY_INIT;
8592           }
8593
8594           goto unknown;
8595
8596         case 'b':
8597           if (name[1] == 'i' &&
8598               name[2] == 'n' &&
8599               name[3] == 'd')
8600           {                                       /* bind       */
8601             return -KEY_bind;
8602           }
8603
8604           goto unknown;
8605
8606         case 'c':
8607           if (name[1] == 'h' &&
8608               name[2] == 'o' &&
8609               name[3] == 'p')
8610           {                                       /* chop       */
8611             return -KEY_chop;
8612           }
8613
8614           goto unknown;
8615
8616         case 'd':
8617           if (name[1] == 'u' &&
8618               name[2] == 'm' &&
8619               name[3] == 'p')
8620           {                                       /* dump       */
8621             return -KEY_dump;
8622           }
8623
8624           goto unknown;
8625
8626         case 'e':
8627           switch (name[1])
8628           {
8629             case 'a':
8630               if (name[2] == 'c' &&
8631                   name[3] == 'h')
8632               {                                   /* each       */
8633                 return -KEY_each;
8634               }
8635
8636               goto unknown;
8637
8638             case 'l':
8639               if (name[2] == 's' &&
8640                   name[3] == 'e')
8641               {                                   /* else       */
8642                 return KEY_else;
8643               }
8644
8645               goto unknown;
8646
8647             case 'v':
8648               if (name[2] == 'a' &&
8649                   name[3] == 'l')
8650               {                                   /* eval       */
8651                 return KEY_eval;
8652               }
8653
8654               goto unknown;
8655
8656             case 'x':
8657               switch (name[2])
8658               {
8659                 case 'e':
8660                   if (name[3] == 'c')
8661                   {                               /* exec       */
8662                     return -KEY_exec;
8663                   }
8664
8665                   goto unknown;
8666
8667                 case 'i':
8668                   if (name[3] == 't')
8669                   {                               /* exit       */
8670                     return -KEY_exit;
8671                   }
8672
8673                   goto unknown;
8674
8675                 default:
8676                   goto unknown;
8677               }
8678
8679             default:
8680               goto unknown;
8681           }
8682
8683         case 'f':
8684           if (name[1] == 'o' &&
8685               name[2] == 'r' &&
8686               name[3] == 'k')
8687           {                                       /* fork       */
8688             return -KEY_fork;
8689           }
8690
8691           goto unknown;
8692
8693         case 'g':
8694           switch (name[1])
8695           {
8696             case 'e':
8697               if (name[2] == 't' &&
8698                   name[3] == 'c')
8699               {                                   /* getc       */
8700                 return -KEY_getc;
8701               }
8702
8703               goto unknown;
8704
8705             case 'l':
8706               if (name[2] == 'o' &&
8707                   name[3] == 'b')
8708               {                                   /* glob       */
8709                 return KEY_glob;
8710               }
8711
8712               goto unknown;
8713
8714             case 'o':
8715               if (name[2] == 't' &&
8716                   name[3] == 'o')
8717               {                                   /* goto       */
8718                 return KEY_goto;
8719               }
8720
8721               goto unknown;
8722
8723             case 'r':
8724               if (name[2] == 'e' &&
8725                   name[3] == 'p')
8726               {                                   /* grep       */
8727                 return KEY_grep;
8728               }
8729
8730               goto unknown;
8731
8732             default:
8733               goto unknown;
8734           }
8735
8736         case 'j':
8737           if (name[1] == 'o' &&
8738               name[2] == 'i' &&
8739               name[3] == 'n')
8740           {                                       /* join       */
8741             return -KEY_join;
8742           }
8743
8744           goto unknown;
8745
8746         case 'k':
8747           switch (name[1])
8748           {
8749             case 'e':
8750               if (name[2] == 'y' &&
8751                   name[3] == 's')
8752               {                                   /* keys       */
8753                 return -KEY_keys;
8754               }
8755
8756               goto unknown;
8757
8758             case 'i':
8759               if (name[2] == 'l' &&
8760                   name[3] == 'l')
8761               {                                   /* kill       */
8762                 return -KEY_kill;
8763               }
8764
8765               goto unknown;
8766
8767             default:
8768               goto unknown;
8769           }
8770
8771         case 'l':
8772           switch (name[1])
8773           {
8774             case 'a':
8775               if (name[2] == 's' &&
8776                   name[3] == 't')
8777               {                                   /* last       */
8778                 return KEY_last;
8779               }
8780
8781               goto unknown;
8782
8783             case 'i':
8784               if (name[2] == 'n' &&
8785                   name[3] == 'k')
8786               {                                   /* link       */
8787                 return -KEY_link;
8788               }
8789
8790               goto unknown;
8791
8792             case 'o':
8793               if (name[2] == 'c' &&
8794                   name[3] == 'k')
8795               {                                   /* lock       */
8796                 return -KEY_lock;
8797               }
8798
8799               goto unknown;
8800
8801             default:
8802               goto unknown;
8803           }
8804
8805         case 'n':
8806           if (name[1] == 'e' &&
8807               name[2] == 'x' &&
8808               name[3] == 't')
8809           {                                       /* next       */
8810             return KEY_next;
8811           }
8812
8813           goto unknown;
8814
8815         case 'o':
8816           if (name[1] == 'p' &&
8817               name[2] == 'e' &&
8818               name[3] == 'n')
8819           {                                       /* open       */
8820             return -KEY_open;
8821           }
8822
8823           goto unknown;
8824
8825         case 'p':
8826           switch (name[1])
8827           {
8828             case 'a':
8829               if (name[2] == 'c' &&
8830                   name[3] == 'k')
8831               {                                   /* pack       */
8832                 return -KEY_pack;
8833               }
8834
8835               goto unknown;
8836
8837             case 'i':
8838               if (name[2] == 'p' &&
8839                   name[3] == 'e')
8840               {                                   /* pipe       */
8841                 return -KEY_pipe;
8842               }
8843
8844               goto unknown;
8845
8846             case 'u':
8847               if (name[2] == 's' &&
8848                   name[3] == 'h')
8849               {                                   /* push       */
8850                 return -KEY_push;
8851               }
8852
8853               goto unknown;
8854
8855             default:
8856               goto unknown;
8857           }
8858
8859         case 'r':
8860           switch (name[1])
8861           {
8862             case 'a':
8863               if (name[2] == 'n' &&
8864                   name[3] == 'd')
8865               {                                   /* rand       */
8866                 return -KEY_rand;
8867               }
8868
8869               goto unknown;
8870
8871             case 'e':
8872               switch (name[2])
8873               {
8874                 case 'a':
8875                   if (name[3] == 'd')
8876                   {                               /* read       */
8877                     return -KEY_read;
8878                   }
8879
8880                   goto unknown;
8881
8882                 case 'c':
8883                   if (name[3] == 'v')
8884                   {                               /* recv       */
8885                     return -KEY_recv;
8886                   }
8887
8888                   goto unknown;
8889
8890                 case 'd':
8891                   if (name[3] == 'o')
8892                   {                               /* redo       */
8893                     return KEY_redo;
8894                   }
8895
8896                   goto unknown;
8897
8898                 default:
8899                   goto unknown;
8900               }
8901
8902             default:
8903               goto unknown;
8904           }
8905
8906         case 's':
8907           switch (name[1])
8908           {
8909             case 'e':
8910               switch (name[2])
8911               {
8912                 case 'e':
8913                   if (name[3] == 'k')
8914                   {                               /* seek       */
8915                     return -KEY_seek;
8916                   }
8917
8918                   goto unknown;
8919
8920                 case 'n':
8921                   if (name[3] == 'd')
8922                   {                               /* send       */
8923                     return -KEY_send;
8924                   }
8925
8926                   goto unknown;
8927
8928                 default:
8929                   goto unknown;
8930               }
8931
8932             case 'o':
8933               if (name[2] == 'r' &&
8934                   name[3] == 't')
8935               {                                   /* sort       */
8936                 return KEY_sort;
8937               }
8938
8939               goto unknown;
8940
8941             case 'q':
8942               if (name[2] == 'r' &&
8943                   name[3] == 't')
8944               {                                   /* sqrt       */
8945                 return -KEY_sqrt;
8946               }
8947
8948               goto unknown;
8949
8950             case 't':
8951               if (name[2] == 'a' &&
8952                   name[3] == 't')
8953               {                                   /* stat       */
8954                 return -KEY_stat;
8955               }
8956
8957               goto unknown;
8958
8959             default:
8960               goto unknown;
8961           }
8962
8963         case 't':
8964           switch (name[1])
8965           {
8966             case 'e':
8967               if (name[2] == 'l' &&
8968                   name[3] == 'l')
8969               {                                   /* tell       */
8970                 return -KEY_tell;
8971               }
8972
8973               goto unknown;
8974
8975             case 'i':
8976               switch (name[2])
8977               {
8978                 case 'e':
8979                   if (name[3] == 'd')
8980                   {                               /* tied       */
8981                     return -KEY_tied;
8982                   }
8983
8984                   goto unknown;
8985
8986                 case 'm':
8987                   if (name[3] == 'e')
8988                   {                               /* time       */
8989                     return -KEY_time;
8990                   }
8991
8992                   goto unknown;
8993
8994                 default:
8995                   goto unknown;
8996               }
8997
8998             default:
8999               goto unknown;
9000           }
9001
9002         case 'w':
9003           switch (name[1])
9004           {
9005             case 'a':
9006               switch (name[2])
9007               {
9008                 case 'i':
9009                   if (name[3] == 't')
9010                   {                               /* wait       */
9011                     return -KEY_wait;
9012                   }
9013
9014                   goto unknown;
9015
9016                 case 'r':
9017                   if (name[3] == 'n')
9018                   {                               /* warn       */
9019                     return -KEY_warn;
9020                   }
9021
9022                   goto unknown;
9023
9024                 default:
9025                   goto unknown;
9026               }
9027
9028             case 'h':
9029               if (name[2] == 'e' &&
9030                   name[3] == 'n')
9031               {                                   /* when       */
9032                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9033               }
9034
9035               goto unknown;
9036
9037             default:
9038               goto unknown;
9039           }
9040
9041         default:
9042           goto unknown;
9043       }
9044
9045     case 5: /* 39 tokens of length 5 */
9046       switch (name[0])
9047       {
9048         case 'B':
9049           if (name[1] == 'E' &&
9050               name[2] == 'G' &&
9051               name[3] == 'I' &&
9052               name[4] == 'N')
9053           {                                       /* BEGIN      */
9054             return KEY_BEGIN;
9055           }
9056
9057           goto unknown;
9058
9059         case 'C':
9060           if (name[1] == 'H' &&
9061               name[2] == 'E' &&
9062               name[3] == 'C' &&
9063               name[4] == 'K')
9064           {                                       /* CHECK      */
9065             return KEY_CHECK;
9066           }
9067
9068           goto unknown;
9069
9070         case 'a':
9071           switch (name[1])
9072           {
9073             case 'l':
9074               if (name[2] == 'a' &&
9075                   name[3] == 'r' &&
9076                   name[4] == 'm')
9077               {                                   /* alarm      */
9078                 return -KEY_alarm;
9079               }
9080
9081               goto unknown;
9082
9083             case 't':
9084               if (name[2] == 'a' &&
9085                   name[3] == 'n' &&
9086                   name[4] == '2')
9087               {                                   /* atan2      */
9088                 return -KEY_atan2;
9089               }
9090
9091               goto unknown;
9092
9093             default:
9094               goto unknown;
9095           }
9096
9097         case 'b':
9098           switch (name[1])
9099           {
9100             case 'l':
9101               if (name[2] == 'e' &&
9102                   name[3] == 's' &&
9103                   name[4] == 's')
9104               {                                   /* bless      */
9105                 return -KEY_bless;
9106               }
9107
9108               goto unknown;
9109
9110             case 'r':
9111               if (name[2] == 'e' &&
9112                   name[3] == 'a' &&
9113                   name[4] == 'k')
9114               {                                   /* break      */
9115                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9116               }
9117
9118               goto unknown;
9119
9120             default:
9121               goto unknown;
9122           }
9123
9124         case 'c':
9125           switch (name[1])
9126           {
9127             case 'h':
9128               switch (name[2])
9129               {
9130                 case 'd':
9131                   if (name[3] == 'i' &&
9132                       name[4] == 'r')
9133                   {                               /* chdir      */
9134                     return -KEY_chdir;
9135                   }
9136
9137                   goto unknown;
9138
9139                 case 'm':
9140                   if (name[3] == 'o' &&
9141                       name[4] == 'd')
9142                   {                               /* chmod      */
9143                     return -KEY_chmod;
9144                   }
9145
9146                   goto unknown;
9147
9148                 case 'o':
9149                   switch (name[3])
9150                   {
9151                     case 'm':
9152                       if (name[4] == 'p')
9153                       {                           /* chomp      */
9154                         return -KEY_chomp;
9155                       }
9156
9157                       goto unknown;
9158
9159                     case 'w':
9160                       if (name[4] == 'n')
9161                       {                           /* chown      */
9162                         return -KEY_chown;
9163                       }
9164
9165                       goto unknown;
9166
9167                     default:
9168                       goto unknown;
9169                   }
9170
9171                 default:
9172                   goto unknown;
9173               }
9174
9175             case 'l':
9176               if (name[2] == 'o' &&
9177                   name[3] == 's' &&
9178                   name[4] == 'e')
9179               {                                   /* close      */
9180                 return -KEY_close;
9181               }
9182
9183               goto unknown;
9184
9185             case 'r':
9186               if (name[2] == 'y' &&
9187                   name[3] == 'p' &&
9188                   name[4] == 't')
9189               {                                   /* crypt      */
9190                 return -KEY_crypt;
9191               }
9192
9193               goto unknown;
9194
9195             default:
9196               goto unknown;
9197           }
9198
9199         case 'e':
9200           if (name[1] == 'l' &&
9201               name[2] == 's' &&
9202               name[3] == 'i' &&
9203               name[4] == 'f')
9204           {                                       /* elsif      */
9205             return KEY_elsif;
9206           }
9207
9208           goto unknown;
9209
9210         case 'f':
9211           switch (name[1])
9212           {
9213             case 'c':
9214               if (name[2] == 'n' &&
9215                   name[3] == 't' &&
9216                   name[4] == 'l')
9217               {                                   /* fcntl      */
9218                 return -KEY_fcntl;
9219               }
9220
9221               goto unknown;
9222
9223             case 'l':
9224               if (name[2] == 'o' &&
9225                   name[3] == 'c' &&
9226                   name[4] == 'k')
9227               {                                   /* flock      */
9228                 return -KEY_flock;
9229               }
9230
9231               goto unknown;
9232
9233             default:
9234               goto unknown;
9235           }
9236
9237         case 'g':
9238           if (name[1] == 'i' &&
9239               name[2] == 'v' &&
9240               name[3] == 'e' &&
9241               name[4] == 'n')
9242           {                                       /* given      */
9243             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9244           }
9245
9246           goto unknown;
9247
9248         case 'i':
9249           switch (name[1])
9250           {
9251             case 'n':
9252               if (name[2] == 'd' &&
9253                   name[3] == 'e' &&
9254                   name[4] == 'x')
9255               {                                   /* index      */
9256                 return -KEY_index;
9257               }
9258
9259               goto unknown;
9260
9261             case 'o':
9262               if (name[2] == 'c' &&
9263                   name[3] == 't' &&
9264                   name[4] == 'l')
9265               {                                   /* ioctl      */
9266                 return -KEY_ioctl;
9267               }
9268
9269               goto unknown;
9270
9271             default:
9272               goto unknown;
9273           }
9274
9275         case 'l':
9276           switch (name[1])
9277           {
9278             case 'o':
9279               if (name[2] == 'c' &&
9280                   name[3] == 'a' &&
9281                   name[4] == 'l')
9282               {                                   /* local      */
9283                 return KEY_local;
9284               }
9285
9286               goto unknown;
9287
9288             case 's':
9289               if (name[2] == 't' &&
9290                   name[3] == 'a' &&
9291                   name[4] == 't')
9292               {                                   /* lstat      */
9293                 return -KEY_lstat;
9294               }
9295
9296               goto unknown;
9297
9298             default:
9299               goto unknown;
9300           }
9301
9302         case 'm':
9303           if (name[1] == 'k' &&
9304               name[2] == 'd' &&
9305               name[3] == 'i' &&
9306               name[4] == 'r')
9307           {                                       /* mkdir      */
9308             return -KEY_mkdir;
9309           }
9310
9311           goto unknown;
9312
9313         case 'p':
9314           if (name[1] == 'r' &&
9315               name[2] == 'i' &&
9316               name[3] == 'n' &&
9317               name[4] == 't')
9318           {                                       /* print      */
9319             return KEY_print;
9320           }
9321
9322           goto unknown;
9323
9324         case 'r':
9325           switch (name[1])
9326           {
9327             case 'e':
9328               if (name[2] == 's' &&
9329                   name[3] == 'e' &&
9330                   name[4] == 't')
9331               {                                   /* reset      */
9332                 return -KEY_reset;
9333               }
9334
9335               goto unknown;
9336
9337             case 'm':
9338               if (name[2] == 'd' &&
9339                   name[3] == 'i' &&
9340                   name[4] == 'r')
9341               {                                   /* rmdir      */
9342                 return -KEY_rmdir;
9343               }
9344
9345               goto unknown;
9346
9347             default:
9348               goto unknown;
9349           }
9350
9351         case 's':
9352           switch (name[1])
9353           {
9354             case 'e':
9355               if (name[2] == 'm' &&
9356                   name[3] == 'o' &&
9357                   name[4] == 'p')
9358               {                                   /* semop      */
9359                 return -KEY_semop;
9360               }
9361
9362               goto unknown;
9363
9364             case 'h':
9365               if (name[2] == 'i' &&
9366                   name[3] == 'f' &&
9367                   name[4] == 't')
9368               {                                   /* shift      */
9369                 return -KEY_shift;
9370               }
9371
9372               goto unknown;
9373
9374             case 'l':
9375               if (name[2] == 'e' &&
9376                   name[3] == 'e' &&
9377                   name[4] == 'p')
9378               {                                   /* sleep      */
9379                 return -KEY_sleep;
9380               }
9381
9382               goto unknown;
9383
9384             case 'p':
9385               if (name[2] == 'l' &&
9386                   name[3] == 'i' &&
9387                   name[4] == 't')
9388               {                                   /* split      */
9389                 return KEY_split;
9390               }
9391
9392               goto unknown;
9393
9394             case 'r':
9395               if (name[2] == 'a' &&
9396                   name[3] == 'n' &&
9397                   name[4] == 'd')
9398               {                                   /* srand      */
9399                 return -KEY_srand;
9400               }
9401
9402               goto unknown;
9403
9404             case 't':
9405               switch (name[2])
9406               {
9407                 case 'a':
9408                   if (name[3] == 't' &&
9409                       name[4] == 'e')
9410                   {                               /* state      */
9411                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9412                   }
9413
9414                   goto unknown;
9415
9416                 case 'u':
9417                   if (name[3] == 'd' &&
9418                       name[4] == 'y')
9419                   {                               /* study      */
9420                     return KEY_study;
9421                   }
9422
9423                   goto unknown;
9424
9425                 default:
9426                   goto unknown;
9427               }
9428
9429             default:
9430               goto unknown;
9431           }
9432
9433         case 't':
9434           if (name[1] == 'i' &&
9435               name[2] == 'm' &&
9436               name[3] == 'e' &&
9437               name[4] == 's')
9438           {                                       /* times      */
9439             return -KEY_times;
9440           }
9441
9442           goto unknown;
9443
9444         case 'u':
9445           switch (name[1])
9446           {
9447             case 'm':
9448               if (name[2] == 'a' &&
9449                   name[3] == 's' &&
9450                   name[4] == 'k')
9451               {                                   /* umask      */
9452                 return -KEY_umask;
9453               }
9454
9455               goto unknown;
9456
9457             case 'n':
9458               switch (name[2])
9459               {
9460                 case 'd':
9461                   if (name[3] == 'e' &&
9462                       name[4] == 'f')
9463                   {                               /* undef      */
9464                     return KEY_undef;
9465                   }
9466
9467                   goto unknown;
9468
9469                 case 't':
9470                   if (name[3] == 'i')
9471                   {
9472                     switch (name[4])
9473                     {
9474                       case 'e':
9475                         {                         /* untie      */
9476                           return -KEY_untie;
9477                         }
9478
9479                       case 'l':
9480                         {                         /* until      */
9481                           return KEY_until;
9482                         }
9483
9484                       default:
9485                         goto unknown;
9486                     }
9487                   }
9488
9489                   goto unknown;
9490
9491                 default:
9492                   goto unknown;
9493               }
9494
9495             case 't':
9496               if (name[2] == 'i' &&
9497                   name[3] == 'm' &&
9498                   name[4] == 'e')
9499               {                                   /* utime      */
9500                 return -KEY_utime;
9501               }
9502
9503               goto unknown;
9504
9505             default:
9506               goto unknown;
9507           }
9508
9509         case 'w':
9510           switch (name[1])
9511           {
9512             case 'h':
9513               if (name[2] == 'i' &&
9514                   name[3] == 'l' &&
9515                   name[4] == 'e')
9516               {                                   /* while      */
9517                 return KEY_while;
9518               }
9519
9520               goto unknown;
9521
9522             case 'r':
9523               if (name[2] == 'i' &&
9524                   name[3] == 't' &&
9525                   name[4] == 'e')
9526               {                                   /* write      */
9527                 return -KEY_write;
9528               }
9529
9530               goto unknown;
9531
9532             default:
9533               goto unknown;
9534           }
9535
9536         default:
9537           goto unknown;
9538       }
9539
9540     case 6: /* 33 tokens of length 6 */
9541       switch (name[0])
9542       {
9543         case 'a':
9544           if (name[1] == 'c' &&
9545               name[2] == 'c' &&
9546               name[3] == 'e' &&
9547               name[4] == 'p' &&
9548               name[5] == 't')
9549           {                                       /* accept     */
9550             return -KEY_accept;
9551           }
9552
9553           goto unknown;
9554
9555         case 'c':
9556           switch (name[1])
9557           {
9558             case 'a':
9559               if (name[2] == 'l' &&
9560                   name[3] == 'l' &&
9561                   name[4] == 'e' &&
9562                   name[5] == 'r')
9563               {                                   /* caller     */
9564                 return -KEY_caller;
9565               }
9566
9567               goto unknown;
9568
9569             case 'h':
9570               if (name[2] == 'r' &&
9571                   name[3] == 'o' &&
9572                   name[4] == 'o' &&
9573                   name[5] == 't')
9574               {                                   /* chroot     */
9575                 return -KEY_chroot;
9576               }
9577
9578               goto unknown;
9579
9580             default:
9581               goto unknown;
9582           }
9583
9584         case 'd':
9585           if (name[1] == 'e' &&
9586               name[2] == 'l' &&
9587               name[3] == 'e' &&
9588               name[4] == 't' &&
9589               name[5] == 'e')
9590           {                                       /* delete     */
9591             return KEY_delete;
9592           }
9593
9594           goto unknown;
9595
9596         case 'e':
9597           switch (name[1])
9598           {
9599             case 'l':
9600               if (name[2] == 's' &&
9601                   name[3] == 'e' &&
9602                   name[4] == 'i' &&
9603                   name[5] == 'f')
9604               {                                   /* elseif     */
9605                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9606               }
9607
9608               goto unknown;
9609
9610             case 'x':
9611               if (name[2] == 'i' &&
9612                   name[3] == 's' &&
9613                   name[4] == 't' &&
9614                   name[5] == 's')
9615               {                                   /* exists     */
9616                 return KEY_exists;
9617               }
9618
9619               goto unknown;
9620
9621             default:
9622               goto unknown;
9623           }
9624
9625         case 'f':
9626           switch (name[1])
9627           {
9628             case 'i':
9629               if (name[2] == 'l' &&
9630                   name[3] == 'e' &&
9631                   name[4] == 'n' &&
9632                   name[5] == 'o')
9633               {                                   /* fileno     */
9634                 return -KEY_fileno;
9635               }
9636
9637               goto unknown;
9638
9639             case 'o':
9640               if (name[2] == 'r' &&
9641                   name[3] == 'm' &&
9642                   name[4] == 'a' &&
9643                   name[5] == 't')
9644               {                                   /* format     */
9645                 return KEY_format;
9646               }
9647
9648               goto unknown;
9649
9650             default:
9651               goto unknown;
9652           }
9653
9654         case 'g':
9655           if (name[1] == 'm' &&
9656               name[2] == 't' &&
9657               name[3] == 'i' &&
9658               name[4] == 'm' &&
9659               name[5] == 'e')
9660           {                                       /* gmtime     */
9661             return -KEY_gmtime;
9662           }
9663
9664           goto unknown;
9665
9666         case 'l':
9667           switch (name[1])
9668           {
9669             case 'e':
9670               if (name[2] == 'n' &&
9671                   name[3] == 'g' &&
9672                   name[4] == 't' &&
9673                   name[5] == 'h')
9674               {                                   /* length     */
9675                 return -KEY_length;
9676               }
9677
9678               goto unknown;
9679
9680             case 'i':
9681               if (name[2] == 's' &&
9682                   name[3] == 't' &&
9683                   name[4] == 'e' &&
9684                   name[5] == 'n')
9685               {                                   /* listen     */
9686                 return -KEY_listen;
9687               }
9688
9689               goto unknown;
9690
9691             default:
9692               goto unknown;
9693           }
9694
9695         case 'm':
9696           if (name[1] == 's' &&
9697               name[2] == 'g')
9698           {
9699             switch (name[3])
9700             {
9701               case 'c':
9702                 if (name[4] == 't' &&
9703                     name[5] == 'l')
9704                 {                                 /* msgctl     */
9705                   return -KEY_msgctl;
9706                 }
9707
9708                 goto unknown;
9709
9710               case 'g':
9711                 if (name[4] == 'e' &&
9712                     name[5] == 't')
9713                 {                                 /* msgget     */
9714                   return -KEY_msgget;
9715                 }
9716
9717                 goto unknown;
9718
9719               case 'r':
9720                 if (name[4] == 'c' &&
9721                     name[5] == 'v')
9722                 {                                 /* msgrcv     */
9723                   return -KEY_msgrcv;
9724                 }
9725
9726                 goto unknown;
9727
9728               case 's':
9729                 if (name[4] == 'n' &&
9730                     name[5] == 'd')
9731                 {                                 /* msgsnd     */
9732                   return -KEY_msgsnd;
9733                 }
9734
9735                 goto unknown;
9736
9737               default:
9738                 goto unknown;
9739             }
9740           }
9741
9742           goto unknown;
9743
9744         case 'p':
9745           if (name[1] == 'r' &&
9746               name[2] == 'i' &&
9747               name[3] == 'n' &&
9748               name[4] == 't' &&
9749               name[5] == 'f')
9750           {                                       /* printf     */
9751             return KEY_printf;
9752           }
9753
9754           goto unknown;
9755
9756         case 'r':
9757           switch (name[1])
9758           {
9759             case 'e':
9760               switch (name[2])
9761               {
9762                 case 'n':
9763                   if (name[3] == 'a' &&
9764                       name[4] == 'm' &&
9765                       name[5] == 'e')
9766                   {                               /* rename     */
9767                     return -KEY_rename;
9768                   }
9769
9770                   goto unknown;
9771
9772                 case 't':
9773                   if (name[3] == 'u' &&
9774                       name[4] == 'r' &&
9775                       name[5] == 'n')
9776                   {                               /* return     */
9777                     return KEY_return;
9778                   }
9779
9780                   goto unknown;
9781
9782                 default:
9783                   goto unknown;
9784               }
9785
9786             case 'i':
9787               if (name[2] == 'n' &&
9788                   name[3] == 'd' &&
9789                   name[4] == 'e' &&
9790                   name[5] == 'x')
9791               {                                   /* rindex     */
9792                 return -KEY_rindex;
9793               }
9794
9795               goto unknown;
9796
9797             default:
9798               goto unknown;
9799           }
9800
9801         case 's':
9802           switch (name[1])
9803           {
9804             case 'c':
9805               if (name[2] == 'a' &&
9806                   name[3] == 'l' &&
9807                   name[4] == 'a' &&
9808                   name[5] == 'r')
9809               {                                   /* scalar     */
9810                 return KEY_scalar;
9811               }
9812
9813               goto unknown;
9814
9815             case 'e':
9816               switch (name[2])
9817               {
9818                 case 'l':
9819                   if (name[3] == 'e' &&
9820                       name[4] == 'c' &&
9821                       name[5] == 't')
9822                   {                               /* select     */
9823                     return -KEY_select;
9824                   }
9825
9826                   goto unknown;
9827
9828                 case 'm':
9829                   switch (name[3])
9830                   {
9831                     case 'c':
9832                       if (name[4] == 't' &&
9833                           name[5] == 'l')
9834                       {                           /* semctl     */
9835                         return -KEY_semctl;
9836                       }
9837
9838                       goto unknown;
9839
9840                     case 'g':
9841                       if (name[4] == 'e' &&
9842                           name[5] == 't')
9843                       {                           /* semget     */
9844                         return -KEY_semget;
9845                       }
9846
9847                       goto unknown;
9848
9849                     default:
9850                       goto unknown;
9851                   }
9852
9853                 default:
9854                   goto unknown;
9855               }
9856
9857             case 'h':
9858               if (name[2] == 'm')
9859               {
9860                 switch (name[3])
9861                 {
9862                   case 'c':
9863                     if (name[4] == 't' &&
9864                         name[5] == 'l')
9865                     {                             /* shmctl     */
9866                       return -KEY_shmctl;
9867                     }
9868
9869                     goto unknown;
9870
9871                   case 'g':
9872                     if (name[4] == 'e' &&
9873                         name[5] == 't')
9874                     {                             /* shmget     */
9875                       return -KEY_shmget;
9876                     }
9877
9878                     goto unknown;
9879
9880                   default:
9881                     goto unknown;
9882                 }
9883               }
9884
9885               goto unknown;
9886
9887             case 'o':
9888               if (name[2] == 'c' &&
9889                   name[3] == 'k' &&
9890                   name[4] == 'e' &&
9891                   name[5] == 't')
9892               {                                   /* socket     */
9893                 return -KEY_socket;
9894               }
9895
9896               goto unknown;
9897
9898             case 'p':
9899               if (name[2] == 'l' &&
9900                   name[3] == 'i' &&
9901                   name[4] == 'c' &&
9902                   name[5] == 'e')
9903               {                                   /* splice     */
9904                 return -KEY_splice;
9905               }
9906
9907               goto unknown;
9908
9909             case 'u':
9910               if (name[2] == 'b' &&
9911                   name[3] == 's' &&
9912                   name[4] == 't' &&
9913                   name[5] == 'r')
9914               {                                   /* substr     */
9915                 return -KEY_substr;
9916               }
9917
9918               goto unknown;
9919
9920             case 'y':
9921               if (name[2] == 's' &&
9922                   name[3] == 't' &&
9923                   name[4] == 'e' &&
9924                   name[5] == 'm')
9925               {                                   /* system     */
9926                 return -KEY_system;
9927               }
9928
9929               goto unknown;
9930
9931             default:
9932               goto unknown;
9933           }
9934
9935         case 'u':
9936           if (name[1] == 'n')
9937           {
9938             switch (name[2])
9939             {
9940               case 'l':
9941                 switch (name[3])
9942                 {
9943                   case 'e':
9944                     if (name[4] == 's' &&
9945                         name[5] == 's')
9946                     {                             /* unless     */
9947                       return KEY_unless;
9948                     }
9949
9950                     goto unknown;
9951
9952                   case 'i':
9953                     if (name[4] == 'n' &&
9954                         name[5] == 'k')
9955                     {                             /* unlink     */
9956                       return -KEY_unlink;
9957                     }
9958
9959                     goto unknown;
9960
9961                   default:
9962                     goto unknown;
9963                 }
9964
9965               case 'p':
9966                 if (name[3] == 'a' &&
9967                     name[4] == 'c' &&
9968                     name[5] == 'k')
9969                 {                                 /* unpack     */
9970                   return -KEY_unpack;
9971                 }
9972
9973                 goto unknown;
9974
9975               default:
9976                 goto unknown;
9977             }
9978           }
9979
9980           goto unknown;
9981
9982         case 'v':
9983           if (name[1] == 'a' &&
9984               name[2] == 'l' &&
9985               name[3] == 'u' &&
9986               name[4] == 'e' &&
9987               name[5] == 's')
9988           {                                       /* values     */
9989             return -KEY_values;
9990           }
9991
9992           goto unknown;
9993
9994         default:
9995           goto unknown;
9996       }
9997
9998     case 7: /* 29 tokens of length 7 */
9999       switch (name[0])
10000       {
10001         case 'D':
10002           if (name[1] == 'E' &&
10003               name[2] == 'S' &&
10004               name[3] == 'T' &&
10005               name[4] == 'R' &&
10006               name[5] == 'O' &&
10007               name[6] == 'Y')
10008           {                                       /* DESTROY    */
10009             return KEY_DESTROY;
10010           }
10011
10012           goto unknown;
10013
10014         case '_':
10015           if (name[1] == '_' &&
10016               name[2] == 'E' &&
10017               name[3] == 'N' &&
10018               name[4] == 'D' &&
10019               name[5] == '_' &&
10020               name[6] == '_')
10021           {                                       /* __END__    */
10022             return KEY___END__;
10023           }
10024
10025           goto unknown;
10026
10027         case 'b':
10028           if (name[1] == 'i' &&
10029               name[2] == 'n' &&
10030               name[3] == 'm' &&
10031               name[4] == 'o' &&
10032               name[5] == 'd' &&
10033               name[6] == 'e')
10034           {                                       /* binmode    */
10035             return -KEY_binmode;
10036           }
10037
10038           goto unknown;
10039
10040         case 'c':
10041           if (name[1] == 'o' &&
10042               name[2] == 'n' &&
10043               name[3] == 'n' &&
10044               name[4] == 'e' &&
10045               name[5] == 'c' &&
10046               name[6] == 't')
10047           {                                       /* connect    */
10048             return -KEY_connect;
10049           }
10050
10051           goto unknown;
10052
10053         case 'd':
10054           switch (name[1])
10055           {
10056             case 'b':
10057               if (name[2] == 'm' &&
10058                   name[3] == 'o' &&
10059                   name[4] == 'p' &&
10060                   name[5] == 'e' &&
10061                   name[6] == 'n')
10062               {                                   /* dbmopen    */
10063                 return -KEY_dbmopen;
10064               }
10065
10066               goto unknown;
10067
10068             case 'e':
10069               if (name[2] == 'f')
10070               {
10071                 switch (name[3])
10072                 {
10073                   case 'a':
10074                     if (name[4] == 'u' &&
10075                         name[5] == 'l' &&
10076                         name[6] == 't')
10077                     {                             /* default    */
10078                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10079                     }
10080
10081                     goto unknown;
10082
10083                   case 'i':
10084                     if (name[4] == 'n' &&
10085                         name[5] == 'e' &&
10086                         name[6] == 'd')
10087                     {                             /* defined    */
10088                       return KEY_defined;
10089                     }
10090
10091                     goto unknown;
10092
10093                   default:
10094                     goto unknown;
10095                 }
10096               }
10097
10098               goto unknown;
10099
10100             default:
10101               goto unknown;
10102           }
10103
10104         case 'f':
10105           if (name[1] == 'o' &&
10106               name[2] == 'r' &&
10107               name[3] == 'e' &&
10108               name[4] == 'a' &&
10109               name[5] == 'c' &&
10110               name[6] == 'h')
10111           {                                       /* foreach    */
10112             return KEY_foreach;
10113           }
10114
10115           goto unknown;
10116
10117         case 'g':
10118           if (name[1] == 'e' &&
10119               name[2] == 't' &&
10120               name[3] == 'p')
10121           {
10122             switch (name[4])
10123             {
10124               case 'g':
10125                 if (name[5] == 'r' &&
10126                     name[6] == 'p')
10127                 {                                 /* getpgrp    */
10128                   return -KEY_getpgrp;
10129                 }
10130
10131                 goto unknown;
10132
10133               case 'p':
10134                 if (name[5] == 'i' &&
10135                     name[6] == 'd')
10136                 {                                 /* getppid    */
10137                   return -KEY_getppid;
10138                 }
10139
10140                 goto unknown;
10141
10142               default:
10143                 goto unknown;
10144             }
10145           }
10146
10147           goto unknown;
10148
10149         case 'l':
10150           if (name[1] == 'c' &&
10151               name[2] == 'f' &&
10152               name[3] == 'i' &&
10153               name[4] == 'r' &&
10154               name[5] == 's' &&
10155               name[6] == 't')
10156           {                                       /* lcfirst    */
10157             return -KEY_lcfirst;
10158           }
10159
10160           goto unknown;
10161
10162         case 'o':
10163           if (name[1] == 'p' &&
10164               name[2] == 'e' &&
10165               name[3] == 'n' &&
10166               name[4] == 'd' &&
10167               name[5] == 'i' &&
10168               name[6] == 'r')
10169           {                                       /* opendir    */
10170             return -KEY_opendir;
10171           }
10172
10173           goto unknown;
10174
10175         case 'p':
10176           if (name[1] == 'a' &&
10177               name[2] == 'c' &&
10178               name[3] == 'k' &&
10179               name[4] == 'a' &&
10180               name[5] == 'g' &&
10181               name[6] == 'e')
10182           {                                       /* package    */
10183             return KEY_package;
10184           }
10185
10186           goto unknown;
10187
10188         case 'r':
10189           if (name[1] == 'e')
10190           {
10191             switch (name[2])
10192             {
10193               case 'a':
10194                 if (name[3] == 'd' &&
10195                     name[4] == 'd' &&
10196                     name[5] == 'i' &&
10197                     name[6] == 'r')
10198                 {                                 /* readdir    */
10199                   return -KEY_readdir;
10200                 }
10201
10202                 goto unknown;
10203
10204               case 'q':
10205                 if (name[3] == 'u' &&
10206                     name[4] == 'i' &&
10207                     name[5] == 'r' &&
10208                     name[6] == 'e')
10209                 {                                 /* require    */
10210                   return KEY_require;
10211                 }
10212
10213                 goto unknown;
10214
10215               case 'v':
10216                 if (name[3] == 'e' &&
10217                     name[4] == 'r' &&
10218                     name[5] == 's' &&
10219                     name[6] == 'e')
10220                 {                                 /* reverse    */
10221                   return -KEY_reverse;
10222                 }
10223
10224                 goto unknown;
10225
10226               default:
10227                 goto unknown;
10228             }
10229           }
10230
10231           goto unknown;
10232
10233         case 's':
10234           switch (name[1])
10235           {
10236             case 'e':
10237               switch (name[2])
10238               {
10239                 case 'e':
10240                   if (name[3] == 'k' &&
10241                       name[4] == 'd' &&
10242                       name[5] == 'i' &&
10243                       name[6] == 'r')
10244                   {                               /* seekdir    */
10245                     return -KEY_seekdir;
10246                   }
10247
10248                   goto unknown;
10249
10250                 case 't':
10251                   if (name[3] == 'p' &&
10252                       name[4] == 'g' &&
10253                       name[5] == 'r' &&
10254                       name[6] == 'p')
10255                   {                               /* setpgrp    */
10256                     return -KEY_setpgrp;
10257                   }
10258
10259                   goto unknown;
10260
10261                 default:
10262                   goto unknown;
10263               }
10264
10265             case 'h':
10266               if (name[2] == 'm' &&
10267                   name[3] == 'r' &&
10268                   name[4] == 'e' &&
10269                   name[5] == 'a' &&
10270                   name[6] == 'd')
10271               {                                   /* shmread    */
10272                 return -KEY_shmread;
10273               }
10274
10275               goto unknown;
10276
10277             case 'p':
10278               if (name[2] == 'r' &&
10279                   name[3] == 'i' &&
10280                   name[4] == 'n' &&
10281                   name[5] == 't' &&
10282                   name[6] == 'f')
10283               {                                   /* sprintf    */
10284                 return -KEY_sprintf;
10285               }
10286
10287               goto unknown;
10288
10289             case 'y':
10290               switch (name[2])
10291               {
10292                 case 'm':
10293                   if (name[3] == 'l' &&
10294                       name[4] == 'i' &&
10295                       name[5] == 'n' &&
10296                       name[6] == 'k')
10297                   {                               /* symlink    */
10298                     return -KEY_symlink;
10299                   }
10300
10301                   goto unknown;
10302
10303                 case 's':
10304                   switch (name[3])
10305                   {
10306                     case 'c':
10307                       if (name[4] == 'a' &&
10308                           name[5] == 'l' &&
10309                           name[6] == 'l')
10310                       {                           /* syscall    */
10311                         return -KEY_syscall;
10312                       }
10313
10314                       goto unknown;
10315
10316                     case 'o':
10317                       if (name[4] == 'p' &&
10318                           name[5] == 'e' &&
10319                           name[6] == 'n')
10320                       {                           /* sysopen    */
10321                         return -KEY_sysopen;
10322                       }
10323
10324                       goto unknown;
10325
10326                     case 'r':
10327                       if (name[4] == 'e' &&
10328                           name[5] == 'a' &&
10329                           name[6] == 'd')
10330                       {                           /* sysread    */
10331                         return -KEY_sysread;
10332                       }
10333
10334                       goto unknown;
10335
10336                     case 's':
10337                       if (name[4] == 'e' &&
10338                           name[5] == 'e' &&
10339                           name[6] == 'k')
10340                       {                           /* sysseek    */
10341                         return -KEY_sysseek;
10342                       }
10343
10344                       goto unknown;
10345
10346                     default:
10347                       goto unknown;
10348                   }
10349
10350                 default:
10351                   goto unknown;
10352               }
10353
10354             default:
10355               goto unknown;
10356           }
10357
10358         case 't':
10359           if (name[1] == 'e' &&
10360               name[2] == 'l' &&
10361               name[3] == 'l' &&
10362               name[4] == 'd' &&
10363               name[5] == 'i' &&
10364               name[6] == 'r')
10365           {                                       /* telldir    */
10366             return -KEY_telldir;
10367           }
10368
10369           goto unknown;
10370
10371         case 'u':
10372           switch (name[1])
10373           {
10374             case 'c':
10375               if (name[2] == 'f' &&
10376                   name[3] == 'i' &&
10377                   name[4] == 'r' &&
10378                   name[5] == 's' &&
10379                   name[6] == 't')
10380               {                                   /* ucfirst    */
10381                 return -KEY_ucfirst;
10382               }
10383
10384               goto unknown;
10385
10386             case 'n':
10387               if (name[2] == 's' &&
10388                   name[3] == 'h' &&
10389                   name[4] == 'i' &&
10390                   name[5] == 'f' &&
10391                   name[6] == 't')
10392               {                                   /* unshift    */
10393                 return -KEY_unshift;
10394               }
10395
10396               goto unknown;
10397
10398             default:
10399               goto unknown;
10400           }
10401
10402         case 'w':
10403           if (name[1] == 'a' &&
10404               name[2] == 'i' &&
10405               name[3] == 't' &&
10406               name[4] == 'p' &&
10407               name[5] == 'i' &&
10408               name[6] == 'd')
10409           {                                       /* waitpid    */
10410             return -KEY_waitpid;
10411           }
10412
10413           goto unknown;
10414
10415         default:
10416           goto unknown;
10417       }
10418
10419     case 8: /* 26 tokens of length 8 */
10420       switch (name[0])
10421       {
10422         case 'A':
10423           if (name[1] == 'U' &&
10424               name[2] == 'T' &&
10425               name[3] == 'O' &&
10426               name[4] == 'L' &&
10427               name[5] == 'O' &&
10428               name[6] == 'A' &&
10429               name[7] == 'D')
10430           {                                       /* AUTOLOAD   */
10431             return KEY_AUTOLOAD;
10432           }
10433
10434           goto unknown;
10435
10436         case '_':
10437           if (name[1] == '_')
10438           {
10439             switch (name[2])
10440             {
10441               case 'D':
10442                 if (name[3] == 'A' &&
10443                     name[4] == 'T' &&
10444                     name[5] == 'A' &&
10445                     name[6] == '_' &&
10446                     name[7] == '_')
10447                 {                                 /* __DATA__   */
10448                   return KEY___DATA__;
10449                 }
10450
10451                 goto unknown;
10452
10453               case 'F':
10454                 if (name[3] == 'I' &&
10455                     name[4] == 'L' &&
10456                     name[5] == 'E' &&
10457                     name[6] == '_' &&
10458                     name[7] == '_')
10459                 {                                 /* __FILE__   */
10460                   return -KEY___FILE__;
10461                 }
10462
10463                 goto unknown;
10464
10465               case 'L':
10466                 if (name[3] == 'I' &&
10467                     name[4] == 'N' &&
10468                     name[5] == 'E' &&
10469                     name[6] == '_' &&
10470                     name[7] == '_')
10471                 {                                 /* __LINE__   */
10472                   return -KEY___LINE__;
10473                 }
10474
10475                 goto unknown;
10476
10477               default:
10478                 goto unknown;
10479             }
10480           }
10481
10482           goto unknown;
10483
10484         case 'c':
10485           switch (name[1])
10486           {
10487             case 'l':
10488               if (name[2] == 'o' &&
10489                   name[3] == 's' &&
10490                   name[4] == 'e' &&
10491                   name[5] == 'd' &&
10492                   name[6] == 'i' &&
10493                   name[7] == 'r')
10494               {                                   /* closedir   */
10495                 return -KEY_closedir;
10496               }
10497
10498               goto unknown;
10499
10500             case 'o':
10501               if (name[2] == 'n' &&
10502                   name[3] == 't' &&
10503                   name[4] == 'i' &&
10504                   name[5] == 'n' &&
10505                   name[6] == 'u' &&
10506                   name[7] == 'e')
10507               {                                   /* continue   */
10508                 return -KEY_continue;
10509               }
10510
10511               goto unknown;
10512
10513             default:
10514               goto unknown;
10515           }
10516
10517         case 'd':
10518           if (name[1] == 'b' &&
10519               name[2] == 'm' &&
10520               name[3] == 'c' &&
10521               name[4] == 'l' &&
10522               name[5] == 'o' &&
10523               name[6] == 's' &&
10524               name[7] == 'e')
10525           {                                       /* dbmclose   */
10526             return -KEY_dbmclose;
10527           }
10528
10529           goto unknown;
10530
10531         case 'e':
10532           if (name[1] == 'n' &&
10533               name[2] == 'd')
10534           {
10535             switch (name[3])
10536             {
10537               case 'g':
10538                 if (name[4] == 'r' &&
10539                     name[5] == 'e' &&
10540                     name[6] == 'n' &&
10541                     name[7] == 't')
10542                 {                                 /* endgrent   */
10543                   return -KEY_endgrent;
10544                 }
10545
10546                 goto unknown;
10547
10548               case 'p':
10549                 if (name[4] == 'w' &&
10550                     name[5] == 'e' &&
10551                     name[6] == 'n' &&
10552                     name[7] == 't')
10553                 {                                 /* endpwent   */
10554                   return -KEY_endpwent;
10555                 }
10556
10557                 goto unknown;
10558
10559               default:
10560                 goto unknown;
10561             }
10562           }
10563
10564           goto unknown;
10565
10566         case 'f':
10567           if (name[1] == 'o' &&
10568               name[2] == 'r' &&
10569               name[3] == 'm' &&
10570               name[4] == 'l' &&
10571               name[5] == 'i' &&
10572               name[6] == 'n' &&
10573               name[7] == 'e')
10574           {                                       /* formline   */
10575             return -KEY_formline;
10576           }
10577
10578           goto unknown;
10579
10580         case 'g':
10581           if (name[1] == 'e' &&
10582               name[2] == 't')
10583           {
10584             switch (name[3])
10585             {
10586               case 'g':
10587                 if (name[4] == 'r')
10588                 {
10589                   switch (name[5])
10590                   {
10591                     case 'e':
10592                       if (name[6] == 'n' &&
10593                           name[7] == 't')
10594                       {                           /* getgrent   */
10595                         return -KEY_getgrent;
10596                       }
10597
10598                       goto unknown;
10599
10600                     case 'g':
10601                       if (name[6] == 'i' &&
10602                           name[7] == 'd')
10603                       {                           /* getgrgid   */
10604                         return -KEY_getgrgid;
10605                       }
10606
10607                       goto unknown;
10608
10609                     case 'n':
10610                       if (name[6] == 'a' &&
10611                           name[7] == 'm')
10612                       {                           /* getgrnam   */
10613                         return -KEY_getgrnam;
10614                       }
10615
10616                       goto unknown;
10617
10618                     default:
10619                       goto unknown;
10620                   }
10621                 }
10622
10623                 goto unknown;
10624
10625               case 'l':
10626                 if (name[4] == 'o' &&
10627                     name[5] == 'g' &&
10628                     name[6] == 'i' &&
10629                     name[7] == 'n')
10630                 {                                 /* getlogin   */
10631                   return -KEY_getlogin;
10632                 }
10633
10634                 goto unknown;
10635
10636               case 'p':
10637                 if (name[4] == 'w')
10638                 {
10639                   switch (name[5])
10640                   {
10641                     case 'e':
10642                       if (name[6] == 'n' &&
10643                           name[7] == 't')
10644                       {                           /* getpwent   */
10645                         return -KEY_getpwent;
10646                       }
10647
10648                       goto unknown;
10649
10650                     case 'n':
10651                       if (name[6] == 'a' &&
10652                           name[7] == 'm')
10653                       {                           /* getpwnam   */
10654                         return -KEY_getpwnam;
10655                       }
10656
10657                       goto unknown;
10658
10659                     case 'u':
10660                       if (name[6] == 'i' &&
10661                           name[7] == 'd')
10662                       {                           /* getpwuid   */
10663                         return -KEY_getpwuid;
10664                       }
10665
10666                       goto unknown;
10667
10668                     default:
10669                       goto unknown;
10670                   }
10671                 }
10672
10673                 goto unknown;
10674
10675               default:
10676                 goto unknown;
10677             }
10678           }
10679
10680           goto unknown;
10681
10682         case 'r':
10683           if (name[1] == 'e' &&
10684               name[2] == 'a' &&
10685               name[3] == 'd')
10686           {
10687             switch (name[4])
10688             {
10689               case 'l':
10690                 if (name[5] == 'i' &&
10691                     name[6] == 'n')
10692                 {
10693                   switch (name[7])
10694                   {
10695                     case 'e':
10696                       {                           /* readline   */
10697                         return -KEY_readline;
10698                       }
10699
10700                     case 'k':
10701                       {                           /* readlink   */
10702                         return -KEY_readlink;
10703                       }
10704
10705                     default:
10706                       goto unknown;
10707                   }
10708                 }
10709
10710                 goto unknown;
10711
10712               case 'p':
10713                 if (name[5] == 'i' &&
10714                     name[6] == 'p' &&
10715                     name[7] == 'e')
10716                 {                                 /* readpipe   */
10717                   return -KEY_readpipe;
10718                 }
10719
10720                 goto unknown;
10721
10722               default:
10723                 goto unknown;
10724             }
10725           }
10726
10727           goto unknown;
10728
10729         case 's':
10730           switch (name[1])
10731           {
10732             case 'e':
10733               if (name[2] == 't')
10734               {
10735                 switch (name[3])
10736                 {
10737                   case 'g':
10738                     if (name[4] == 'r' &&
10739                         name[5] == 'e' &&
10740                         name[6] == 'n' &&
10741                         name[7] == 't')
10742                     {                             /* setgrent   */
10743                       return -KEY_setgrent;
10744                     }
10745
10746                     goto unknown;
10747
10748                   case 'p':
10749                     if (name[4] == 'w' &&
10750                         name[5] == 'e' &&
10751                         name[6] == 'n' &&
10752                         name[7] == 't')
10753                     {                             /* setpwent   */
10754                       return -KEY_setpwent;
10755                     }
10756
10757                     goto unknown;
10758
10759                   default:
10760                     goto unknown;
10761                 }
10762               }
10763
10764               goto unknown;
10765
10766             case 'h':
10767               switch (name[2])
10768               {
10769                 case 'm':
10770                   if (name[3] == 'w' &&
10771                       name[4] == 'r' &&
10772                       name[5] == 'i' &&
10773                       name[6] == 't' &&
10774                       name[7] == 'e')
10775                   {                               /* shmwrite   */
10776                     return -KEY_shmwrite;
10777                   }
10778
10779                   goto unknown;
10780
10781                 case 'u':
10782                   if (name[3] == 't' &&
10783                       name[4] == 'd' &&
10784                       name[5] == 'o' &&
10785                       name[6] == 'w' &&
10786                       name[7] == 'n')
10787                   {                               /* shutdown   */
10788                     return -KEY_shutdown;
10789                   }
10790
10791                   goto unknown;
10792
10793                 default:
10794                   goto unknown;
10795               }
10796
10797             case 'y':
10798               if (name[2] == 's' &&
10799                   name[3] == 'w' &&
10800                   name[4] == 'r' &&
10801                   name[5] == 'i' &&
10802                   name[6] == 't' &&
10803                   name[7] == 'e')
10804               {                                   /* syswrite   */
10805                 return -KEY_syswrite;
10806               }
10807
10808               goto unknown;
10809
10810             default:
10811               goto unknown;
10812           }
10813
10814         case 't':
10815           if (name[1] == 'r' &&
10816               name[2] == 'u' &&
10817               name[3] == 'n' &&
10818               name[4] == 'c' &&
10819               name[5] == 'a' &&
10820               name[6] == 't' &&
10821               name[7] == 'e')
10822           {                                       /* truncate   */
10823             return -KEY_truncate;
10824           }
10825
10826           goto unknown;
10827
10828         default:
10829           goto unknown;
10830       }
10831
10832     case 9: /* 9 tokens of length 9 */
10833       switch (name[0])
10834       {
10835         case 'U':
10836           if (name[1] == 'N' &&
10837               name[2] == 'I' &&
10838               name[3] == 'T' &&
10839               name[4] == 'C' &&
10840               name[5] == 'H' &&
10841               name[6] == 'E' &&
10842               name[7] == 'C' &&
10843               name[8] == 'K')
10844           {                                       /* UNITCHECK  */
10845             return KEY_UNITCHECK;
10846           }
10847
10848           goto unknown;
10849
10850         case 'e':
10851           if (name[1] == 'n' &&
10852               name[2] == 'd' &&
10853               name[3] == 'n' &&
10854               name[4] == 'e' &&
10855               name[5] == 't' &&
10856               name[6] == 'e' &&
10857               name[7] == 'n' &&
10858               name[8] == 't')
10859           {                                       /* endnetent  */
10860             return -KEY_endnetent;
10861           }
10862
10863           goto unknown;
10864
10865         case 'g':
10866           if (name[1] == 'e' &&
10867               name[2] == 't' &&
10868               name[3] == 'n' &&
10869               name[4] == 'e' &&
10870               name[5] == 't' &&
10871               name[6] == 'e' &&
10872               name[7] == 'n' &&
10873               name[8] == 't')
10874           {                                       /* getnetent  */
10875             return -KEY_getnetent;
10876           }
10877
10878           goto unknown;
10879
10880         case 'l':
10881           if (name[1] == 'o' &&
10882               name[2] == 'c' &&
10883               name[3] == 'a' &&
10884               name[4] == 'l' &&
10885               name[5] == 't' &&
10886               name[6] == 'i' &&
10887               name[7] == 'm' &&
10888               name[8] == 'e')
10889           {                                       /* localtime  */
10890             return -KEY_localtime;
10891           }
10892
10893           goto unknown;
10894
10895         case 'p':
10896           if (name[1] == 'r' &&
10897               name[2] == 'o' &&
10898               name[3] == 't' &&
10899               name[4] == 'o' &&
10900               name[5] == 't' &&
10901               name[6] == 'y' &&
10902               name[7] == 'p' &&
10903               name[8] == 'e')
10904           {                                       /* prototype  */
10905             return KEY_prototype;
10906           }
10907
10908           goto unknown;
10909
10910         case 'q':
10911           if (name[1] == 'u' &&
10912               name[2] == 'o' &&
10913               name[3] == 't' &&
10914               name[4] == 'e' &&
10915               name[5] == 'm' &&
10916               name[6] == 'e' &&
10917               name[7] == 't' &&
10918               name[8] == 'a')
10919           {                                       /* quotemeta  */
10920             return -KEY_quotemeta;
10921           }
10922
10923           goto unknown;
10924
10925         case 'r':
10926           if (name[1] == 'e' &&
10927               name[2] == 'w' &&
10928               name[3] == 'i' &&
10929               name[4] == 'n' &&
10930               name[5] == 'd' &&
10931               name[6] == 'd' &&
10932               name[7] == 'i' &&
10933               name[8] == 'r')
10934           {                                       /* rewinddir  */
10935             return -KEY_rewinddir;
10936           }
10937
10938           goto unknown;
10939
10940         case 's':
10941           if (name[1] == 'e' &&
10942               name[2] == 't' &&
10943               name[3] == 'n' &&
10944               name[4] == 'e' &&
10945               name[5] == 't' &&
10946               name[6] == 'e' &&
10947               name[7] == 'n' &&
10948               name[8] == 't')
10949           {                                       /* setnetent  */
10950             return -KEY_setnetent;
10951           }
10952
10953           goto unknown;
10954
10955         case 'w':
10956           if (name[1] == 'a' &&
10957               name[2] == 'n' &&
10958               name[3] == 't' &&
10959               name[4] == 'a' &&
10960               name[5] == 'r' &&
10961               name[6] == 'r' &&
10962               name[7] == 'a' &&
10963               name[8] == 'y')
10964           {                                       /* wantarray  */
10965             return -KEY_wantarray;
10966           }
10967
10968           goto unknown;
10969
10970         default:
10971           goto unknown;
10972       }
10973
10974     case 10: /* 9 tokens of length 10 */
10975       switch (name[0])
10976       {
10977         case 'e':
10978           if (name[1] == 'n' &&
10979               name[2] == 'd')
10980           {
10981             switch (name[3])
10982             {
10983               case 'h':
10984                 if (name[4] == 'o' &&
10985                     name[5] == 's' &&
10986                     name[6] == 't' &&
10987                     name[7] == 'e' &&
10988                     name[8] == 'n' &&
10989                     name[9] == 't')
10990                 {                                 /* endhostent */
10991                   return -KEY_endhostent;
10992                 }
10993
10994                 goto unknown;
10995
10996               case 's':
10997                 if (name[4] == 'e' &&
10998                     name[5] == 'r' &&
10999                     name[6] == 'v' &&
11000                     name[7] == 'e' &&
11001                     name[8] == 'n' &&
11002                     name[9] == 't')
11003                 {                                 /* endservent */
11004                   return -KEY_endservent;
11005                 }
11006
11007                 goto unknown;
11008
11009               default:
11010                 goto unknown;
11011             }
11012           }
11013
11014           goto unknown;
11015
11016         case 'g':
11017           if (name[1] == 'e' &&
11018               name[2] == 't')
11019           {
11020             switch (name[3])
11021             {
11022               case 'h':
11023                 if (name[4] == 'o' &&
11024                     name[5] == 's' &&
11025                     name[6] == 't' &&
11026                     name[7] == 'e' &&
11027                     name[8] == 'n' &&
11028                     name[9] == 't')
11029                 {                                 /* gethostent */
11030                   return -KEY_gethostent;
11031                 }
11032
11033                 goto unknown;
11034
11035               case 's':
11036                 switch (name[4])
11037                 {
11038                   case 'e':
11039                     if (name[5] == 'r' &&
11040                         name[6] == 'v' &&
11041                         name[7] == 'e' &&
11042                         name[8] == 'n' &&
11043                         name[9] == 't')
11044                     {                             /* getservent */
11045                       return -KEY_getservent;
11046                     }
11047
11048                     goto unknown;
11049
11050                   case 'o':
11051                     if (name[5] == 'c' &&
11052                         name[6] == 'k' &&
11053                         name[7] == 'o' &&
11054                         name[8] == 'p' &&
11055                         name[9] == 't')
11056                     {                             /* getsockopt */
11057                       return -KEY_getsockopt;
11058                     }
11059
11060                     goto unknown;
11061
11062                   default:
11063                     goto unknown;
11064                 }
11065
11066               default:
11067                 goto unknown;
11068             }
11069           }
11070
11071           goto unknown;
11072
11073         case 's':
11074           switch (name[1])
11075           {
11076             case 'e':
11077               if (name[2] == 't')
11078               {
11079                 switch (name[3])
11080                 {
11081                   case 'h':
11082                     if (name[4] == 'o' &&
11083                         name[5] == 's' &&
11084                         name[6] == 't' &&
11085                         name[7] == 'e' &&
11086                         name[8] == 'n' &&
11087                         name[9] == 't')
11088                     {                             /* sethostent */
11089                       return -KEY_sethostent;
11090                     }
11091
11092                     goto unknown;
11093
11094                   case 's':
11095                     switch (name[4])
11096                     {
11097                       case 'e':
11098                         if (name[5] == 'r' &&
11099                             name[6] == 'v' &&
11100                             name[7] == 'e' &&
11101                             name[8] == 'n' &&
11102                             name[9] == 't')
11103                         {                         /* setservent */
11104                           return -KEY_setservent;
11105                         }
11106
11107                         goto unknown;
11108
11109                       case 'o':
11110                         if (name[5] == 'c' &&
11111                             name[6] == 'k' &&
11112                             name[7] == 'o' &&
11113                             name[8] == 'p' &&
11114                             name[9] == 't')
11115                         {                         /* setsockopt */
11116                           return -KEY_setsockopt;
11117                         }
11118
11119                         goto unknown;
11120
11121                       default:
11122                         goto unknown;
11123                     }
11124
11125                   default:
11126                     goto unknown;
11127                 }
11128               }
11129
11130               goto unknown;
11131
11132             case 'o':
11133               if (name[2] == 'c' &&
11134                   name[3] == 'k' &&
11135                   name[4] == 'e' &&
11136                   name[5] == 't' &&
11137                   name[6] == 'p' &&
11138                   name[7] == 'a' &&
11139                   name[8] == 'i' &&
11140                   name[9] == 'r')
11141               {                                   /* socketpair */
11142                 return -KEY_socketpair;
11143               }
11144
11145               goto unknown;
11146
11147             default:
11148               goto unknown;
11149           }
11150
11151         default:
11152           goto unknown;
11153       }
11154
11155     case 11: /* 8 tokens of length 11 */
11156       switch (name[0])
11157       {
11158         case '_':
11159           if (name[1] == '_' &&
11160               name[2] == 'P' &&
11161               name[3] == 'A' &&
11162               name[4] == 'C' &&
11163               name[5] == 'K' &&
11164               name[6] == 'A' &&
11165               name[7] == 'G' &&
11166               name[8] == 'E' &&
11167               name[9] == '_' &&
11168               name[10] == '_')
11169           {                                       /* __PACKAGE__ */
11170             return -KEY___PACKAGE__;
11171           }
11172
11173           goto unknown;
11174
11175         case 'e':
11176           if (name[1] == 'n' &&
11177               name[2] == 'd' &&
11178               name[3] == 'p' &&
11179               name[4] == 'r' &&
11180               name[5] == 'o' &&
11181               name[6] == 't' &&
11182               name[7] == 'o' &&
11183               name[8] == 'e' &&
11184               name[9] == 'n' &&
11185               name[10] == 't')
11186           {                                       /* endprotoent */
11187             return -KEY_endprotoent;
11188           }
11189
11190           goto unknown;
11191
11192         case 'g':
11193           if (name[1] == 'e' &&
11194               name[2] == 't')
11195           {
11196             switch (name[3])
11197             {
11198               case 'p':
11199                 switch (name[4])
11200                 {
11201                   case 'e':
11202                     if (name[5] == 'e' &&
11203                         name[6] == 'r' &&
11204                         name[7] == 'n' &&
11205                         name[8] == 'a' &&
11206                         name[9] == 'm' &&
11207                         name[10] == 'e')
11208                     {                             /* getpeername */
11209                       return -KEY_getpeername;
11210                     }
11211
11212                     goto unknown;
11213
11214                   case 'r':
11215                     switch (name[5])
11216                     {
11217                       case 'i':
11218                         if (name[6] == 'o' &&
11219                             name[7] == 'r' &&
11220                             name[8] == 'i' &&
11221                             name[9] == 't' &&
11222                             name[10] == 'y')
11223                         {                         /* getpriority */
11224                           return -KEY_getpriority;
11225                         }
11226
11227                         goto unknown;
11228
11229                       case 'o':
11230                         if (name[6] == 't' &&
11231                             name[7] == 'o' &&
11232                             name[8] == 'e' &&
11233                             name[9] == 'n' &&
11234                             name[10] == 't')
11235                         {                         /* getprotoent */
11236                           return -KEY_getprotoent;
11237                         }
11238
11239                         goto unknown;
11240
11241                       default:
11242                         goto unknown;
11243                     }
11244
11245                   default:
11246                     goto unknown;
11247                 }
11248
11249               case 's':
11250                 if (name[4] == 'o' &&
11251                     name[5] == 'c' &&
11252                     name[6] == 'k' &&
11253                     name[7] == 'n' &&
11254                     name[8] == 'a' &&
11255                     name[9] == 'm' &&
11256                     name[10] == 'e')
11257                 {                                 /* getsockname */
11258                   return -KEY_getsockname;
11259                 }
11260
11261                 goto unknown;
11262
11263               default:
11264                 goto unknown;
11265             }
11266           }
11267
11268           goto unknown;
11269
11270         case 's':
11271           if (name[1] == 'e' &&
11272               name[2] == 't' &&
11273               name[3] == 'p' &&
11274               name[4] == 'r')
11275           {
11276             switch (name[5])
11277             {
11278               case 'i':
11279                 if (name[6] == 'o' &&
11280                     name[7] == 'r' &&
11281                     name[8] == 'i' &&
11282                     name[9] == 't' &&
11283                     name[10] == 'y')
11284                 {                                 /* setpriority */
11285                   return -KEY_setpriority;
11286                 }
11287
11288                 goto unknown;
11289
11290               case 'o':
11291                 if (name[6] == 't' &&
11292                     name[7] == 'o' &&
11293                     name[8] == 'e' &&
11294                     name[9] == 'n' &&
11295                     name[10] == 't')
11296                 {                                 /* setprotoent */
11297                   return -KEY_setprotoent;
11298                 }
11299
11300                 goto unknown;
11301
11302               default:
11303                 goto unknown;
11304             }
11305           }
11306
11307           goto unknown;
11308
11309         default:
11310           goto unknown;
11311       }
11312
11313     case 12: /* 2 tokens of length 12 */
11314       if (name[0] == 'g' &&
11315           name[1] == 'e' &&
11316           name[2] == 't' &&
11317           name[3] == 'n' &&
11318           name[4] == 'e' &&
11319           name[5] == 't' &&
11320           name[6] == 'b' &&
11321           name[7] == 'y')
11322       {
11323         switch (name[8])
11324         {
11325           case 'a':
11326             if (name[9] == 'd' &&
11327                 name[10] == 'd' &&
11328                 name[11] == 'r')
11329             {                                     /* getnetbyaddr */
11330               return -KEY_getnetbyaddr;
11331             }
11332
11333             goto unknown;
11334
11335           case 'n':
11336             if (name[9] == 'a' &&
11337                 name[10] == 'm' &&
11338                 name[11] == 'e')
11339             {                                     /* getnetbyname */
11340               return -KEY_getnetbyname;
11341             }
11342
11343             goto unknown;
11344
11345           default:
11346             goto unknown;
11347         }
11348       }
11349
11350       goto unknown;
11351
11352     case 13: /* 4 tokens of length 13 */
11353       if (name[0] == 'g' &&
11354           name[1] == 'e' &&
11355           name[2] == 't')
11356       {
11357         switch (name[3])
11358         {
11359           case 'h':
11360             if (name[4] == 'o' &&
11361                 name[5] == 's' &&
11362                 name[6] == 't' &&
11363                 name[7] == 'b' &&
11364                 name[8] == 'y')
11365             {
11366               switch (name[9])
11367               {
11368                 case 'a':
11369                   if (name[10] == 'd' &&
11370                       name[11] == 'd' &&
11371                       name[12] == 'r')
11372                   {                               /* gethostbyaddr */
11373                     return -KEY_gethostbyaddr;
11374                   }
11375
11376                   goto unknown;
11377
11378                 case 'n':
11379                   if (name[10] == 'a' &&
11380                       name[11] == 'm' &&
11381                       name[12] == 'e')
11382                   {                               /* gethostbyname */
11383                     return -KEY_gethostbyname;
11384                   }
11385
11386                   goto unknown;
11387
11388                 default:
11389                   goto unknown;
11390               }
11391             }
11392
11393             goto unknown;
11394
11395           case 's':
11396             if (name[4] == 'e' &&
11397                 name[5] == 'r' &&
11398                 name[6] == 'v' &&
11399                 name[7] == 'b' &&
11400                 name[8] == 'y')
11401             {
11402               switch (name[9])
11403               {
11404                 case 'n':
11405                   if (name[10] == 'a' &&
11406                       name[11] == 'm' &&
11407                       name[12] == 'e')
11408                   {                               /* getservbyname */
11409                     return -KEY_getservbyname;
11410                   }
11411
11412                   goto unknown;
11413
11414                 case 'p':
11415                   if (name[10] == 'o' &&
11416                       name[11] == 'r' &&
11417                       name[12] == 't')
11418                   {                               /* getservbyport */
11419                     return -KEY_getservbyport;
11420                   }
11421
11422                   goto unknown;
11423
11424                 default:
11425                   goto unknown;
11426               }
11427             }
11428
11429             goto unknown;
11430
11431           default:
11432             goto unknown;
11433         }
11434       }
11435
11436       goto unknown;
11437
11438     case 14: /* 1 tokens of length 14 */
11439       if (name[0] == 'g' &&
11440           name[1] == 'e' &&
11441           name[2] == 't' &&
11442           name[3] == 'p' &&
11443           name[4] == 'r' &&
11444           name[5] == 'o' &&
11445           name[6] == 't' &&
11446           name[7] == 'o' &&
11447           name[8] == 'b' &&
11448           name[9] == 'y' &&
11449           name[10] == 'n' &&
11450           name[11] == 'a' &&
11451           name[12] == 'm' &&
11452           name[13] == 'e')
11453       {                                           /* getprotobyname */
11454         return -KEY_getprotobyname;
11455       }
11456
11457       goto unknown;
11458
11459     case 16: /* 1 tokens of length 16 */
11460       if (name[0] == 'g' &&
11461           name[1] == 'e' &&
11462           name[2] == 't' &&
11463           name[3] == 'p' &&
11464           name[4] == 'r' &&
11465           name[5] == 'o' &&
11466           name[6] == 't' &&
11467           name[7] == 'o' &&
11468           name[8] == 'b' &&
11469           name[9] == 'y' &&
11470           name[10] == 'n' &&
11471           name[11] == 'u' &&
11472           name[12] == 'm' &&
11473           name[13] == 'b' &&
11474           name[14] == 'e' &&
11475           name[15] == 'r')
11476       {                                           /* getprotobynumber */
11477         return -KEY_getprotobynumber;
11478       }
11479
11480       goto unknown;
11481
11482     default:
11483       goto unknown;
11484   }
11485
11486 unknown:
11487   return 0;
11488 }
11489
11490 STATIC void
11491 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11492 {
11493     dVAR;
11494
11495     PERL_ARGS_ASSERT_CHECKCOMMA;
11496
11497     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11498         if (ckWARN(WARN_SYNTAX)) {
11499             int level = 1;
11500             const char *w;
11501             for (w = s+2; *w && level; w++) {
11502                 if (*w == '(')
11503                     ++level;
11504                 else if (*w == ')')
11505                     --level;
11506             }
11507             while (isSPACE(*w))
11508                 ++w;
11509             /* the list of chars below is for end of statements or
11510              * block / parens, boolean operators (&&, ||, //) and branch
11511              * constructs (or, and, if, until, unless, while, err, for).
11512              * Not a very solid hack... */
11513             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11514                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11515                             "%s (...) interpreted as function",name);
11516         }
11517     }
11518     while (s < PL_bufend && isSPACE(*s))
11519         s++;
11520     if (*s == '(')
11521         s++;
11522     while (s < PL_bufend && isSPACE(*s))
11523         s++;
11524     if (isIDFIRST_lazy_if(s,UTF)) {
11525         const char * const w = s++;
11526         while (isALNUM_lazy_if(s,UTF))
11527             s++;
11528         while (s < PL_bufend && isSPACE(*s))
11529             s++;
11530         if (*s == ',') {
11531             GV* gv;
11532             if (keyword(w, s - w, 0))
11533                 return;
11534
11535             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11536             if (gv && GvCVu(gv))
11537                 return;
11538             Perl_croak(aTHX_ "No comma allowed after %s", what);
11539         }
11540     }
11541 }
11542
11543 /* Either returns sv, or mortalizes sv and returns a new SV*.
11544    Best used as sv=new_constant(..., sv, ...).
11545    If s, pv are NULL, calls subroutine with one argument,
11546    and type is used with error messages only. */
11547
11548 STATIC SV *
11549 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11550                SV *sv, SV *pv, const char *type, STRLEN typelen)
11551 {
11552     dVAR; dSP;
11553     HV * const table = GvHV(PL_hintgv);          /* ^H */
11554     SV *res;
11555     SV **cvp;
11556     SV *cv, *typesv;
11557     const char *why1 = "", *why2 = "", *why3 = "";
11558
11559     PERL_ARGS_ASSERT_NEW_CONSTANT;
11560
11561     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11562         SV *msg;
11563
11564         why2 = (const char *)
11565             (strEQ(key,"charnames")
11566              ? "(possibly a missing \"use charnames ...\")"
11567              : "");
11568         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11569                             (type ? type: "undef"), why2);
11570
11571         /* This is convoluted and evil ("goto considered harmful")
11572          * but I do not understand the intricacies of all the different
11573          * failure modes of %^H in here.  The goal here is to make
11574          * the most probable error message user-friendly. --jhi */
11575
11576         goto msgdone;
11577
11578     report:
11579         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11580                             (type ? type: "undef"), why1, why2, why3);
11581     msgdone:
11582         yyerror(SvPVX_const(msg));
11583         SvREFCNT_dec(msg);
11584         return sv;
11585     }
11586
11587     /* charnames doesn't work well if there have been errors found */
11588     if (PL_error_count > 0 && strEQ(key,"charnames"))
11589         return &PL_sv_undef;
11590
11591     cvp = hv_fetch(table, key, keylen, FALSE);
11592     if (!cvp || !SvOK(*cvp)) {
11593         why1 = "$^H{";
11594         why2 = key;
11595         why3 = "} is not defined";
11596         goto report;
11597     }
11598     sv_2mortal(sv);                     /* Parent created it permanently */
11599     cv = *cvp;
11600     if (!pv && s)
11601         pv = newSVpvn_flags(s, len, SVs_TEMP);
11602     if (type && pv)
11603         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11604     else
11605         typesv = &PL_sv_undef;
11606
11607     PUSHSTACKi(PERLSI_OVERLOAD);
11608     ENTER ;
11609     SAVETMPS;
11610
11611     PUSHMARK(SP) ;
11612     EXTEND(sp, 3);
11613     if (pv)
11614         PUSHs(pv);
11615     PUSHs(sv);
11616     if (pv)
11617         PUSHs(typesv);
11618     PUTBACK;
11619     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11620
11621     SPAGAIN ;
11622
11623     /* Check the eval first */
11624     if (!PL_in_eval && SvTRUE(ERRSV)) {
11625         sv_catpvs(ERRSV, "Propagated");
11626         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11627         (void)POPs;
11628         res = SvREFCNT_inc_simple(sv);
11629     }
11630     else {
11631         res = POPs;
11632         SvREFCNT_inc_simple_void(res);
11633     }
11634
11635     PUTBACK ;
11636     FREETMPS ;
11637     LEAVE ;
11638     POPSTACK;
11639
11640     if (!SvOK(res)) {
11641         why1 = "Call to &{$^H{";
11642         why2 = key;
11643         why3 = "}} did not return a defined value";
11644         sv = res;
11645         goto report;
11646     }
11647
11648     return res;
11649 }
11650
11651 /* Returns a NUL terminated string, with the length of the string written to
11652    *slp
11653    */
11654 STATIC char *
11655 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11656 {
11657     dVAR;
11658     register char *d = dest;
11659     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11660
11661     PERL_ARGS_ASSERT_SCAN_WORD;
11662
11663     for (;;) {
11664         if (d >= e)
11665             Perl_croak(aTHX_ ident_too_long);
11666         if (isALNUM(*s))        /* UTF handled below */
11667             *d++ = *s++;
11668         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11669             *d++ = ':';
11670             *d++ = ':';
11671             s++;
11672         }
11673         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11674             *d++ = *s++;
11675             *d++ = *s++;
11676         }
11677         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11678             char *t = s + UTF8SKIP(s);
11679             size_t len;
11680             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11681                 t += UTF8SKIP(t);
11682             len = t - s;
11683             if (d + len > e)
11684                 Perl_croak(aTHX_ ident_too_long);
11685             Copy(s, d, len, char);
11686             d += len;
11687             s = t;
11688         }
11689         else {
11690             *d = '\0';
11691             *slp = d - dest;
11692             return s;
11693         }
11694     }
11695 }
11696
11697 STATIC char *
11698 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11699 {
11700     dVAR;
11701     char *bracket = NULL;
11702     char funny = *s++;
11703     register char *d = dest;
11704     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11705
11706     PERL_ARGS_ASSERT_SCAN_IDENT;
11707
11708     if (isSPACE(*s))
11709         s = PEEKSPACE(s);
11710     if (isDIGIT(*s)) {
11711         while (isDIGIT(*s)) {
11712             if (d >= e)
11713                 Perl_croak(aTHX_ ident_too_long);
11714             *d++ = *s++;
11715         }
11716     }
11717     else {
11718         for (;;) {
11719             if (d >= e)
11720                 Perl_croak(aTHX_ ident_too_long);
11721             if (isALNUM(*s))    /* UTF handled below */
11722                 *d++ = *s++;
11723             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11724                 *d++ = ':';
11725                 *d++ = ':';
11726                 s++;
11727             }
11728             else if (*s == ':' && s[1] == ':') {
11729                 *d++ = *s++;
11730                 *d++ = *s++;
11731             }
11732             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11733                 char *t = s + UTF8SKIP(s);
11734                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11735                     t += UTF8SKIP(t);
11736                 if (d + (t - s) > e)
11737                     Perl_croak(aTHX_ ident_too_long);
11738                 Copy(s, d, t - s, char);
11739                 d += t - s;
11740                 s = t;
11741             }
11742             else
11743                 break;
11744         }
11745     }
11746     *d = '\0';
11747     d = dest;
11748     if (*d) {
11749         if (PL_lex_state != LEX_NORMAL)
11750             PL_lex_state = LEX_INTERPENDMAYBE;
11751         return s;
11752     }
11753     if (*s == '$' && s[1] &&
11754         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11755     {
11756         return s;
11757     }
11758     if (*s == '{') {
11759         bracket = s;
11760         s++;
11761     }
11762     else if (ck_uni)
11763         check_uni();
11764     if (s < send)
11765         *d = *s++;
11766     d[1] = '\0';
11767     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11768         *d = toCTRL(*s);
11769         s++;
11770     }
11771     if (bracket) {
11772         if (isSPACE(s[-1])) {
11773             while (s < send) {
11774                 const char ch = *s++;
11775                 if (!SPACE_OR_TAB(ch)) {
11776                     *d = ch;
11777                     break;
11778                 }
11779             }
11780         }
11781         if (isIDFIRST_lazy_if(d,UTF)) {
11782             d++;
11783             if (UTF) {
11784                 char *end = s;
11785                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11786                     end += UTF8SKIP(end);
11787                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11788                         end += UTF8SKIP(end);
11789                 }
11790                 Copy(s, d, end - s, char);
11791                 d += end - s;
11792                 s = end;
11793             }
11794             else {
11795                 while ((isALNUM(*s) || *s == ':') && d < e)
11796                     *d++ = *s++;
11797                 if (d >= e)
11798                     Perl_croak(aTHX_ ident_too_long);
11799             }
11800             *d = '\0';
11801             while (s < send && SPACE_OR_TAB(*s))
11802                 s++;
11803             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11804                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11805                     const char * const brack =
11806                         (const char *)
11807                         ((*s == '[') ? "[...]" : "{...}");
11808                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11809                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11810                         funny, dest, brack, funny, dest, brack);
11811                 }
11812                 bracket++;
11813                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11814                 return s;
11815             }
11816         }
11817         /* Handle extended ${^Foo} variables
11818          * 1999-02-27 mjd-perl-patch@plover.com */
11819         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11820                  && isALNUM(*s))
11821         {
11822             d++;
11823             while (isALNUM(*s) && d < e) {
11824                 *d++ = *s++;
11825             }
11826             if (d >= e)
11827                 Perl_croak(aTHX_ ident_too_long);
11828             *d = '\0';
11829         }
11830         if (*s == '}') {
11831             s++;
11832             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11833                 PL_lex_state = LEX_INTERPEND;
11834                 PL_expect = XREF;
11835             }
11836             if (PL_lex_state == LEX_NORMAL) {
11837                 if (ckWARN(WARN_AMBIGUOUS) &&
11838                     (keyword(dest, d - dest, 0)
11839                      || get_cvn_flags(dest, d - dest, 0)))
11840                 {
11841                     if (funny == '#')
11842                         funny = '@';
11843                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11844                         "Ambiguous use of %c{%s} resolved to %c%s",
11845                         funny, dest, funny, dest);
11846                 }
11847             }
11848         }
11849         else {
11850             s = bracket;                /* let the parser handle it */
11851             *dest = '\0';
11852         }
11853     }
11854     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11855         PL_lex_state = LEX_INTERPEND;
11856     return s;
11857 }
11858
11859 static U32
11860 S_pmflag(U32 pmfl, const char ch) {
11861     switch (ch) {
11862         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11863     case GLOBAL_PAT_MOD:      pmfl |= PMf_GLOBAL; break;
11864     case CONTINUE_PAT_MOD:    pmfl |= PMf_CONTINUE; break;
11865     case ONCE_PAT_MOD:        pmfl |= PMf_KEEP; break;
11866     case KEEPCOPY_PAT_MOD:    pmfl |= PMf_KEEPCOPY; break;
11867     case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
11868     }
11869     return pmfl;
11870 }
11871
11872 STATIC char *
11873 S_scan_pat(pTHX_ char *start, I32 type)
11874 {
11875     dVAR;
11876     PMOP *pm;
11877     char *s = scan_str(start,!!PL_madskills,FALSE);
11878     const char * const valid_flags =
11879         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11880 #ifdef PERL_MAD
11881     char *modstart;
11882 #endif
11883
11884     PERL_ARGS_ASSERT_SCAN_PAT;
11885
11886     if (!s) {
11887         const char * const delimiter = skipspace(start);
11888         Perl_croak(aTHX_
11889                    (const char *)
11890                    (*delimiter == '?'
11891                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11892                     : "Search pattern not terminated" ));
11893     }
11894
11895     pm = (PMOP*)newPMOP(type, 0);
11896     if (PL_multi_open == '?') {
11897         /* This is the only point in the code that sets PMf_ONCE:  */
11898         pm->op_pmflags |= PMf_ONCE;
11899
11900         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11901            allows us to restrict the list needed by reset to just the ??
11902            matches.  */
11903         assert(type != OP_TRANS);
11904         if (PL_curstash) {
11905             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11906             U32 elements;
11907             if (!mg) {
11908                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11909                                  0);
11910             }
11911             elements = mg->mg_len / sizeof(PMOP**);
11912             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11913             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11914             mg->mg_len = elements * sizeof(PMOP**);
11915             PmopSTASH_set(pm,PL_curstash);
11916         }
11917     }
11918 #ifdef PERL_MAD
11919     modstart = s;
11920 #endif
11921     while (*s && strchr(valid_flags, *s))
11922         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11923
11924     if (isALNUM(*s)) {
11925         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11926             "Having no space between pattern and following word is deprecated");
11927
11928     }
11929 #ifdef PERL_MAD
11930     if (PL_madskills && modstart != s) {
11931         SV* tmptoken = newSVpvn(modstart, s - modstart);
11932         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11933     }
11934 #endif
11935     /* issue a warning if /c is specified,but /g is not */
11936     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11937     {
11938         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11939                        "Use of /c modifier is meaningless without /g" );
11940     }
11941
11942     PL_lex_op = (OP*)pm;
11943     pl_yylval.ival = OP_MATCH;
11944     return s;
11945 }
11946
11947 STATIC char *
11948 S_scan_subst(pTHX_ char *start)
11949 {
11950     dVAR;
11951     register char *s;
11952     register PMOP *pm;
11953     I32 first_start;
11954     I32 es = 0;
11955 #ifdef PERL_MAD
11956     char *modstart;
11957 #endif
11958
11959     PERL_ARGS_ASSERT_SCAN_SUBST;
11960
11961     pl_yylval.ival = OP_NULL;
11962
11963     s = scan_str(start,!!PL_madskills,FALSE);
11964
11965     if (!s)
11966         Perl_croak(aTHX_ "Substitution pattern not terminated");
11967
11968     if (s[-1] == PL_multi_open)
11969         s--;
11970 #ifdef PERL_MAD
11971     if (PL_madskills) {
11972         CURMAD('q', PL_thisopen);
11973         CURMAD('_', PL_thiswhite);
11974         CURMAD('E', PL_thisstuff);
11975         CURMAD('Q', PL_thisclose);
11976         PL_realtokenstart = s - SvPVX(PL_linestr);
11977     }
11978 #endif
11979
11980     first_start = PL_multi_start;
11981     s = scan_str(s,!!PL_madskills,FALSE);
11982     if (!s) {
11983         if (PL_lex_stuff) {
11984             SvREFCNT_dec(PL_lex_stuff);
11985             PL_lex_stuff = NULL;
11986         }
11987         Perl_croak(aTHX_ "Substitution replacement not terminated");
11988     }
11989     PL_multi_start = first_start;       /* so whole substitution is taken together */
11990
11991     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11992
11993 #ifdef PERL_MAD
11994     if (PL_madskills) {
11995         CURMAD('z', PL_thisopen);
11996         CURMAD('R', PL_thisstuff);
11997         CURMAD('Z', PL_thisclose);
11998     }
11999     modstart = s;
12000 #endif
12001
12002     while (*s) {
12003         if (*s == EXEC_PAT_MOD) {
12004             s++;
12005             es++;
12006         }
12007         else if (strchr(S_PAT_MODS, *s))
12008             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12009         else {
12010             if (isALNUM(*s)) {
12011                 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12012                     "Having no space between pattern and following word is deprecated");
12013
12014             }
12015             break;
12016         }
12017     }
12018
12019 #ifdef PERL_MAD
12020     if (PL_madskills) {
12021         if (modstart != s)
12022             curmad('m', newSVpvn(modstart, s - modstart));
12023         append_madprops(PL_thismad, (OP*)pm, 0);
12024         PL_thismad = 0;
12025     }
12026 #endif
12027     if ((pm->op_pmflags & PMf_CONTINUE)) {
12028         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
12029     }
12030
12031     if (es) {
12032         SV * const repl = newSVpvs("");
12033
12034         PL_sublex_info.super_bufptr = s;
12035         PL_sublex_info.super_bufend = PL_bufend;
12036         PL_multi_end = 0;
12037         pm->op_pmflags |= PMf_EVAL;
12038         while (es-- > 0) {
12039             if (es)
12040                 sv_catpvs(repl, "eval ");
12041             else
12042                 sv_catpvs(repl, "do ");
12043         }
12044         sv_catpvs(repl, "{");
12045         sv_catsv(repl, PL_lex_repl);
12046         if (strchr(SvPVX(PL_lex_repl), '#'))
12047             sv_catpvs(repl, "\n");
12048         sv_catpvs(repl, "}");
12049         SvEVALED_on(repl);
12050         SvREFCNT_dec(PL_lex_repl);
12051         PL_lex_repl = repl;
12052     }
12053
12054     PL_lex_op = (OP*)pm;
12055     pl_yylval.ival = OP_SUBST;
12056     return s;
12057 }
12058
12059 STATIC char *
12060 S_scan_trans(pTHX_ char *start)
12061 {
12062     dVAR;
12063     register char* s;
12064     OP *o;
12065     short *tbl;
12066     U8 squash;
12067     U8 del;
12068     U8 complement;
12069 #ifdef PERL_MAD
12070     char *modstart;
12071 #endif
12072
12073     PERL_ARGS_ASSERT_SCAN_TRANS;
12074
12075     pl_yylval.ival = OP_NULL;
12076
12077     s = scan_str(start,!!PL_madskills,FALSE);
12078     if (!s)
12079         Perl_croak(aTHX_ "Transliteration pattern not terminated");
12080
12081     if (s[-1] == PL_multi_open)
12082         s--;
12083 #ifdef PERL_MAD
12084     if (PL_madskills) {
12085         CURMAD('q', PL_thisopen);
12086         CURMAD('_', PL_thiswhite);
12087         CURMAD('E', PL_thisstuff);
12088         CURMAD('Q', PL_thisclose);
12089         PL_realtokenstart = s - SvPVX(PL_linestr);
12090     }
12091 #endif
12092
12093     s = scan_str(s,!!PL_madskills,FALSE);
12094     if (!s) {
12095         if (PL_lex_stuff) {
12096             SvREFCNT_dec(PL_lex_stuff);
12097             PL_lex_stuff = NULL;
12098         }
12099         Perl_croak(aTHX_ "Transliteration replacement not terminated");
12100     }
12101     if (PL_madskills) {
12102         CURMAD('z', PL_thisopen);
12103         CURMAD('R', PL_thisstuff);
12104         CURMAD('Z', PL_thisclose);
12105     }
12106
12107     complement = del = squash = 0;
12108 #ifdef PERL_MAD
12109     modstart = s;
12110 #endif
12111     while (1) {
12112         switch (*s) {
12113         case 'c':
12114             complement = OPpTRANS_COMPLEMENT;
12115             break;
12116         case 'd':
12117             del = OPpTRANS_DELETE;
12118             break;
12119         case 's':
12120             squash = OPpTRANS_SQUASH;
12121             break;
12122         default:
12123             goto no_more;
12124         }
12125         s++;
12126     }
12127   no_more:
12128
12129     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12130     o = newPVOP(OP_TRANS, 0, (char*)tbl);
12131     o->op_private &= ~OPpTRANS_ALL;
12132     o->op_private |= del|squash|complement|
12133       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12134       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
12135
12136     PL_lex_op = o;
12137     pl_yylval.ival = OP_TRANS;
12138
12139 #ifdef PERL_MAD
12140     if (PL_madskills) {
12141         if (modstart != s)
12142             curmad('m', newSVpvn(modstart, s - modstart));
12143         append_madprops(PL_thismad, o, 0);
12144         PL_thismad = 0;
12145     }
12146 #endif
12147
12148     return s;
12149 }
12150
12151 STATIC char *
12152 S_scan_heredoc(pTHX_ register char *s)
12153 {
12154     dVAR;
12155     SV *herewas;
12156     I32 op_type = OP_SCALAR;
12157     I32 len;
12158     SV *tmpstr;
12159     char term;
12160     const char *found_newline;
12161     register char *d;
12162     register char *e;
12163     char *peek;
12164     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12165 #ifdef PERL_MAD
12166     I32 stuffstart = s - SvPVX(PL_linestr);
12167     char *tstart;
12168
12169     PL_realtokenstart = -1;
12170 #endif
12171
12172     PERL_ARGS_ASSERT_SCAN_HEREDOC;
12173
12174     s += 2;
12175     d = PL_tokenbuf;
12176     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12177     if (!outer)
12178         *d++ = '\n';
12179     peek = s;
12180     while (SPACE_OR_TAB(*peek))
12181         peek++;
12182     if (*peek == '`' || *peek == '\'' || *peek =='"') {
12183         s = peek;
12184         term = *s++;
12185         s = delimcpy(d, e, s, PL_bufend, term, &len);
12186         d += len;
12187         if (s < PL_bufend)
12188             s++;
12189     }
12190     else {
12191         if (*s == '\\')
12192             s++, term = '\'';
12193         else
12194             term = '"';
12195         if (!isALNUM_lazy_if(s,UTF))
12196             deprecate("bare << to mean <<\"\"");
12197         for (; isALNUM_lazy_if(s,UTF); s++) {
12198             if (d < e)
12199                 *d++ = *s;
12200         }
12201     }
12202     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12203         Perl_croak(aTHX_ "Delimiter for here document is too long");
12204     *d++ = '\n';
12205     *d = '\0';
12206     len = d - PL_tokenbuf;
12207
12208 #ifdef PERL_MAD
12209     if (PL_madskills) {
12210         tstart = PL_tokenbuf + !outer;
12211         PL_thisclose = newSVpvn(tstart, len - !outer);
12212         tstart = SvPVX(PL_linestr) + stuffstart;
12213         PL_thisopen = newSVpvn(tstart, s - tstart);
12214         stuffstart = s - SvPVX(PL_linestr);
12215     }
12216 #endif
12217 #ifndef PERL_STRICT_CR
12218     d = strchr(s, '\r');
12219     if (d) {
12220         char * const olds = s;
12221         s = d;
12222         while (s < PL_bufend) {
12223             if (*s == '\r') {
12224                 *d++ = '\n';
12225                 if (*++s == '\n')
12226                     s++;
12227             }
12228             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
12229                 *d++ = *s++;
12230                 s++;
12231             }
12232             else
12233                 *d++ = *s++;
12234         }
12235         *d = '\0';
12236         PL_bufend = d;
12237         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12238         s = olds;
12239     }
12240 #endif
12241 #ifdef PERL_MAD
12242     found_newline = 0;
12243 #endif
12244     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12245         herewas = newSVpvn(s,PL_bufend-s);
12246     }
12247     else {
12248 #ifdef PERL_MAD
12249         herewas = newSVpvn(s-1,found_newline-s+1);
12250 #else
12251         s--;
12252         herewas = newSVpvn(s,found_newline-s);
12253 #endif
12254     }
12255 #ifdef PERL_MAD
12256     if (PL_madskills) {
12257         tstart = SvPVX(PL_linestr) + stuffstart;
12258         if (PL_thisstuff)
12259             sv_catpvn(PL_thisstuff, tstart, s - tstart);
12260         else
12261             PL_thisstuff = newSVpvn(tstart, s - tstart);
12262     }
12263 #endif
12264     s += SvCUR(herewas);
12265
12266 #ifdef PERL_MAD
12267     stuffstart = s - SvPVX(PL_linestr);
12268
12269     if (found_newline)
12270         s--;
12271 #endif
12272
12273     tmpstr = newSV_type(SVt_PVIV);
12274     SvGROW(tmpstr, 80);
12275     if (term == '\'') {
12276         op_type = OP_CONST;
12277         SvIV_set(tmpstr, -1);
12278     }
12279     else if (term == '`') {
12280         op_type = OP_BACKTICK;
12281         SvIV_set(tmpstr, '\\');
12282     }
12283
12284     CLINE;
12285     PL_multi_start = CopLINE(PL_curcop);
12286     PL_multi_open = PL_multi_close = '<';
12287     term = *PL_tokenbuf;
12288     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12289         char * const bufptr = PL_sublex_info.super_bufptr;
12290         char * const bufend = PL_sublex_info.super_bufend;
12291         char * const olds = s - SvCUR(herewas);
12292         s = strchr(bufptr, '\n');
12293         if (!s)
12294             s = bufend;
12295         d = s;
12296         while (s < bufend &&
12297           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12298             if (*s++ == '\n')
12299                 CopLINE_inc(PL_curcop);
12300         }
12301         if (s >= bufend) {
12302             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12303             missingterm(PL_tokenbuf);
12304         }
12305         sv_setpvn(herewas,bufptr,d-bufptr+1);
12306         sv_setpvn(tmpstr,d+1,s-d);
12307         s += len - 1;
12308         sv_catpvn(herewas,s,bufend-s);
12309         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12310
12311         s = olds;
12312         goto retval;
12313     }
12314     else if (!outer) {
12315         d = s;
12316         while (s < PL_bufend &&
12317           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12318             if (*s++ == '\n')
12319                 CopLINE_inc(PL_curcop);
12320         }
12321         if (s >= PL_bufend) {
12322             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12323             missingterm(PL_tokenbuf);
12324         }
12325         sv_setpvn(tmpstr,d+1,s-d);
12326 #ifdef PERL_MAD
12327         if (PL_madskills) {
12328             if (PL_thisstuff)
12329                 sv_catpvn(PL_thisstuff, d + 1, s - d);
12330             else
12331                 PL_thisstuff = newSVpvn(d + 1, s - d);
12332             stuffstart = s - SvPVX(PL_linestr);
12333         }
12334 #endif
12335         s += len - 1;
12336         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12337
12338         sv_catpvn(herewas,s,PL_bufend-s);
12339         sv_setsv(PL_linestr,herewas);
12340         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12341         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12342         PL_last_lop = PL_last_uni = NULL;
12343     }
12344     else
12345         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12346     while (s >= PL_bufend) {    /* multiple line string? */
12347 #ifdef PERL_MAD
12348         if (PL_madskills) {
12349             tstart = SvPVX(PL_linestr) + stuffstart;
12350             if (PL_thisstuff)
12351                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12352             else
12353                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12354         }
12355 #endif
12356         PL_bufptr = s;
12357         CopLINE_inc(PL_curcop);
12358         if (!outer || !lex_next_chunk(0)) {
12359             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12360             missingterm(PL_tokenbuf);
12361         }
12362         CopLINE_dec(PL_curcop);
12363         s = PL_bufptr;
12364 #ifdef PERL_MAD
12365         stuffstart = s - SvPVX(PL_linestr);
12366 #endif
12367         CopLINE_inc(PL_curcop);
12368         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12369         PL_last_lop = PL_last_uni = NULL;
12370 #ifndef PERL_STRICT_CR
12371         if (PL_bufend - PL_linestart >= 2) {
12372             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12373                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12374             {
12375                 PL_bufend[-2] = '\n';
12376                 PL_bufend--;
12377                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12378             }
12379             else if (PL_bufend[-1] == '\r')
12380                 PL_bufend[-1] = '\n';
12381         }
12382         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12383             PL_bufend[-1] = '\n';
12384 #endif
12385         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12386             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12387             *(SvPVX(PL_linestr) + off ) = ' ';
12388             sv_catsv(PL_linestr,herewas);
12389             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12390             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12391         }
12392         else {
12393             s = PL_bufend;
12394             sv_catsv(tmpstr,PL_linestr);
12395         }
12396     }
12397     s++;
12398 retval:
12399     PL_multi_end = CopLINE(PL_curcop);
12400     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12401         SvPV_shrink_to_cur(tmpstr);
12402     }
12403     SvREFCNT_dec(herewas);
12404     if (!IN_BYTES) {
12405         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12406             SvUTF8_on(tmpstr);
12407         else if (PL_encoding)
12408             sv_recode_to_utf8(tmpstr, PL_encoding);
12409     }
12410     PL_lex_stuff = tmpstr;
12411     pl_yylval.ival = op_type;
12412     return s;
12413 }
12414
12415 /* scan_inputsymbol
12416    takes: current position in input buffer
12417    returns: new position in input buffer
12418    side-effects: pl_yylval and lex_op are set.
12419
12420    This code handles:
12421
12422    <>           read from ARGV
12423    <FH>         read from filehandle
12424    <pkg::FH>    read from package qualified filehandle
12425    <pkg'FH>     read from package qualified filehandle
12426    <$fh>        read from filehandle in $fh
12427    <*.h>        filename glob
12428
12429 */
12430
12431 STATIC char *
12432 S_scan_inputsymbol(pTHX_ char *start)
12433 {
12434     dVAR;
12435     register char *s = start;           /* current position in buffer */
12436     char *end;
12437     I32 len;
12438     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12439     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12440
12441     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12442
12443     end = strchr(s, '\n');
12444     if (!end)
12445         end = PL_bufend;
12446     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12447
12448     /* die if we didn't have space for the contents of the <>,
12449        or if it didn't end, or if we see a newline
12450     */
12451
12452     if (len >= (I32)sizeof PL_tokenbuf)
12453         Perl_croak(aTHX_ "Excessively long <> operator");
12454     if (s >= end)
12455         Perl_croak(aTHX_ "Unterminated <> operator");
12456
12457     s++;
12458
12459     /* check for <$fh>
12460        Remember, only scalar variables are interpreted as filehandles by
12461        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12462        treated as a glob() call.
12463        This code makes use of the fact that except for the $ at the front,
12464        a scalar variable and a filehandle look the same.
12465     */
12466     if (*d == '$' && d[1]) d++;
12467
12468     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12469     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12470         d++;
12471
12472     /* If we've tried to read what we allow filehandles to look like, and
12473        there's still text left, then it must be a glob() and not a getline.
12474        Use scan_str to pull out the stuff between the <> and treat it
12475        as nothing more than a string.
12476     */
12477
12478     if (d - PL_tokenbuf != len) {
12479         pl_yylval.ival = OP_GLOB;
12480         s = scan_str(start,!!PL_madskills,FALSE);
12481         if (!s)
12482            Perl_croak(aTHX_ "Glob not terminated");
12483         return s;
12484     }
12485     else {
12486         bool readline_overriden = FALSE;
12487         GV *gv_readline;
12488         GV **gvp;
12489         /* we're in a filehandle read situation */
12490         d = PL_tokenbuf;
12491
12492         /* turn <> into <ARGV> */
12493         if (!len)
12494             Copy("ARGV",d,5,char);
12495
12496         /* Check whether readline() is overriden */
12497         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12498         if ((gv_readline
12499                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12500                 ||
12501                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12502                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12503                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12504             readline_overriden = TRUE;
12505
12506         /* if <$fh>, create the ops to turn the variable into a
12507            filehandle
12508         */
12509         if (*d == '$') {
12510             /* try to find it in the pad for this block, otherwise find
12511                add symbol table ops
12512             */
12513             const PADOFFSET tmp = pad_findmy(d, len, 0);
12514             if (tmp != NOT_IN_PAD) {
12515                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12516                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12517                     HEK * const stashname = HvNAME_HEK(stash);
12518                     SV * const sym = sv_2mortal(newSVhek(stashname));
12519                     sv_catpvs(sym, "::");
12520                     sv_catpv(sym, d+1);
12521                     d = SvPVX(sym);
12522                     goto intro_sym;
12523                 }
12524                 else {
12525                     OP * const o = newOP(OP_PADSV, 0);
12526                     o->op_targ = tmp;
12527                     PL_lex_op = readline_overriden
12528                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12529                                 append_elem(OP_LIST, o,
12530                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12531                         : (OP*)newUNOP(OP_READLINE, 0, o);
12532                 }
12533             }
12534             else {
12535                 GV *gv;
12536                 ++d;
12537 intro_sym:
12538                 gv = gv_fetchpv(d,
12539                                 (PL_in_eval
12540                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12541                                  : GV_ADDMULTI),
12542                                 SVt_PV);
12543                 PL_lex_op = readline_overriden
12544                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12545                             append_elem(OP_LIST,
12546                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12547                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12548                     : (OP*)newUNOP(OP_READLINE, 0,
12549                             newUNOP(OP_RV2SV, 0,
12550                                 newGVOP(OP_GV, 0, gv)));
12551             }
12552             if (!readline_overriden)
12553                 PL_lex_op->op_flags |= OPf_SPECIAL;
12554             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12555             pl_yylval.ival = OP_NULL;
12556         }
12557
12558         /* If it's none of the above, it must be a literal filehandle
12559            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12560         else {
12561             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12562             PL_lex_op = readline_overriden
12563                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12564                         append_elem(OP_LIST,
12565                             newGVOP(OP_GV, 0, gv),
12566                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12567                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12568             pl_yylval.ival = OP_NULL;
12569         }
12570     }
12571
12572     return s;
12573 }
12574
12575
12576 /* scan_str
12577    takes: start position in buffer
12578           keep_quoted preserve \ on the embedded delimiter(s)
12579           keep_delims preserve the delimiters around the string
12580    returns: position to continue reading from buffer
12581    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12582         updates the read buffer.
12583
12584    This subroutine pulls a string out of the input.  It is called for:
12585         q               single quotes           q(literal text)
12586         '               single quotes           'literal text'
12587         qq              double quotes           qq(interpolate $here please)
12588         "               double quotes           "interpolate $here please"
12589         qx              backticks               qx(/bin/ls -l)
12590         `               backticks               `/bin/ls -l`
12591         qw              quote words             @EXPORT_OK = qw( func() $spam )
12592         m//             regexp match            m/this/
12593         s///            regexp substitute       s/this/that/
12594         tr///           string transliterate    tr/this/that/
12595         y///            string transliterate    y/this/that/
12596         ($*@)           sub prototypes          sub foo ($)
12597         (stuff)         sub attr parameters     sub foo : attr(stuff)
12598         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12599
12600    In most of these cases (all but <>, patterns and transliterate)
12601    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12602    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12603    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12604    calls scan_str().
12605
12606    It skips whitespace before the string starts, and treats the first
12607    character as the delimiter.  If the delimiter is one of ([{< then
12608    the corresponding "close" character )]}> is used as the closing
12609    delimiter.  It allows quoting of delimiters, and if the string has
12610    balanced delimiters ([{<>}]) it allows nesting.
12611
12612    On success, the SV with the resulting string is put into lex_stuff or,
12613    if that is already non-NULL, into lex_repl. The second case occurs only
12614    when parsing the RHS of the special constructs s/// and tr/// (y///).
12615    For convenience, the terminating delimiter character is stuffed into
12616    SvIVX of the SV.
12617 */
12618
12619 STATIC char *
12620 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12621 {
12622     dVAR;
12623     SV *sv;                             /* scalar value: string */
12624     const char *tmps;                   /* temp string, used for delimiter matching */
12625     register char *s = start;           /* current position in the buffer */
12626     register char term;                 /* terminating character */
12627     register char *to;                  /* current position in the sv's data */
12628     I32 brackets = 1;                   /* bracket nesting level */
12629     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12630     I32 termcode;                       /* terminating char. code */
12631     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12632     STRLEN termlen;                     /* length of terminating string */
12633     int last_off = 0;                   /* last position for nesting bracket */
12634 #ifdef PERL_MAD
12635     int stuffstart;
12636     char *tstart;
12637 #endif
12638
12639     PERL_ARGS_ASSERT_SCAN_STR;
12640
12641     /* skip space before the delimiter */
12642     if (isSPACE(*s)) {
12643         s = PEEKSPACE(s);
12644     }
12645
12646 #ifdef PERL_MAD
12647     if (PL_realtokenstart >= 0) {
12648         stuffstart = PL_realtokenstart;
12649         PL_realtokenstart = -1;
12650     }
12651     else
12652         stuffstart = start - SvPVX(PL_linestr);
12653 #endif
12654     /* mark where we are, in case we need to report errors */
12655     CLINE;
12656
12657     /* after skipping whitespace, the next character is the terminator */
12658     term = *s;
12659     if (!UTF) {
12660         termcode = termstr[0] = term;
12661         termlen = 1;
12662     }
12663     else {
12664         termcode = utf8_to_uvchr((U8*)s, &termlen);
12665         Copy(s, termstr, termlen, U8);
12666         if (!UTF8_IS_INVARIANT(term))
12667             has_utf8 = TRUE;
12668     }
12669
12670     /* mark where we are */
12671     PL_multi_start = CopLINE(PL_curcop);
12672     PL_multi_open = term;
12673
12674     /* find corresponding closing delimiter */
12675     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12676         termcode = termstr[0] = term = tmps[5];
12677
12678     PL_multi_close = term;
12679
12680     /* create a new SV to hold the contents.  79 is the SV's initial length.
12681        What a random number. */
12682     sv = newSV_type(SVt_PVIV);
12683     SvGROW(sv, 80);
12684     SvIV_set(sv, termcode);
12685     (void)SvPOK_only(sv);               /* validate pointer */
12686
12687     /* move past delimiter and try to read a complete string */
12688     if (keep_delims)
12689         sv_catpvn(sv, s, termlen);
12690     s += termlen;
12691 #ifdef PERL_MAD
12692     tstart = SvPVX(PL_linestr) + stuffstart;
12693     if (!PL_thisopen && !keep_delims) {
12694         PL_thisopen = newSVpvn(tstart, s - tstart);
12695         stuffstart = s - SvPVX(PL_linestr);
12696     }
12697 #endif
12698     for (;;) {
12699         if (PL_encoding && !UTF) {
12700             bool cont = TRUE;
12701
12702             while (cont) {
12703                 int offset = s - SvPVX_const(PL_linestr);
12704                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12705                                            &offset, (char*)termstr, termlen);
12706                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12707                 char * const svlast = SvEND(sv) - 1;
12708
12709                 for (; s < ns; s++) {
12710                     if (*s == '\n' && !PL_rsfp)
12711                         CopLINE_inc(PL_curcop);
12712                 }
12713                 if (!found)
12714                     goto read_more_line;
12715                 else {
12716                     /* handle quoted delimiters */
12717                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12718                         const char *t;
12719                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12720                             t--;
12721                         if ((svlast-1 - t) % 2) {
12722                             if (!keep_quoted) {
12723                                 *(svlast-1) = term;
12724                                 *svlast = '\0';
12725                                 SvCUR_set(sv, SvCUR(sv) - 1);
12726                             }
12727                             continue;
12728                         }
12729                     }
12730                     if (PL_multi_open == PL_multi_close) {
12731                         cont = FALSE;
12732                     }
12733                     else {
12734                         const char *t;
12735                         char *w;
12736                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12737                             /* At here, all closes are "was quoted" one,
12738                                so we don't check PL_multi_close. */
12739                             if (*t == '\\') {
12740                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12741                                     t++;
12742                                 else
12743                                     *w++ = *t++;
12744                             }
12745                             else if (*t == PL_multi_open)
12746                                 brackets++;
12747
12748                             *w = *t;
12749                         }
12750                         if (w < t) {
12751                             *w++ = term;
12752                             *w = '\0';
12753                             SvCUR_set(sv, w - SvPVX_const(sv));
12754                         }
12755                         last_off = w - SvPVX(sv);
12756                         if (--brackets <= 0)
12757                             cont = FALSE;
12758                     }
12759                 }
12760             }
12761             if (!keep_delims) {
12762                 SvCUR_set(sv, SvCUR(sv) - 1);
12763                 *SvEND(sv) = '\0';
12764             }
12765             break;
12766         }
12767
12768         /* extend sv if need be */
12769         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12770         /* set 'to' to the next character in the sv's string */
12771         to = SvPVX(sv)+SvCUR(sv);
12772
12773         /* if open delimiter is the close delimiter read unbridle */
12774         if (PL_multi_open == PL_multi_close) {
12775             for (; s < PL_bufend; s++,to++) {
12776                 /* embedded newlines increment the current line number */
12777                 if (*s == '\n' && !PL_rsfp)
12778                     CopLINE_inc(PL_curcop);
12779                 /* handle quoted delimiters */
12780                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12781                     if (!keep_quoted && s[1] == term)
12782                         s++;
12783                 /* any other quotes are simply copied straight through */
12784                     else
12785                         *to++ = *s++;
12786                 }
12787                 /* terminate when run out of buffer (the for() condition), or
12788                    have found the terminator */
12789                 else if (*s == term) {
12790                     if (termlen == 1)
12791                         break;
12792                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12793                         break;
12794                 }
12795                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12796                     has_utf8 = TRUE;
12797                 *to = *s;
12798             }
12799         }
12800
12801         /* if the terminator isn't the same as the start character (e.g.,
12802            matched brackets), we have to allow more in the quoting, and
12803            be prepared for nested brackets.
12804         */
12805         else {
12806             /* read until we run out of string, or we find the terminator */
12807             for (; s < PL_bufend; s++,to++) {
12808                 /* embedded newlines increment the line count */
12809                 if (*s == '\n' && !PL_rsfp)
12810                     CopLINE_inc(PL_curcop);
12811                 /* backslashes can escape the open or closing characters */
12812                 if (*s == '\\' && s+1 < PL_bufend) {
12813                     if (!keep_quoted &&
12814                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12815                         s++;
12816                     else
12817                         *to++ = *s++;
12818                 }
12819                 /* allow nested opens and closes */
12820                 else if (*s == PL_multi_close && --brackets <= 0)
12821                     break;
12822                 else if (*s == PL_multi_open)
12823                     brackets++;
12824                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12825                     has_utf8 = TRUE;
12826                 *to = *s;
12827             }
12828         }
12829         /* terminate the copied string and update the sv's end-of-string */
12830         *to = '\0';
12831         SvCUR_set(sv, to - SvPVX_const(sv));
12832
12833         /*
12834          * this next chunk reads more into the buffer if we're not done yet
12835          */
12836
12837         if (s < PL_bufend)
12838             break;              /* handle case where we are done yet :-) */
12839
12840 #ifndef PERL_STRICT_CR
12841         if (to - SvPVX_const(sv) >= 2) {
12842             if ((to[-2] == '\r' && to[-1] == '\n') ||
12843                 (to[-2] == '\n' && to[-1] == '\r'))
12844             {
12845                 to[-2] = '\n';
12846                 to--;
12847                 SvCUR_set(sv, to - SvPVX_const(sv));
12848             }
12849             else if (to[-1] == '\r')
12850                 to[-1] = '\n';
12851         }
12852         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12853             to[-1] = '\n';
12854 #endif
12855
12856      read_more_line:
12857         /* if we're out of file, or a read fails, bail and reset the current
12858            line marker so we can report where the unterminated string began
12859         */
12860 #ifdef PERL_MAD
12861         if (PL_madskills) {
12862             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12863             if (PL_thisstuff)
12864                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12865             else
12866                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12867         }
12868 #endif
12869         CopLINE_inc(PL_curcop);
12870         PL_bufptr = PL_bufend;
12871         if (!lex_next_chunk(0)) {
12872             sv_free(sv);
12873             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12874             return NULL;
12875         }
12876         s = PL_bufptr;
12877 #ifdef PERL_MAD
12878         stuffstart = 0;
12879 #endif
12880     }
12881
12882     /* at this point, we have successfully read the delimited string */
12883
12884     if (!PL_encoding || UTF) {
12885 #ifdef PERL_MAD
12886         if (PL_madskills) {
12887             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12888             const int len = s - tstart;
12889             if (PL_thisstuff)
12890                 sv_catpvn(PL_thisstuff, tstart, len);
12891             else
12892                 PL_thisstuff = newSVpvn(tstart, len);
12893             if (!PL_thisclose && !keep_delims)
12894                 PL_thisclose = newSVpvn(s,termlen);
12895         }
12896 #endif
12897
12898         if (keep_delims)
12899             sv_catpvn(sv, s, termlen);
12900         s += termlen;
12901     }
12902 #ifdef PERL_MAD
12903     else {
12904         if (PL_madskills) {
12905             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12906             const int len = s - tstart - termlen;
12907             if (PL_thisstuff)
12908                 sv_catpvn(PL_thisstuff, tstart, len);
12909             else
12910                 PL_thisstuff = newSVpvn(tstart, len);
12911             if (!PL_thisclose && !keep_delims)
12912                 PL_thisclose = newSVpvn(s - termlen,termlen);
12913         }
12914     }
12915 #endif
12916     if (has_utf8 || PL_encoding)
12917         SvUTF8_on(sv);
12918
12919     PL_multi_end = CopLINE(PL_curcop);
12920
12921     /* if we allocated too much space, give some back */
12922     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12923         SvLEN_set(sv, SvCUR(sv) + 1);
12924         SvPV_renew(sv, SvLEN(sv));
12925     }
12926
12927     /* decide whether this is the first or second quoted string we've read
12928        for this op
12929     */
12930
12931     if (PL_lex_stuff)
12932         PL_lex_repl = sv;
12933     else
12934         PL_lex_stuff = sv;
12935     return s;
12936 }
12937
12938 /*
12939   scan_num
12940   takes: pointer to position in buffer
12941   returns: pointer to new position in buffer
12942   side-effects: builds ops for the constant in pl_yylval.op
12943
12944   Read a number in any of the formats that Perl accepts:
12945
12946   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12947   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12948   0b[01](_?[01])*
12949   0[0-7](_?[0-7])*
12950   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12951
12952   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12953   thing it reads.
12954
12955   If it reads a number without a decimal point or an exponent, it will
12956   try converting the number to an integer and see if it can do so
12957   without loss of precision.
12958 */
12959
12960 char *
12961 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12962 {
12963     dVAR;
12964     register const char *s = start;     /* current position in buffer */
12965     register char *d;                   /* destination in temp buffer */
12966     register char *e;                   /* end of temp buffer */
12967     NV nv;                              /* number read, as a double */
12968     SV *sv = NULL;                      /* place to put the converted number */
12969     bool floatit;                       /* boolean: int or float? */
12970     const char *lastub = NULL;          /* position of last underbar */
12971     static char const number_too_long[] = "Number too long";
12972
12973     PERL_ARGS_ASSERT_SCAN_NUM;
12974
12975     /* We use the first character to decide what type of number this is */
12976
12977     switch (*s) {
12978     default:
12979       Perl_croak(aTHX_ "panic: scan_num");
12980
12981     /* if it starts with a 0, it could be an octal number, a decimal in
12982        0.13 disguise, or a hexadecimal number, or a binary number. */
12983     case '0':
12984         {
12985           /* variables:
12986              u          holds the "number so far"
12987              shift      the power of 2 of the base
12988                         (hex == 4, octal == 3, binary == 1)
12989              overflowed was the number more than we can hold?
12990
12991              Shift is used when we add a digit.  It also serves as an "are
12992              we in octal/hex/binary?" indicator to disallow hex characters
12993              when in octal mode.
12994            */
12995             NV n = 0.0;
12996             UV u = 0;
12997             I32 shift;
12998             bool overflowed = FALSE;
12999             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
13000             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13001             static const char* const bases[5] =
13002               { "", "binary", "", "octal", "hexadecimal" };
13003             static const char* const Bases[5] =
13004               { "", "Binary", "", "Octal", "Hexadecimal" };
13005             static const char* const maxima[5] =
13006               { "",
13007                 "0b11111111111111111111111111111111",
13008                 "",
13009                 "037777777777",
13010                 "0xffffffff" };
13011             const char *base, *Base, *max;
13012
13013             /* check for hex */
13014             if (s[1] == 'x' || s[1] == 'X') {
13015                 shift = 4;
13016                 s += 2;
13017                 just_zero = FALSE;
13018             } else if (s[1] == 'b' || s[1] == 'B') {
13019                 shift = 1;
13020                 s += 2;
13021                 just_zero = FALSE;
13022             }
13023             /* check for a decimal in disguise */
13024             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
13025                 goto decimal;
13026             /* so it must be octal */
13027             else {
13028                 shift = 3;
13029                 s++;
13030             }
13031
13032             if (*s == '_') {
13033                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13034                                "Misplaced _ in number");
13035                lastub = s++;
13036             }
13037
13038             base = bases[shift];
13039             Base = Bases[shift];
13040             max  = maxima[shift];
13041
13042             /* read the rest of the number */
13043             for (;;) {
13044                 /* x is used in the overflow test,
13045                    b is the digit we're adding on. */
13046                 UV x, b;
13047
13048                 switch (*s) {
13049
13050                 /* if we don't mention it, we're done */
13051                 default:
13052                     goto out;
13053
13054                 /* _ are ignored -- but warned about if consecutive */
13055                 case '_':
13056                     if (lastub && s == lastub + 1)
13057                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13058                                        "Misplaced _ in number");
13059                     lastub = s++;
13060                     break;
13061
13062                 /* 8 and 9 are not octal */
13063                 case '8': case '9':
13064                     if (shift == 3)
13065                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13066                     /* FALL THROUGH */
13067
13068                 /* octal digits */
13069                 case '2': case '3': case '4':
13070                 case '5': case '6': case '7':
13071                     if (shift == 1)
13072                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13073                     /* FALL THROUGH */
13074
13075                 case '0': case '1':
13076                     b = *s++ & 15;              /* ASCII digit -> value of digit */
13077                     goto digit;
13078
13079                 /* hex digits */
13080                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13081                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13082                     /* make sure they said 0x */
13083                     if (shift != 4)
13084                         goto out;
13085                     b = (*s++ & 7) + 9;
13086
13087                     /* Prepare to put the digit we have onto the end
13088                        of the number so far.  We check for overflows.
13089                     */
13090
13091                   digit:
13092                     just_zero = FALSE;
13093                     if (!overflowed) {
13094                         x = u << shift; /* make room for the digit */
13095
13096                         if ((x >> shift) != u
13097                             && !(PL_hints & HINT_NEW_BINARY)) {
13098                             overflowed = TRUE;
13099                             n = (NV) u;
13100                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13101                                              "Integer overflow in %s number",
13102                                              base);
13103                         } else
13104                             u = x | b;          /* add the digit to the end */
13105                     }
13106                     if (overflowed) {
13107                         n *= nvshift[shift];
13108                         /* If an NV has not enough bits in its
13109                          * mantissa to represent an UV this summing of
13110                          * small low-order numbers is a waste of time
13111                          * (because the NV cannot preserve the
13112                          * low-order bits anyway): we could just
13113                          * remember when did we overflow and in the
13114                          * end just multiply n by the right
13115                          * amount. */
13116                         n += (NV) b;
13117                     }
13118                     break;
13119                 }
13120             }
13121
13122           /* if we get here, we had success: make a scalar value from
13123              the number.
13124           */
13125           out:
13126
13127             /* final misplaced underbar check */
13128             if (s[-1] == '_') {
13129                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13130             }
13131
13132             if (overflowed) {
13133                 if (n > 4294967295.0)
13134                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13135                                    "%s number > %s non-portable",
13136                                    Base, max);
13137                 sv = newSVnv(n);
13138             }
13139             else {
13140 #if UVSIZE > 4
13141                 if (u > 0xffffffff)
13142                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13143                                    "%s number > %s non-portable",
13144                                    Base, max);
13145 #endif
13146                 sv = newSVuv(u);
13147             }
13148             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13149                 sv = new_constant(start, s - start, "integer",
13150                                   sv, NULL, NULL, 0);
13151             else if (PL_hints & HINT_NEW_BINARY)
13152                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13153         }
13154         break;
13155
13156     /*
13157       handle decimal numbers.
13158       we're also sent here when we read a 0 as the first digit
13159     */
13160     case '1': case '2': case '3': case '4': case '5':
13161     case '6': case '7': case '8': case '9': case '.':
13162       decimal:
13163         d = PL_tokenbuf;
13164         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13165         floatit = FALSE;
13166
13167         /* read next group of digits and _ and copy into d */
13168         while (isDIGIT(*s) || *s == '_') {
13169             /* skip underscores, checking for misplaced ones
13170                if -w is on
13171             */
13172             if (*s == '_') {
13173                 if (lastub && s == lastub + 1)
13174                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13175                                    "Misplaced _ in number");
13176                 lastub = s++;
13177             }
13178             else {
13179                 /* check for end of fixed-length buffer */
13180                 if (d >= e)
13181                     Perl_croak(aTHX_ number_too_long);
13182                 /* if we're ok, copy the character */
13183                 *d++ = *s++;
13184             }
13185         }
13186
13187         /* final misplaced underbar check */
13188         if (lastub && s == lastub + 1) {
13189             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13190         }
13191
13192         /* read a decimal portion if there is one.  avoid
13193            3..5 being interpreted as the number 3. followed
13194            by .5
13195         */
13196         if (*s == '.' && s[1] != '.') {
13197             floatit = TRUE;
13198             *d++ = *s++;
13199
13200             if (*s == '_') {
13201                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13202                                "Misplaced _ in number");
13203                 lastub = s;
13204             }
13205
13206             /* copy, ignoring underbars, until we run out of digits.
13207             */
13208             for (; isDIGIT(*s) || *s == '_'; s++) {
13209                 /* fixed length buffer check */
13210                 if (d >= e)
13211                     Perl_croak(aTHX_ number_too_long);
13212                 if (*s == '_') {
13213                    if (lastub && s == lastub + 1)
13214                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13215                                       "Misplaced _ in number");
13216                    lastub = s;
13217                 }
13218                 else
13219                     *d++ = *s;
13220             }
13221             /* fractional part ending in underbar? */
13222             if (s[-1] == '_') {
13223                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13224                                "Misplaced _ in number");
13225             }
13226             if (*s == '.' && isDIGIT(s[1])) {
13227                 /* oops, it's really a v-string, but without the "v" */
13228                 s = start;
13229                 goto vstring;
13230             }
13231         }
13232
13233         /* read exponent part, if present */
13234         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13235             floatit = TRUE;
13236             s++;
13237
13238             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13239             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
13240
13241             /* stray preinitial _ */
13242             if (*s == '_') {
13243                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13244                                "Misplaced _ in number");
13245                 lastub = s++;
13246             }
13247
13248             /* allow positive or negative exponent */
13249             if (*s == '+' || *s == '-')
13250                 *d++ = *s++;
13251
13252             /* stray initial _ */
13253             if (*s == '_') {
13254                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13255                                "Misplaced _ in number");
13256                 lastub = s++;
13257             }
13258
13259             /* read digits of exponent */
13260             while (isDIGIT(*s) || *s == '_') {
13261                 if (isDIGIT(*s)) {
13262                     if (d >= e)
13263                         Perl_croak(aTHX_ number_too_long);
13264                     *d++ = *s++;
13265                 }
13266                 else {
13267                    if (((lastub && s == lastub + 1) ||
13268                         (!isDIGIT(s[1]) && s[1] != '_')))
13269                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13270                                       "Misplaced _ in number");
13271                    lastub = s++;
13272                 }
13273             }
13274         }
13275
13276
13277         /*
13278            We try to do an integer conversion first if no characters
13279            indicating "float" have been found.
13280          */
13281
13282         if (!floatit) {
13283             UV uv;
13284             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13285
13286             if (flags == IS_NUMBER_IN_UV) {
13287               if (uv <= IV_MAX)
13288                 sv = newSViv(uv); /* Prefer IVs over UVs. */
13289               else
13290                 sv = newSVuv(uv);
13291             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13292               if (uv <= (UV) IV_MIN)
13293                 sv = newSViv(-(IV)uv);
13294               else
13295                 floatit = TRUE;
13296             } else
13297               floatit = TRUE;
13298         }
13299         if (floatit) {
13300             /* terminate the string */
13301             *d = '\0';
13302             nv = Atof(PL_tokenbuf);
13303             sv = newSVnv(nv);
13304         }
13305
13306         if ( floatit
13307              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13308             const char *const key = floatit ? "float" : "integer";
13309             const STRLEN keylen = floatit ? 5 : 7;
13310             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13311                                 key, keylen, sv, NULL, NULL, 0);
13312         }
13313         break;
13314
13315     /* if it starts with a v, it could be a v-string */
13316     case 'v':
13317 vstring:
13318                 sv = newSV(5); /* preallocate storage space */
13319                 s = scan_vstring(s, PL_bufend, sv);
13320         break;
13321     }
13322
13323     /* make the op for the constant and return */
13324
13325     if (sv)
13326         lvalp->opval = newSVOP(OP_CONST, 0, sv);
13327     else
13328         lvalp->opval = NULL;
13329
13330     return (char *)s;
13331 }
13332
13333 STATIC char *
13334 S_scan_formline(pTHX_ register char *s)
13335 {
13336     dVAR;
13337     register char *eol;
13338     register char *t;
13339     SV * const stuff = newSVpvs("");
13340     bool needargs = FALSE;
13341     bool eofmt = FALSE;
13342 #ifdef PERL_MAD
13343     char *tokenstart = s;
13344     SV* savewhite = NULL;
13345
13346     if (PL_madskills) {
13347         savewhite = PL_thiswhite;
13348         PL_thiswhite = 0;
13349     }
13350 #endif
13351
13352     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13353
13354     while (!needargs) {
13355         if (*s == '.') {
13356             t = s+1;
13357 #ifdef PERL_STRICT_CR
13358             while (SPACE_OR_TAB(*t))
13359                 t++;
13360 #else
13361             while (SPACE_OR_TAB(*t) || *t == '\r')
13362                 t++;
13363 #endif
13364             if (*t == '\n' || t == PL_bufend) {
13365                 eofmt = TRUE;
13366                 break;
13367             }
13368         }
13369         if (PL_in_eval && !PL_rsfp) {
13370             eol = (char *) memchr(s,'\n',PL_bufend-s);
13371             if (!eol++)
13372                 eol = PL_bufend;
13373         }
13374         else
13375             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13376         if (*s != '#') {
13377             for (t = s; t < eol; t++) {
13378                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13379                     needargs = FALSE;
13380                     goto enough;        /* ~~ must be first line in formline */
13381                 }
13382                 if (*t == '@' || *t == '^')
13383                     needargs = TRUE;
13384             }
13385             if (eol > s) {
13386                 sv_catpvn(stuff, s, eol-s);
13387 #ifndef PERL_STRICT_CR
13388                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13389                     char *end = SvPVX(stuff) + SvCUR(stuff);
13390                     end[-2] = '\n';
13391                     end[-1] = '\0';
13392                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13393                 }
13394 #endif
13395             }
13396             else
13397               break;
13398         }
13399         s = (char*)eol;
13400         if (PL_rsfp) {
13401             bool got_some;
13402 #ifdef PERL_MAD
13403             if (PL_madskills) {
13404                 if (PL_thistoken)
13405                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13406                 else
13407                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13408             }
13409 #endif
13410             PL_bufptr = PL_bufend;
13411             CopLINE_inc(PL_curcop);
13412             got_some = lex_next_chunk(0);
13413             CopLINE_dec(PL_curcop);
13414             s = PL_bufptr;
13415 #ifdef PERL_MAD
13416             tokenstart = PL_bufptr;
13417 #endif
13418             if (!got_some)
13419                 break;
13420         }
13421         incline(s);
13422     }
13423   enough:
13424     if (SvCUR(stuff)) {
13425         PL_expect = XTERM;
13426         if (needargs) {
13427             PL_lex_state = LEX_NORMAL;
13428             start_force(PL_curforce);
13429             NEXTVAL_NEXTTOKE.ival = 0;
13430             force_next(',');
13431         }
13432         else
13433             PL_lex_state = LEX_FORMLINE;
13434         if (!IN_BYTES) {
13435             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13436                 SvUTF8_on(stuff);
13437             else if (PL_encoding)
13438                 sv_recode_to_utf8(stuff, PL_encoding);
13439         }
13440         start_force(PL_curforce);
13441         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13442         force_next(THING);
13443         start_force(PL_curforce);
13444         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13445         force_next(LSTOP);
13446     }
13447     else {
13448         SvREFCNT_dec(stuff);
13449         if (eofmt)
13450             PL_lex_formbrack = 0;
13451         PL_bufptr = s;
13452     }
13453 #ifdef PERL_MAD
13454     if (PL_madskills) {
13455         if (PL_thistoken)
13456             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13457         else
13458             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13459         PL_thiswhite = savewhite;
13460     }
13461 #endif
13462     return s;
13463 }
13464
13465 I32
13466 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13467 {
13468     dVAR;
13469     const I32 oldsavestack_ix = PL_savestack_ix;
13470     CV* const outsidecv = PL_compcv;
13471
13472     if (PL_compcv) {
13473         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13474     }
13475     SAVEI32(PL_subline);
13476     save_item(PL_subname);
13477     SAVESPTR(PL_compcv);
13478
13479     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13480     CvFLAGS(PL_compcv) |= flags;
13481
13482     PL_subline = CopLINE(PL_curcop);
13483     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13484     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13485     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13486
13487     return oldsavestack_ix;
13488 }
13489
13490 #ifdef __SC__
13491 #pragma segment Perl_yylex
13492 #endif
13493 static int
13494 S_yywarn(pTHX_ const char *const s)
13495 {
13496     dVAR;
13497
13498     PERL_ARGS_ASSERT_YYWARN;
13499
13500     PL_in_eval |= EVAL_WARNONLY;
13501     yyerror(s);
13502     PL_in_eval &= ~EVAL_WARNONLY;
13503     return 0;
13504 }
13505
13506 int
13507 Perl_yyerror(pTHX_ const char *const s)
13508 {
13509     dVAR;
13510     const char *where = NULL;
13511     const char *context = NULL;
13512     int contlen = -1;
13513     SV *msg;
13514     int yychar  = PL_parser->yychar;
13515
13516     PERL_ARGS_ASSERT_YYERROR;
13517
13518     if (!yychar || (yychar == ';' && !PL_rsfp))
13519         where = "at EOF";
13520     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13521       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13522       PL_oldbufptr != PL_bufptr) {
13523         /*
13524                 Only for NetWare:
13525                 The code below is removed for NetWare because it abends/crashes on NetWare
13526                 when the script has error such as not having the closing quotes like:
13527                     if ($var eq "value)
13528                 Checking of white spaces is anyway done in NetWare code.
13529         */
13530 #ifndef NETWARE
13531         while (isSPACE(*PL_oldoldbufptr))
13532             PL_oldoldbufptr++;
13533 #endif
13534         context = PL_oldoldbufptr;
13535         contlen = PL_bufptr - PL_oldoldbufptr;
13536     }
13537     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13538       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13539         /*
13540                 Only for NetWare:
13541                 The code below is removed for NetWare because it abends/crashes on NetWare
13542                 when the script has error such as not having the closing quotes like:
13543                     if ($var eq "value)
13544                 Checking of white spaces is anyway done in NetWare code.
13545         */
13546 #ifndef NETWARE
13547         while (isSPACE(*PL_oldbufptr))
13548             PL_oldbufptr++;
13549 #endif
13550         context = PL_oldbufptr;
13551         contlen = PL_bufptr - PL_oldbufptr;
13552     }
13553     else if (yychar > 255)
13554         where = "next token ???";
13555     else if (yychar == -2) { /* YYEMPTY */
13556         if (PL_lex_state == LEX_NORMAL ||
13557            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13558             where = "at end of line";
13559         else if (PL_lex_inpat)
13560             where = "within pattern";
13561         else
13562             where = "within string";
13563     }
13564     else {
13565         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13566         if (yychar < 32)
13567             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13568         else if (isPRINT_LC(yychar)) {
13569             const char string = yychar;
13570             sv_catpvn(where_sv, &string, 1);
13571         }
13572         else
13573             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13574         where = SvPVX_const(where_sv);
13575     }
13576     msg = sv_2mortal(newSVpv(s, 0));
13577     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13578         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13579     if (context)
13580         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13581     else
13582         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13583     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13584         Perl_sv_catpvf(aTHX_ msg,
13585         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13586                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13587         PL_multi_end = 0;
13588     }
13589     if (PL_in_eval & EVAL_WARNONLY) {
13590         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13591     }
13592     else
13593         qerror(msg);
13594     if (PL_error_count >= 10) {
13595         if (PL_in_eval && SvCUR(ERRSV))
13596             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13597                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13598         else
13599             Perl_croak(aTHX_ "%s has too many errors.\n",
13600             OutCopFILE(PL_curcop));
13601     }
13602     PL_in_my = 0;
13603     PL_in_my_stash = NULL;
13604     return 0;
13605 }
13606 #ifdef __SC__
13607 #pragma segment Main
13608 #endif
13609
13610 STATIC char*
13611 S_swallow_bom(pTHX_ U8 *s)
13612 {
13613     dVAR;
13614     const STRLEN slen = SvCUR(PL_linestr);
13615
13616     PERL_ARGS_ASSERT_SWALLOW_BOM;
13617
13618     switch (s[0]) {
13619     case 0xFF:
13620         if (s[1] == 0xFE) {
13621             /* UTF-16 little-endian? (or UTF-32LE?) */
13622             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13623                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13624 #ifndef PERL_NO_UTF16_FILTER
13625             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13626             s += 2;
13627             if (PL_bufend > (char*)s) {
13628                 s = add_utf16_textfilter(s, TRUE);
13629             }
13630 #else
13631             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13632 #endif
13633         }
13634         break;
13635     case 0xFE:
13636         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13637 #ifndef PERL_NO_UTF16_FILTER
13638             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13639             s += 2;
13640             if (PL_bufend > (char *)s) {
13641                 s = add_utf16_textfilter(s, FALSE);
13642             }
13643 #else
13644             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13645 #endif
13646         }
13647         break;
13648     case 0xEF:
13649         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13650             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13651             s += 3;                      /* UTF-8 */
13652         }
13653         break;
13654     case 0:
13655         if (slen > 3) {
13656              if (s[1] == 0) {
13657                   if (s[2] == 0xFE && s[3] == 0xFF) {
13658                        /* UTF-32 big-endian */
13659                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13660                   }
13661              }
13662              else if (s[2] == 0 && s[3] != 0) {
13663                   /* Leading bytes
13664                    * 00 xx 00 xx
13665                    * are a good indicator of UTF-16BE. */
13666 #ifndef PERL_NO_UTF16_FILTER
13667                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13668                   s = add_utf16_textfilter(s, FALSE);
13669 #else
13670                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13671 #endif
13672              }
13673         }
13674 #ifdef EBCDIC
13675     case 0xDD:
13676         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13677             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13678             s += 4;                      /* UTF-8 */
13679         }
13680         break;
13681 #endif
13682
13683     default:
13684          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13685                   /* Leading bytes
13686                    * xx 00 xx 00
13687                    * are a good indicator of UTF-16LE. */
13688 #ifndef PERL_NO_UTF16_FILTER
13689               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13690               s = add_utf16_textfilter(s, TRUE);
13691 #else
13692               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13693 #endif
13694          }
13695     }
13696     return (char*)s;
13697 }
13698
13699
13700 #ifndef PERL_NO_UTF16_FILTER
13701 static I32
13702 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13703 {
13704     dVAR;
13705     SV *const filter = FILTER_DATA(idx);
13706     /* We re-use this each time round, throwing the contents away before we
13707        return.  */
13708     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13709     SV *const utf8_buffer = filter;
13710     IV status = IoPAGE(filter);
13711     const bool reverse = cBOOL(IoLINES(filter));
13712     I32 retval;
13713
13714     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13715
13716     /* As we're automatically added, at the lowest level, and hence only called
13717        from this file, we can be sure that we're not called in block mode. Hence
13718        don't bother writing code to deal with block mode.  */
13719     if (maxlen) {
13720         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13721     }
13722     if (status < 0) {
13723         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13724     }
13725     DEBUG_P(PerlIO_printf(Perl_debug_log,
13726                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13727                           FPTR2DPTR(void *, S_utf16_textfilter),
13728                           reverse ? 'l' : 'b', idx, maxlen, status,
13729                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13730
13731     while (1) {
13732         STRLEN chars;
13733         STRLEN have;
13734         I32 newlen;
13735         U8 *end;
13736         /* First, look in our buffer of existing UTF-8 data:  */
13737         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13738
13739         if (nl) {
13740             ++nl;
13741         } else if (status == 0) {
13742             /* EOF */
13743             IoPAGE(filter) = 0;
13744             nl = SvEND(utf8_buffer);
13745         }
13746         if (nl) {
13747             STRLEN got = nl - SvPVX(utf8_buffer);
13748             /* Did we have anything to append?  */
13749             retval = got != 0;
13750             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13751             /* Everything else in this code works just fine if SVp_POK isn't
13752                set.  This, however, needs it, and we need it to work, else
13753                we loop infinitely because the buffer is never consumed.  */
13754             sv_chop(utf8_buffer, nl);
13755             break;
13756         }
13757
13758         /* OK, not a complete line there, so need to read some more UTF-16.
13759            Read an extra octect if the buffer currently has an odd number. */
13760         while (1) {
13761             if (status <= 0)
13762                 break;
13763             if (SvCUR(utf16_buffer) >= 2) {
13764                 /* Location of the high octet of the last complete code point.
13765                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13766                    *coupled* with all the benefits of partial reads and
13767                    endianness.  */
13768                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13769                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13770
13771                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13772                     break;
13773                 }
13774
13775                 /* We have the first half of a surrogate. Read more.  */
13776                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13777             }
13778
13779             status = FILTER_READ(idx + 1, utf16_buffer,
13780                                  160 + (SvCUR(utf16_buffer) & 1));
13781             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13782             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13783             if (status < 0) {
13784                 /* Error */
13785                 IoPAGE(filter) = status;
13786                 return status;
13787             }
13788         }
13789
13790         chars = SvCUR(utf16_buffer) >> 1;
13791         have = SvCUR(utf8_buffer);
13792         SvGROW(utf8_buffer, have + chars * 3 + 1);
13793
13794         if (reverse) {
13795             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13796                                          (U8*)SvPVX_const(utf8_buffer) + have,
13797                                          chars * 2, &newlen);
13798         } else {
13799             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13800                                 (U8*)SvPVX_const(utf8_buffer) + have,
13801                                 chars * 2, &newlen);
13802         }
13803         SvCUR_set(utf8_buffer, have + newlen);
13804         *end = '\0';
13805
13806         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13807            it's private to us, and utf16_to_utf8{,reversed} take a
13808            (pointer,length) pair, rather than a NUL-terminated string.  */
13809         if(SvCUR(utf16_buffer) & 1) {
13810             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13811             SvCUR_set(utf16_buffer, 1);
13812         } else {
13813             SvCUR_set(utf16_buffer, 0);
13814         }
13815     }
13816     DEBUG_P(PerlIO_printf(Perl_debug_log,
13817                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13818                           status,
13819                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13820     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13821     return retval;
13822 }
13823
13824 static U8 *
13825 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13826 {
13827     SV *filter = filter_add(S_utf16_textfilter, NULL);
13828
13829     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13830
13831     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13832     sv_setpvs(filter, "");
13833     IoLINES(filter) = reversed;
13834     IoPAGE(filter) = 1; /* Not EOF */
13835
13836     /* Sadly, we have to return a valid pointer, come what may, so we have to
13837        ignore any error return from this.  */
13838     SvCUR_set(PL_linestr, 0);
13839     if (FILTER_READ(0, PL_linestr, 0)) {
13840         SvUTF8_on(PL_linestr);
13841     } else {
13842         SvUTF8_on(PL_linestr);
13843     }
13844     PL_bufend = SvEND(PL_linestr);
13845     return (U8*)SvPVX(PL_linestr);
13846 }
13847 #endif
13848
13849 /*
13850 Returns a pointer to the next character after the parsed
13851 vstring, as well as updating the passed in sv.
13852
13853 Function must be called like
13854
13855         sv = newSV(5);
13856         s = scan_vstring(s,e,sv);
13857
13858 where s and e are the start and end of the string.
13859 The sv should already be large enough to store the vstring
13860 passed in, for performance reasons.
13861
13862 */
13863
13864 char *
13865 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13866 {
13867     dVAR;
13868     const char *pos = s;
13869     const char *start = s;
13870
13871     PERL_ARGS_ASSERT_SCAN_VSTRING;
13872
13873     if (*pos == 'v') pos++;  /* get past 'v' */
13874     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13875         pos++;
13876     if ( *pos != '.') {
13877         /* this may not be a v-string if followed by => */
13878         const char *next = pos;
13879         while (next < e && isSPACE(*next))
13880             ++next;
13881         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13882             /* return string not v-string */
13883             sv_setpvn(sv,(char *)s,pos-s);
13884             return (char *)pos;
13885         }
13886     }
13887
13888     if (!isALPHA(*pos)) {
13889         U8 tmpbuf[UTF8_MAXBYTES+1];
13890
13891         if (*s == 'v')
13892             s++;  /* get past 'v' */
13893
13894         sv_setpvs(sv, "");
13895
13896         for (;;) {
13897             /* this is atoi() that tolerates underscores */
13898             U8 *tmpend;
13899             UV rev = 0;
13900             const char *end = pos;
13901             UV mult = 1;
13902             while (--end >= s) {
13903                 if (*end != '_') {
13904                     const UV orev = rev;
13905                     rev += (*end - '0') * mult;
13906                     mult *= 10;
13907                     if (orev > rev)
13908                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13909                                          "Integer overflow in decimal number");
13910                 }
13911             }
13912 #ifdef EBCDIC
13913             if (rev > 0x7FFFFFFF)
13914                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13915 #endif
13916             /* Append native character for the rev point */
13917             tmpend = uvchr_to_utf8(tmpbuf, rev);
13918             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13919             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13920                  SvUTF8_on(sv);
13921             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13922                  s = ++pos;
13923             else {
13924                  s = pos;
13925                  break;
13926             }
13927             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13928                  pos++;
13929         }
13930         SvPOK_on(sv);
13931         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13932         SvRMAGICAL_on(sv);
13933     }
13934     return (char *)s;
13935 }
13936
13937 int
13938 Perl_keyword_plugin_standard(pTHX_
13939         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13940 {
13941     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13942     PERL_UNUSED_CONTEXT;
13943     PERL_UNUSED_ARG(keyword_ptr);
13944     PERL_UNUSED_ARG(keyword_len);
13945     PERL_UNUSED_ARG(op_ptr);
13946     return KEYWORD_PLUGIN_DECLINE;
13947 }
13948
13949 /*
13950  * Local variables:
13951  * c-indentation-style: bsd
13952  * c-basic-offset: 4
13953  * indent-tabs-mode: t
13954  * End:
13955  *
13956  * ex: set ts=8 sts=4 sw=4 noet:
13957  */