toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  52 #define PL_lex_casemods         (PL_parser->lex_casemods)
  53 #define PL_lex_casestack        (PL_parser->lex_casestack)
  54 #define PL_lex_defer            (PL_parser->lex_defer)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_expect           (PL_parser->lex_expect)
  57 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  58 #define PL_lex_inpat            (PL_parser->lex_inpat)
  59 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  60 #define PL_lex_op               (PL_parser->lex_op)
  61 #define PL_lex_repl             (PL_parser->lex_repl)
  62 #define PL_lex_starts           (PL_parser->lex_starts)
  63 #define PL_lex_stuff            (PL_parser->lex_stuff)
  64 #define PL_multi_start          (PL_parser->multi_start)
  65 #define PL_multi_open           (PL_parser->multi_open)
  66 #define PL_multi_close          (PL_parser->multi_close)
  67 #define PL_pending_ident        (PL_parser->pending_ident)
  68 #define PL_preambled            (PL_parser->preambled)
  69 #define PL_sublex_info          (PL_parser->sublex_info)
  70 #define PL_linestr              (PL_parser->linestr)
  71 #define PL_expect               (PL_parser->expect)
  72 #define PL_copline              (PL_parser->copline)
  73 #define PL_bufptr               (PL_parser->bufptr)
  74 #define PL_oldbufptr            (PL_parser->oldbufptr)
  75 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  76 #define PL_linestart            (PL_parser->linestart)
  77 #define PL_bufend               (PL_parser->bufend)
  78 #define PL_last_uni             (PL_parser->last_uni)
  79 #define PL_last_lop             (PL_parser->last_lop)
  80 #define PL_last_lop_op          (PL_parser->last_lop_op)
  81 #define PL_lex_state            (PL_parser->lex_state)
  82 #define PL_rsfp                 (PL_parser->rsfp)
  83 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  84 #define PL_in_my                (PL_parser->in_my)
  85 #define PL_in_my_stash          (PL_parser->in_my_stash)
  86 #define PL_tokenbuf             (PL_parser->tokenbuf)
  87 #define PL_multi_end            (PL_parser->multi_end)
  88 #define PL_error_count          (PL_parser->error_count)
  89
  90 #ifdef PERL_MAD
  91 #  define PL_endwhite           (PL_parser->endwhite)
  92 #  define PL_faketokens         (PL_parser->faketokens)
  93 #  define PL_lasttoke           (PL_parser->lasttoke)
  94 #  define PL_nextwhite          (PL_parser->nextwhite)
  95 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  96 #  define PL_skipwhite          (PL_parser->skipwhite)
  97 #  define PL_thisclose          (PL_parser->thisclose)
  98 #  define PL_thismad            (PL_parser->thismad)
  99 #  define PL_thisopen           (PL_parser->thisopen)
 100 #  define PL_thisstuff          (PL_parser->thisstuff)
 101 #  define PL_thistoken          (PL_parser->thistoken)
 102 #  define PL_thiswhite          (PL_parser->thiswhite)
 103 #  define PL_thiswhite          (PL_parser->thiswhite)
 104 #  define PL_nexttoke           (PL_parser->nexttoke)
 105 #  define PL_curforce           (PL_parser->curforce)
 106 #else
 107 #  define PL_nexttoke           (PL_parser->nexttoke)
 108 #  define PL_nexttype           (PL_parser->nexttype)
 109 #  define PL_nextval            (PL_parser->nextval)
 110 #endif
 111
 112 /* This can't be done with embed.fnc, because struct yy_parser contains a
 113    member named pending_ident, which clashes with the generated #define  */
 114 static int
 115 S_pending_ident(pTHX);
 116
 117 static const char ident_too_long[] = "Identifier too long";
 118
 119 #ifdef PERL_MAD
 120 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 121 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 122 #else
 123 #  define CURMAD(slot,sv)
 124 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 125 #endif
 126
 127 #define XFAKEBRACK 128
 128 #define XENUMMASK 127
 129
 130 #ifdef USE_UTF8_SCRIPTS
 131 #   define UTF (!IN_BYTES)
 132 #else
 133 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 134 #endif
 135
 136 /* The maximum number of characters preceding the unrecognized one to display */
 137 #define UNRECOGNIZED_PRECEDE_COUNT 10
 138
 139 /* In variables named $^X, these are the legal values for X.
 140  * 1999-02-27 mjd-perl-patch@plover.com */
 141 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 142
 143 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 144
 145 /* LEX_* are values for PL_lex_state, the state of the lexer.
 146  * They are arranged oddly so that the guard on the switch statement
 147  * can get by with a single comparison (if the compiler is smart enough).
 148  */
 149
 150 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 151
 152 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 153 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 154 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 155 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 156 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 157
 158                                    /* at end of code, eg "$x" followed by:  */
 159 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 160 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 161
 162 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 163                                         string or after \E, $foo, etc       */
 164 #define LEX_INTERPCONST          2 /* NOT USED */
 165 #define LEX_FORMLINE             1 /* expecting a format line               */
 166 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 167
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #ifdef ff_next
 186 #undef ff_next
 187 #endif
 188
 189 #include "keywords.h"
 190
 191 /* CLINE is a macro that ensures PL_copline has a sane value */
 192
 193 #ifdef CLINE
 194 #undef CLINE
 195 #endif
 196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 197
 198 #ifdef PERL_MAD
 199 #  define SKIPSPACE0(s) skipspace0(s)
 200 #  define SKIPSPACE1(s) skipspace1(s)
 201 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 202 #  define PEEKSPACE(s) skipspace2(s,0)
 203 #else
 204 #  define SKIPSPACE0(s) skipspace(s)
 205 #  define SKIPSPACE1(s) skipspace(s)
 206 #  define SKIPSPACE2(s,tsv) skipspace(s)
 207 #  define PEEKSPACE(s) skipspace(s)
 208 #endif
 209
 210 /*
 211  * Convenience functions to return different tokens and prime the
 212  * lexer for the next token.  They all take an argument.
 213  *
 214  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 215  * OPERATOR     : generic operator
 216  * AOPERATOR    : assignment operator
 217  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 218  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 219  * PREREF       : *EXPR where EXPR is not a simple identifier
 220  * TERM         : expression term
 221  * LOOPX        : loop exiting command (goto, last, dump, etc)
 222  * FTST         : file test operator
 223  * FUN0         : zero-argument function
 224  * FUN1         : not used, except for not, which isn't a UNIOP
 225  * BOop         : bitwise or or xor
 226  * BAop         : bitwise and
 227  * SHop         : shift operator
 228  * PWop         : power operator
 229  * PMop         : pattern-matching operator
 230  * Aop          : addition-level operator
 231  * Mop          : multiplication-level operator
 232  * Eop          : equality-testing operator
 233  * Rop          : relational operator <= != gt
 234  *
 235  * Also see LOP and lop() below.
 236  */
 237
 238 #ifdef DEBUGGING /* Serve -DT. */
 239 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 240 #else
 241 #   define REPORT(retval) (retval)
 242 #endif
 243
 244 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 245 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 246 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 247 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 248 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 249 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 250 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 251 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 252 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 253 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 254 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 255 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 256 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 257 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 258 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 259 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 260 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 261 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 262 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 263 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 264
 265 /* This bit of chicanery makes a unary function followed by
 266  * a parenthesis into a function with one argument, highest precedence.
 267  * The UNIDOR macro is for unary functions that can be followed by the //
 268  * operator (such as C<shift // 0>).
 269  */
 270 #define UNI2(f,x) { \
 271         pl_yylval.ival = f; \
 272         PL_expect = x; \
 273         PL_bufptr = s; \
 274         PL_last_uni = PL_oldbufptr; \
 275         PL_last_lop_op = f; \
 276         if (*s == '(') \
 277             return REPORT( (int)FUNC1 ); \
 278         s = PEEKSPACE(s); \
 279         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 280         }
 281 #define UNI(f)    UNI2(f,XTERM)
 282 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 283
 284 #define UNIBRACK(f) { \
 285         pl_yylval.ival = f; \
 286         PL_bufptr = s; \
 287         PL_last_uni = PL_oldbufptr; \
 288         if (*s == '(') \
 289             return REPORT( (int)FUNC1 ); \
 290         s = PEEKSPACE(s); \
 291         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 292         }
 293
 294 /* grandfather return to old style */
 295 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 296
 297 #ifdef DEBUGGING
 298
 299 /* how to interpret the pl_yylval associated with the token */
 300 enum token_type {
 301     TOKENTYPE_NONE,
 302     TOKENTYPE_IVAL,
 303     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 304     TOKENTYPE_PVAL,
 305     TOKENTYPE_OPVAL,
 306     TOKENTYPE_GVVAL
 307 };
 308
 309 static struct debug_tokens {
 310     const int token;
 311     enum token_type type;
 312     const char *name;
 313 } const debug_tokens[] =
 314 {
 315     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 316     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 317     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 318     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 319     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 320     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 321     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 322     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 323     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 324     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 325     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 326     { DO,               TOKENTYPE_NONE,         "DO" },
 327     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 328     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 329     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 330     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 331     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 332     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 333     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 334     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 335     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 336     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 337     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 338     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 339     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 340     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 341     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 342     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 343     { IF,               TOKENTYPE_IVAL,         "IF" },
 344     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 345     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 346     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 347     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 348     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 349     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 350     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 351     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 352     { MY,               TOKENTYPE_IVAL,         "MY" },
 353     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 354     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 355     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 356     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 357     { OROR,             TOKENTYPE_NONE,         "OROR" },
 358     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 359     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 360     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 361     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 362     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 363     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 364     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 365     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 366     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 367     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 368     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 369     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 370     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 371     { SUB,              TOKENTYPE_NONE,         "SUB" },
 372     { THING,            TOKENTYPE_OPVAL,        "THING" },
 373     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 374     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 375     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 376     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 377     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 378     { USE,              TOKENTYPE_IVAL,         "USE" },
 379     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 380     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 381     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 382     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 383     { 0,                TOKENTYPE_NONE,         NULL }
 384 };
 385
 386 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 387
 388 STATIC int
 389 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 390 {
 391     dVAR;
 392
 393     PERL_ARGS_ASSERT_TOKEREPORT;
 394
 395     if (DEBUG_T_TEST) {
 396         const char *name = NULL;
 397         enum token_type type = TOKENTYPE_NONE;
 398         const struct debug_tokens *p;
 399         SV* const report = newSVpvs("<== ");
 400
 401         for (p = debug_tokens; p->token; p++) {
 402             if (p->token == (int)rv) {
 403                 name = p->name;
 404                 type = p->type;
 405                 break;
 406             }
 407         }
 408         if (name)
 409             Perl_sv_catpv(aTHX_ report, name);
 410         else if ((char)rv > ' ' && (char)rv < '~')
 411             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 412         else if (!rv)
 413             sv_catpvs(report, "EOF");
 414         else
 415             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 416         switch (type) {
 417         case TOKENTYPE_NONE:
 418         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 419             break;
 420         case TOKENTYPE_IVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 422             break;
 423         case TOKENTYPE_OPNUM:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 425                                     PL_op_name[lvalp->ival]);
 426             break;
 427         case TOKENTYPE_PVAL:
 428             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 429             break;
 430         case TOKENTYPE_OPVAL:
 431             if (lvalp->opval) {
 432                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 433                                     PL_op_name[lvalp->opval->op_type]);
 434                 if (lvalp->opval->op_type == OP_CONST) {
 435                     Perl_sv_catpvf(aTHX_ report, " %s",
 436                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 437                 }
 438
 439             }
 440             else
 441                 sv_catpvs(report, "(opval=null)");
 442             break;
 443         }
 444         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 445     };
 446     return (int)rv;
 447 }
 448
 449
 450 /* print the buffer with suitable escapes */
 451
 452 STATIC void
 453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 454 {
 455     SV* const tmp = newSVpvs("");
 456
 457     PERL_ARGS_ASSERT_PRINTBUF;
 458
 459     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 460     SvREFCNT_dec(tmp);
 461 }
 462
 463 #endif
 464
 465 static int
 466 S_deprecate_commaless_var_list(pTHX) {
 467     PL_expect = XTERM;
 468     deprecate("comma-less variable list");
 469     return REPORT(','); /* grandfather non-comma-format format */
 470 }
 471
 472 /*
 473  * S_ao
 474  *
 475  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 476  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 477  */
 478
 479 STATIC int
 480 S_ao(pTHX_ int toketype)
 481 {
 482     dVAR;
 483     if (*PL_bufptr == '=') {
 484         PL_bufptr++;
 485         if (toketype == ANDAND)
 486             pl_yylval.ival = OP_ANDASSIGN;
 487         else if (toketype == OROR)
 488             pl_yylval.ival = OP_ORASSIGN;
 489         else if (toketype == DORDOR)
 490             pl_yylval.ival = OP_DORASSIGN;
 491         toketype = ASSIGNOP;
 492     }
 493     return toketype;
 494 }
 495
 496 /*
 497  * S_no_op
 498  * When Perl expects an operator and finds something else, no_op
 499  * prints the warning.  It always prints "<something> found where
 500  * operator expected.  It prints "Missing semicolon on previous line?"
 501  * if the surprise occurs at the start of the line.  "do you need to
 502  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 503  * where the compiler doesn't know if foo is a method call or a function.
 504  * It prints "Missing operator before end of line" if there's nothing
 505  * after the missing operator, or "... before <...>" if there is something
 506  * after the missing operator.
 507  */
 508
 509 STATIC void
 510 S_no_op(pTHX_ const char *const what, char *s)
 511 {
 512     dVAR;
 513     char * const oldbp = PL_bufptr;
 514     const bool is_first = (PL_oldbufptr == PL_linestart);
 515
 516     PERL_ARGS_ASSERT_NO_OP;
 517
 518     if (!s)
 519         s = oldbp;
 520     else
 521         PL_bufptr = s;
 522     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 523     if (ckWARN_d(WARN_SYNTAX)) {
 524         if (is_first)
 525             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 526                     "\t(Missing semicolon on previous line?)\n");
 527         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 528             const char *t;
 529             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 530                 NOOP;
 531             if (t < PL_bufptr && isSPACE(*t))
 532                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 533                         "\t(Do you need to predeclare %.*s?)\n",
 534                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 535         }
 536         else {
 537             assert(s >= oldbp);
 538             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 539                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 540         }
 541     }
 542     PL_bufptr = oldbp;
 543 }
 544
 545 /*
 546  * S_missingterm
 547  * Complain about missing quote/regexp/heredoc terminator.
 548  * If it's called with NULL then it cauterizes the line buffer.
 549  * If we're in a delimited string and the delimiter is a control
 550  * character, it's reformatted into a two-char sequence like ^C.
 551  * This is fatal.
 552  */
 553
 554 STATIC void
 555 S_missingterm(pTHX_ char *s)
 556 {
 557     dVAR;
 558     char tmpbuf[3];
 559     char q;
 560     if (s) {
 561         char * const nl = strrchr(s,'\n');
 562         if (nl)
 563             *nl = '\0';
 564     }
 565     else if (isCNTRL(PL_multi_close)) {
 566         *tmpbuf = '^';
 567         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 568         tmpbuf[2] = '\0';
 569         s = tmpbuf;
 570     }
 571     else {
 572         *tmpbuf = (char)PL_multi_close;
 573         tmpbuf[1] = '\0';
 574         s = tmpbuf;
 575     }
 576     q = strchr(s,'"') ? '\'' : '"';
 577     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 578 }
 579
 580 #define FEATURE_IS_ENABLED(name)                                        \
 581         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 582             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 583 /* The longest string we pass in.  */
 584 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 585
 586 /*
 587  * S_feature_is_enabled
 588  * Check whether the named feature is enabled.
 589  */
 590 STATIC bool
 591 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 592 {
 593     dVAR;
 594     HV * const hinthv = GvHV(PL_hintgv);
 595     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 596
 597     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 598
 599     assert(namelen <= MAX_FEATURE_LEN);
 600     memcpy(&he_name[8], name, namelen);
 601
 602     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 603 }
 604
 605 /*
 606  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 607  * utf16-to-utf8-reversed.
 608  */
 609
 610 #ifdef PERL_CR_FILTER
 611 static void
 612 strip_return(SV *sv)
 613 {
 614     register const char *s = SvPVX_const(sv);
 615     register const char * const e = s + SvCUR(sv);
 616
 617     PERL_ARGS_ASSERT_STRIP_RETURN;
 618
 619     /* outer loop optimized to do nothing if there are no CR-LFs */
 620     while (s < e) {
 621         if (*s++ == '\r' && *s == '\n') {
 622             /* hit a CR-LF, need to copy the rest */
 623             register char *d = s - 1;
 624             *d++ = *s++;
 625             while (s < e) {
 626                 if (*s == '\r' && s[1] == '\n')
 627                     s++;
 628                 *d++ = *s++;
 629             }
 630             SvCUR(sv) -= s - d;
 631             return;
 632         }
 633     }
 634 }
 635
 636 STATIC I32
 637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 638 {
 639     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 640     if (count > 0 && !maxlen)
 641         strip_return(sv);
 642     return count;
 643 }
 644 #endif
 645
 646
 647
 648 /*
 649  * Perl_lex_start
 650  *
 651  * Create a parser object and initialise its parser and lexer fields
 652  *
 653  * rsfp       is the opened file handle to read from (if any),
 654  *
 655  * line       holds any initial content already read from the file (or in
 656  *            the case of no file, such as an eval, the whole contents);
 657  *
 658  * new_filter indicates that this is a new file and it shouldn't inherit
 659  *            the filters from the current parser (ie require).
 660  */
 661
 662 void
 663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 664 {
 665     dVAR;
 666     const char *s = NULL;
 667     STRLEN len;
 668     yy_parser *parser, *oparser;
 669
 670     /* create and initialise a parser */
 671
 672     Newxz(parser, 1, yy_parser);
 673     parser->old_parser = oparser = PL_parser;
 674     PL_parser = parser;
 675
 676     parser->stack = NULL;
 677     parser->ps = NULL;
 678     parser->stack_size = 0;
 679
 680     /* on scope exit, free this parser and restore any outer one */
 681     SAVEPARSER(parser);
 682     parser->saved_curcop = PL_curcop;
 683
 684     /* initialise lexer state */
 685
 686 #ifdef PERL_MAD
 687     parser->curforce = -1;
 688 #else
 689     parser->nexttoke = 0;
 690 #endif
 691     parser->error_count = oparser ? oparser->error_count : 0;
 692     parser->copline = NOLINE;
 693     parser->lex_state = LEX_NORMAL;
 694     parser->expect = XSTATE;
 695     parser->rsfp = rsfp;
 696     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 697                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 698
 699     Newx(parser->lex_brackstack, 120, char);
 700     Newx(parser->lex_casestack, 12, char);
 701     *parser->lex_casestack = '\0';
 702
 703     if (line) {
 704         s = SvPV_const(line, len);
 705     } else {
 706         len = 0;
 707     }
 708
 709     if (!len) {
 710         parser->linestr = newSVpvs("\n;");
 711     } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
 712         /* avoid tie/overload weirdness */
 713         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 714         if (s[len-1] != ';')
 715             sv_catpvs(parser->linestr, "\n;");
 716     } else {
 717         SvTEMP_off(line);
 718         SvREFCNT_inc_simple_void_NN(line);
 719         parser->linestr = line;
 720     }
 721     parser->oldoldbufptr =
 722         parser->oldbufptr =
 723         parser->bufptr =
 724         parser->linestart = SvPVX(parser->linestr);
 725     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 726     parser->last_lop = parser->last_uni = NULL;
 727 }
 728
 729
 730 /* delete a parser object */
 731
 732 void
 733 Perl_parser_free(pTHX_  const yy_parser *parser)
 734 {
 735     PERL_ARGS_ASSERT_PARSER_FREE;
 736
 737     PL_curcop = parser->saved_curcop;
 738     SvREFCNT_dec(parser->linestr);
 739
 740     if (parser->rsfp == PerlIO_stdin())
 741         PerlIO_clearerr(parser->rsfp);
 742     else if (parser->rsfp && (!parser->old_parser ||
 743                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 744         PerlIO_close(parser->rsfp);
 745     SvREFCNT_dec(parser->rsfp_filters);
 746
 747     Safefree(parser->lex_brackstack);
 748     Safefree(parser->lex_casestack);
 749     PL_parser = parser->old_parser;
 750     Safefree(parser);
 751 }
 752
 753
 754 /*
 755  * Perl_lex_end
 756  * Finalizer for lexing operations.  Must be called when the parser is
 757  * done with the lexer.
 758  */
 759
 760 void
 761 Perl_lex_end(pTHX)
 762 {
 763     dVAR;
 764     PL_doextract = FALSE;
 765 }
 766
 767 /*
 768 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 769
 770 Buffer scalar containing the chunk currently under consideration of the
 771 text currently being lexed.  This is always a plain string scalar (for
 772 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 773 normal scalar means; instead refer to the buffer directly by the pointer
 774 variables described below.
 775
 776 The lexer maintains various C<char*> pointers to things in the
 777 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 778 reallocated, all of these pointers must be updated.  Don't attempt to
 779 do this manually, but rather use L</lex_grow_linestr> if you need to
 780 reallocate the buffer.
 781
 782 The content of the text chunk in the buffer is commonly exactly one
 783 complete line of input, up to and including a newline terminator,
 784 but there are situations where it is otherwise.  The octets of the
 785 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 786 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 787 flag on this scalar, which may disagree with it.
 788
 789 For direct examination of the buffer, the variable
 790 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 791 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 792 of these pointers is usually preferable to examination of the scalar
 793 through normal scalar means.
 794
 795 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 796
 797 Direct pointer to the end of the chunk of text currently being lexed, the
 798 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 799 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 800 always located at the end of the buffer, and does not count as part of
 801 the buffer's contents.
 802
 803 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 804
 805 Points to the current position of lexing inside the lexer buffer.
 806 Characters around this point may be freely examined, within
 807 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 808 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 809 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 810
 811 Lexing code (whether in the Perl core or not) moves this pointer past
 812 the characters that it consumes.  It is also expected to perform some
 813 bookkeeping whenever a newline character is consumed.  This movement
 814 can be more conveniently performed by the function L</lex_read_to>,
 815 which handles newlines appropriately.
 816
 817 Interpretation of the buffer's octets can be abstracted out by
 818 using the slightly higher-level functions L</lex_peek_unichar> and
 819 L</lex_read_unichar>.
 820
 821 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 822
 823 Points to the start of the current line inside the lexer buffer.
 824 This is useful for indicating at which column an error occurred, and
 825 not much else.  This must be updated by any lexing code that consumes
 826 a newline; the function L</lex_read_to> handles this detail.
 827
 828 =cut
 829 */
 830
 831 /*
 832 =for apidoc Amx|bool|lex_bufutf8
 833
 834 Indicates whether the octets in the lexer buffer
 835 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 836 of Unicode characters.  If not, they should be interpreted as Latin-1
 837 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 838
 839 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 840 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 841 encoding.
 842
 843 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 844 is significant, but not the whole story regarding the input character
 845 encoding.  Normally, when a file is being read, the scalar contains octets
 846 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 847 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 848 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 849 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 850 is in effect.  This logic may change in the future; use this function
 851 instead of implementing the logic yourself.
 852
 853 =cut
 854 */
 855
 856 bool
 857 Perl_lex_bufutf8(pTHX)
 858 {
 859     return UTF;
 860 }
 861
 862 /*
 863 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 864
 865 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 866 at least I<len> octets (including terminating NUL).  Returns a
 867 pointer to the reallocated buffer.  This is necessary before making
 868 any direct modification of the buffer that would increase its length.
 869 L</lex_stuff_pvn> provides a more convenient way to insert text into
 870 the buffer.
 871
 872 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 873 this function updates all of the lexer's variables that point directly
 874 into the buffer.
 875
 876 =cut
 877 */
 878
 879 char *
 880 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 881 {
 882     SV *linestr;
 883     char *buf;
 884     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 885     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 886     linestr = PL_parser->linestr;
 887     buf = SvPVX(linestr);
 888     if (len <= SvLEN(linestr))
 889         return buf;
 890     bufend_pos = PL_parser->bufend - buf;
 891     bufptr_pos = PL_parser->bufptr - buf;
 892     oldbufptr_pos = PL_parser->oldbufptr - buf;
 893     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 894     linestart_pos = PL_parser->linestart - buf;
 895     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 896     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 897     buf = sv_grow(linestr, len);
 898     PL_parser->bufend = buf + bufend_pos;
 899     PL_parser->bufptr = buf + bufptr_pos;
 900     PL_parser->oldbufptr = buf + oldbufptr_pos;
 901     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 902     PL_parser->linestart = buf + linestart_pos;
 903     if (PL_parser->last_uni)
 904         PL_parser->last_uni = buf + last_uni_pos;
 905     if (PL_parser->last_lop)
 906         PL_parser->last_lop = buf + last_lop_pos;
 907     return buf;
 908 }
 909
 910 /*
 911 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 912
 913 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 914 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 915 reallocating the buffer if necessary.  This means that lexing code that
 916 runs later will see the characters as if they had appeared in the input.
 917 It is not recommended to do this as part of normal parsing, and most
 918 uses of this facility run the risk of the inserted characters being
 919 interpreted in an unintended manner.
 920
 921 The string to be inserted is represented by I<len> octets starting
 922 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 923 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 924 The characters are recoded for the lexer buffer, according to how the
 925 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 926 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 927 function is more convenient.
 928
 929 =cut
 930 */
 931
 932 void
 933 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 934 {
 935     dVAR;
 936     char *bufptr;
 937     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 938     if (flags & ~(LEX_STUFF_UTF8))
 939         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 940     if (UTF) {
 941         if (flags & LEX_STUFF_UTF8) {
 942             goto plain_copy;
 943         } else {
 944             STRLEN highhalf = 0;
 945             const char *p, *e = pv+len;
 946             for (p = pv; p != e; p++)
 947                 highhalf += !!(((U8)*p) & 0x80);
 948             if (!highhalf)
 949                 goto plain_copy;
 950             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 951             bufptr = PL_parser->bufptr;
 952             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 953             SvCUR_set(PL_parser->linestr,
 954                 SvCUR(PL_parser->linestr) + len+highhalf);
 955             PL_parser->bufend += len+highhalf;
 956             for (p = pv; p != e; p++) {
 957                 U8 c = (U8)*p;
 958                 if (c & 0x80) {
 959                     *bufptr++ = (char)(0xc0 | (c >> 6));
 960                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 961                 } else {
 962                     *bufptr++ = (char)c;
 963                 }
 964             }
 965         }
 966     } else {
 967         if (flags & LEX_STUFF_UTF8) {
 968             STRLEN highhalf = 0;
 969             const char *p, *e = pv+len;
 970             for (p = pv; p != e; p++) {
 971                 U8 c = (U8)*p;
 972                 if (c >= 0xc4) {
 973                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 974                                 "non-Latin-1 character into Latin-1 input");
 975                 } else if (c >= 0xc2 && p+1 != e &&
 976                             (((U8)p[1]) & 0xc0) == 0x80) {
 977                     p++;
 978                     highhalf++;
 979                 } else if (c >= 0x80) {
 980                     /* malformed UTF-8 */
 981                     ENTER;
 982                     SAVESPTR(PL_warnhook);
 983                     PL_warnhook = PERL_WARNHOOK_FATAL;
 984                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 985                     LEAVE;
 986                 }
 987             }
 988             if (!highhalf)
 989                 goto plain_copy;
 990             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 991             bufptr = PL_parser->bufptr;
 992             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 993             SvCUR_set(PL_parser->linestr,
 994                 SvCUR(PL_parser->linestr) + len-highhalf);
 995             PL_parser->bufend += len-highhalf;
 996             for (p = pv; p != e; p++) {
 997                 U8 c = (U8)*p;
 998                 if (c & 0x80) {
 999                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1000                     p++;
1001                 } else {
1002                     *bufptr++ = (char)c;
1003                 }
1004             }
1005         } else {
1006             plain_copy:
1007             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1008             bufptr = PL_parser->bufptr;
1009             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1010             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is the string value of I<sv>.  The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039 {
1040     char *pv;
1041     STRLEN len;
1042     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043     if (flags)
1044         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045     pv = SvPV(sv, len);
1046     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1056
1057 This is not the normal way to consume lexed text.  For that, use
1058 L</lex_read_to>.
1059
1060 =cut
1061 */
1062
1063 void
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1065 {
1066     char *buf, *bufend;
1067     STRLEN unstuff_len;
1068     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069     buf = PL_parser->bufptr;
1070     if (ptr < buf)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072     if (ptr == buf)
1073         return;
1074     bufend = PL_parser->bufend;
1075     if (ptr > bufend)
1076         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077     unstuff_len = ptr - buf;
1078     Move(ptr, buf, bufend+1-ptr, char);
1079     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080     PL_parser->bufend = bufend - unstuff_len;
1081 }
1082
1083 /*
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1085
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1090
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_read_to(pTHX_ char *ptr)
1100 {
1101     char *s;
1102     PERL_ARGS_ASSERT_LEX_READ_TO;
1103     s = PL_parser->bufptr;
1104     if (ptr < s || ptr > PL_parser->bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106     for (; s != ptr; s++)
1107         if (*s == '\n') {
1108             CopLINE_inc(PL_curcop);
1109             PL_parser->linestart = s+1;
1110         }
1111     PL_parser->bufptr = ptr;
1112 }
1113
1114 /*
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>.  The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately.  I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1122
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it.  However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer for
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1130
1131 =cut
1132 */
1133
1134 void
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1136 {
1137     char *buf;
1138     STRLEN discard_len;
1139     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140     buf = SvPVX(PL_parser->linestr);
1141     if (ptr < buf)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143     if (ptr == buf)
1144         return;
1145     if (ptr > PL_parser->bufptr)
1146         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147     discard_len = ptr - buf;
1148     if (PL_parser->oldbufptr < ptr)
1149         PL_parser->oldbufptr = ptr;
1150     if (PL_parser->oldoldbufptr < ptr)
1151         PL_parser->oldoldbufptr = ptr;
1152     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153         PL_parser->last_uni = NULL;
1154     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155         PL_parser->last_lop = NULL;
1156     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158     PL_parser->bufend -= discard_len;
1159     PL_parser->bufptr -= discard_len;
1160     PL_parser->oldbufptr -= discard_len;
1161     PL_parser->oldoldbufptr -= discard_len;
1162     if (PL_parser->last_uni)
1163         PL_parser->last_uni -= discard_len;
1164     if (PL_parser->last_lop)
1165         PL_parser->last_lop -= discard_len;
1166 }
1167
1168 /*
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more.  It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1176
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded.  If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1183
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1186
1187 =cut
1188 */
1189
1190 #define LEX_FAKE_EOF 0x80000000
1191
1192 bool
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1194 {
1195     SV *linestr;
1196     char *buf;
1197     STRLEN old_bufend_pos, new_bufend_pos;
1198     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200     bool got_some_for_debugger = 0;
1201     bool got_some;
1202     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1204     linestr = PL_parser->linestr;
1205     buf = SvPVX(linestr);
1206     if (!(flags & LEX_KEEP_PREVIOUS) &&
1207             PL_parser->bufptr == PL_parser->bufend) {
1208         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1209         linestart_pos = 0;
1210         if (PL_parser->last_uni != PL_parser->bufend)
1211             PL_parser->last_uni = NULL;
1212         if (PL_parser->last_lop != PL_parser->bufend)
1213             PL_parser->last_lop = NULL;
1214         last_uni_pos = last_lop_pos = 0;
1215         *buf = 0;
1216         SvCUR(linestr) = 0;
1217     } else {
1218         old_bufend_pos = PL_parser->bufend - buf;
1219         bufptr_pos = PL_parser->bufptr - buf;
1220         oldbufptr_pos = PL_parser->oldbufptr - buf;
1221         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1222         linestart_pos = PL_parser->linestart - buf;
1223         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1224         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1225     }
1226     if (flags & LEX_FAKE_EOF) {
1227         goto eof;
1228     } else if (!PL_parser->rsfp) {
1229         got_some = 0;
1230     } else if (filter_gets(linestr, old_bufend_pos)) {
1231         got_some = 1;
1232         got_some_for_debugger = 1;
1233     } else {
1234         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1235             sv_setpvs(linestr, "");
1236         eof:
1237         /* End of real input.  Close filehandle (unless it was STDIN),
1238          * then add implicit termination.
1239          */
1240         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1241             PerlIO_clearerr(PL_parser->rsfp);
1242         else if (PL_parser->rsfp)
1243             (void)PerlIO_close(PL_parser->rsfp);
1244         PL_parser->rsfp = NULL;
1245         PL_doextract = FALSE;
1246 #ifdef PERL_MAD
1247         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1248             PL_faketokens = 1;
1249 #endif
1250         if (!PL_in_eval && PL_minus_p) {
1251             sv_catpvs(linestr,
1252                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1253             PL_minus_n = PL_minus_p = 0;
1254         } else if (!PL_in_eval && PL_minus_n) {
1255             sv_catpvs(linestr, /*{*/";}");
1256             PL_minus_n = 0;
1257         } else
1258             sv_catpvs(linestr, ";");
1259         got_some = 1;
1260     }
1261     buf = SvPVX(linestr);
1262     new_bufend_pos = SvCUR(linestr);
1263     PL_parser->bufend = buf + new_bufend_pos;
1264     PL_parser->bufptr = buf + bufptr_pos;
1265     PL_parser->oldbufptr = buf + oldbufptr_pos;
1266     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1267     PL_parser->linestart = buf + linestart_pos;
1268     if (PL_parser->last_uni)
1269         PL_parser->last_uni = buf + last_uni_pos;
1270     if (PL_parser->last_lop)
1271         PL_parser->last_lop = buf + last_lop_pos;
1272     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1273             PL_curstash != PL_debstash) {
1274         /* debugger active and we're not compiling the debugger code,
1275          * so store the line into the debugger's array of lines
1276          */
1277         update_debugger_info(NULL, buf+old_bufend_pos,
1278             new_bufend_pos-old_bufend_pos);
1279     }
1280     return got_some;
1281 }
1282
1283 /*
1284 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1285
1286 Looks ahead one (Unicode) character in the text currently being lexed.
1287 Returns the codepoint (unsigned integer value) of the next character,
1288 or -1 if lexing has reached the end of the input text.  To consume the
1289 peeked character, use L</lex_read_unichar>.
1290
1291 If the next character is in (or extends into) the next chunk of input
1292 text, the next chunk will be read in.  Normally the current chunk will be
1293 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1294 then the current chunk will not be discarded.
1295
1296 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1297 is encountered, an exception is generated.
1298
1299 =cut
1300 */
1301
1302 I32
1303 Perl_lex_peek_unichar(pTHX_ U32 flags)
1304 {
1305     dVAR;
1306     char *s, *bufend;
1307     if (flags & ~(LEX_KEEP_PREVIOUS))
1308         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1309     s = PL_parser->bufptr;
1310     bufend = PL_parser->bufend;
1311     if (UTF) {
1312         U8 head;
1313         I32 unichar;
1314         STRLEN len, retlen;
1315         if (s == bufend) {
1316             if (!lex_next_chunk(flags))
1317                 return -1;
1318             s = PL_parser->bufptr;
1319             bufend = PL_parser->bufend;
1320         }
1321         head = (U8)*s;
1322         if (!(head & 0x80))
1323             return head;
1324         if (head & 0x40) {
1325             len = PL_utf8skip[head];
1326             while ((STRLEN)(bufend-s) < len) {
1327                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1328                     break;
1329                 s = PL_parser->bufptr;
1330                 bufend = PL_parser->bufend;
1331             }
1332         }
1333         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1334         if (retlen == (STRLEN)-1) {
1335             /* malformed UTF-8 */
1336             ENTER;
1337             SAVESPTR(PL_warnhook);
1338             PL_warnhook = PERL_WARNHOOK_FATAL;
1339             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1340             LEAVE;
1341         }
1342         return unichar;
1343     } else {
1344         if (s == bufend) {
1345             if (!lex_next_chunk(flags))
1346                 return -1;
1347             s = PL_parser->bufptr;
1348         }
1349         return (U8)*s;
1350     }
1351 }
1352
1353 /*
1354 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1355
1356 Reads the next (Unicode) character in the text currently being lexed.
1357 Returns the codepoint (unsigned integer value) of the character read,
1358 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1359 if lexing has reached the end of the input text.  To non-destructively
1360 examine the next character, use L</lex_peek_unichar> instead.
1361
1362 If the next character is in (or extends into) the next chunk of input
1363 text, the next chunk will be read in.  Normally the current chunk will be
1364 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1365 then the current chunk will not be discarded.
1366
1367 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1368 is encountered, an exception is generated.
1369
1370 =cut
1371 */
1372
1373 I32
1374 Perl_lex_read_unichar(pTHX_ U32 flags)
1375 {
1376     I32 c;
1377     if (flags & ~(LEX_KEEP_PREVIOUS))
1378         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1379     c = lex_peek_unichar(flags);
1380     if (c != -1) {
1381         if (c == '\n')
1382             CopLINE_inc(PL_curcop);
1383         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1384     }
1385     return c;
1386 }
1387
1388 /*
1389 =for apidoc Amx|void|lex_read_space|U32 flags
1390
1391 Reads optional spaces, in Perl style, in the text currently being
1392 lexed.  The spaces may include ordinary whitespace characters and
1393 Perl-style comments.  C<#line> directives are processed if encountered.
1394 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1395 at a non-space character (or the end of the input text).
1396
1397 If spaces extend into the next chunk of input text, the next chunk will
1398 be read in.  Normally the current chunk will be discarded at the same
1399 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1400 chunk will not be discarded.
1401
1402 =cut
1403 */
1404
1405 #define LEX_NO_NEXT_CHUNK 0x80000000
1406
1407 void
1408 Perl_lex_read_space(pTHX_ U32 flags)
1409 {
1410     char *s, *bufend;
1411     bool need_incline = 0;
1412     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1413         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1414 #ifdef PERL_MAD
1415     if (PL_skipwhite) {
1416         sv_free(PL_skipwhite);
1417         PL_skipwhite = NULL;
1418     }
1419     if (PL_madskills)
1420         PL_skipwhite = newSVpvs("");
1421 #endif /* PERL_MAD */
1422     s = PL_parser->bufptr;
1423     bufend = PL_parser->bufend;
1424     while (1) {
1425         char c = *s;
1426         if (c == '#') {
1427             do {
1428                 c = *++s;
1429             } while (!(c == '\n' || (c == 0 && s == bufend)));
1430         } else if (c == '\n') {
1431             s++;
1432             PL_parser->linestart = s;
1433             if (s == bufend)
1434                 need_incline = 1;
1435             else
1436                 incline(s);
1437         } else if (isSPACE(c)) {
1438             s++;
1439         } else if (c == 0 && s == bufend) {
1440             bool got_more;
1441 #ifdef PERL_MAD
1442             if (PL_madskills)
1443                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1444 #endif /* PERL_MAD */
1445             if (flags & LEX_NO_NEXT_CHUNK)
1446                 break;
1447             PL_parser->bufptr = s;
1448             CopLINE_inc(PL_curcop);
1449             got_more = lex_next_chunk(flags);
1450             CopLINE_dec(PL_curcop);
1451             s = PL_parser->bufptr;
1452             bufend = PL_parser->bufend;
1453             if (!got_more)
1454                 break;
1455             if (need_incline && PL_parser->rsfp) {
1456                 incline(s);
1457                 need_incline = 0;
1458             }
1459         } else {
1460             break;
1461         }
1462     }
1463 #ifdef PERL_MAD
1464     if (PL_madskills)
1465         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1466 #endif /* PERL_MAD */
1467     PL_parser->bufptr = s;
1468 }
1469
1470 /*
1471  * S_incline
1472  * This subroutine has nothing to do with tilting, whether at windmills
1473  * or pinball tables.  Its name is short for "increment line".  It
1474  * increments the current line number in CopLINE(PL_curcop) and checks
1475  * to see whether the line starts with a comment of the form
1476  *    # line 500 "foo.pm"
1477  * If so, it sets the current line number and file to the values in the comment.
1478  */
1479
1480 STATIC void
1481 S_incline(pTHX_ const char *s)
1482 {
1483     dVAR;
1484     const char *t;
1485     const char *n;
1486     const char *e;
1487
1488     PERL_ARGS_ASSERT_INCLINE;
1489
1490     CopLINE_inc(PL_curcop);
1491     if (*s++ != '#')
1492         return;
1493     while (SPACE_OR_TAB(*s))
1494         s++;
1495     if (strnEQ(s, "line", 4))
1496         s += 4;
1497     else
1498         return;
1499     if (SPACE_OR_TAB(*s))
1500         s++;
1501     else
1502         return;
1503     while (SPACE_OR_TAB(*s))
1504         s++;
1505     if (!isDIGIT(*s))
1506         return;
1507
1508     n = s;
1509     while (isDIGIT(*s))
1510         s++;
1511     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1512         return;
1513     while (SPACE_OR_TAB(*s))
1514         s++;
1515     if (*s == '"' && (t = strchr(s+1, '"'))) {
1516         s++;
1517         e = t + 1;
1518     }
1519     else {
1520         t = s;
1521         while (!isSPACE(*t))
1522             t++;
1523         e = t;
1524     }
1525     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1526         e++;
1527     if (*e != '\n' && *e != '\0')
1528         return;         /* false alarm */
1529
1530     if (t - s > 0) {
1531         const STRLEN len = t - s;
1532 #ifndef USE_ITHREADS
1533         SV *const temp_sv = CopFILESV(PL_curcop);
1534         const char *cf;
1535         STRLEN tmplen;
1536
1537         if (temp_sv) {
1538             cf = SvPVX(temp_sv);
1539             tmplen = SvCUR(temp_sv);
1540         } else {
1541             cf = NULL;
1542             tmplen = 0;
1543         }
1544
1545         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1546             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1547              * to *{"::_<newfilename"} */
1548             /* However, the long form of evals is only turned on by the
1549                debugger - usually they're "(eval %lu)" */
1550             char smallbuf[128];
1551             char *tmpbuf;
1552             GV **gvp;
1553             STRLEN tmplen2 = len;
1554             if (tmplen + 2 <= sizeof smallbuf)
1555                 tmpbuf = smallbuf;
1556             else
1557                 Newx(tmpbuf, tmplen + 2, char);
1558             tmpbuf[0] = '_';
1559             tmpbuf[1] = '<';
1560             memcpy(tmpbuf + 2, cf, tmplen);
1561             tmplen += 2;
1562             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1563             if (gvp) {
1564                 char *tmpbuf2;
1565                 GV *gv2;
1566
1567                 if (tmplen2 + 2 <= sizeof smallbuf)
1568                     tmpbuf2 = smallbuf;
1569                 else
1570                     Newx(tmpbuf2, tmplen2 + 2, char);
1571
1572                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1573                     /* Either they malloc'd it, or we malloc'd it,
1574                        so no prefix is present in ours.  */
1575                     tmpbuf2[0] = '_';
1576                     tmpbuf2[1] = '<';
1577                 }
1578
1579                 memcpy(tmpbuf2 + 2, s, tmplen2);
1580                 tmplen2 += 2;
1581
1582                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1583                 if (!isGV(gv2)) {
1584                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1585                     /* adjust ${"::_<newfilename"} to store the new file name */
1586                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1587                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1588                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1589                 }
1590
1591                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1592             }
1593             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1594         }
1595 #endif
1596         CopFILE_free(PL_curcop);
1597         CopFILE_setn(PL_curcop, s, len);
1598     }
1599     CopLINE_set(PL_curcop, atoi(n)-1);
1600 }
1601
1602 #ifdef PERL_MAD
1603 /* skip space before PL_thistoken */
1604
1605 STATIC char *
1606 S_skipspace0(pTHX_ register char *s)
1607 {
1608     PERL_ARGS_ASSERT_SKIPSPACE0;
1609
1610     s = skipspace(s);
1611     if (!PL_madskills)
1612         return s;
1613     if (PL_skipwhite) {
1614         if (!PL_thiswhite)
1615             PL_thiswhite = newSVpvs("");
1616         sv_catsv(PL_thiswhite, PL_skipwhite);
1617         sv_free(PL_skipwhite);
1618         PL_skipwhite = 0;
1619     }
1620     PL_realtokenstart = s - SvPVX(PL_linestr);
1621     return s;
1622 }
1623
1624 /* skip space after PL_thistoken */
1625
1626 STATIC char *
1627 S_skipspace1(pTHX_ register char *s)
1628 {
1629     const char *start = s;
1630     I32 startoff = start - SvPVX(PL_linestr);
1631
1632     PERL_ARGS_ASSERT_SKIPSPACE1;
1633
1634     s = skipspace(s);
1635     if (!PL_madskills)
1636         return s;
1637     start = SvPVX(PL_linestr) + startoff;
1638     if (!PL_thistoken && PL_realtokenstart >= 0) {
1639         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1640         PL_thistoken = newSVpvn(tstart, start - tstart);
1641     }
1642     PL_realtokenstart = -1;
1643     if (PL_skipwhite) {
1644         if (!PL_nextwhite)
1645             PL_nextwhite = newSVpvs("");
1646         sv_catsv(PL_nextwhite, PL_skipwhite);
1647         sv_free(PL_skipwhite);
1648         PL_skipwhite = 0;
1649     }
1650     return s;
1651 }
1652
1653 STATIC char *
1654 S_skipspace2(pTHX_ register char *s, SV **svp)
1655 {
1656     char *start;
1657     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1658     const I32 startoff = s - SvPVX(PL_linestr);
1659
1660     PERL_ARGS_ASSERT_SKIPSPACE2;
1661
1662     s = skipspace(s);
1663     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1664     if (!PL_madskills || !svp)
1665         return s;
1666     start = SvPVX(PL_linestr) + startoff;
1667     if (!PL_thistoken && PL_realtokenstart >= 0) {
1668         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1669         PL_thistoken = newSVpvn(tstart, start - tstart);
1670         PL_realtokenstart = -1;
1671     }
1672     if (PL_skipwhite) {
1673         if (!*svp)
1674             *svp = newSVpvs("");
1675         sv_setsv(*svp, PL_skipwhite);
1676         sv_free(PL_skipwhite);
1677         PL_skipwhite = 0;
1678     }
1679
1680     return s;
1681 }
1682 #endif
1683
1684 STATIC void
1685 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1686 {
1687     AV *av = CopFILEAVx(PL_curcop);
1688     if (av) {
1689         SV * const sv = newSV_type(SVt_PVMG);
1690         if (orig_sv)
1691             sv_setsv(sv, orig_sv);
1692         else
1693             sv_setpvn(sv, buf, len);
1694         (void)SvIOK_on(sv);
1695         SvIV_set(sv, 0);
1696         av_store(av, (I32)CopLINE(PL_curcop), sv);
1697     }
1698 }
1699
1700 /*
1701  * S_skipspace
1702  * Called to gobble the appropriate amount and type of whitespace.
1703  * Skips comments as well.
1704  */
1705
1706 STATIC char *
1707 S_skipspace(pTHX_ register char *s)
1708 {
1709 #ifdef PERL_MAD
1710     char *start = s;
1711 #endif /* PERL_MAD */
1712     PERL_ARGS_ASSERT_SKIPSPACE;
1713 #ifdef PERL_MAD
1714     if (PL_skipwhite) {
1715         sv_free(PL_skipwhite);
1716         PL_skipwhite = NULL;
1717     }
1718 #endif /* PERL_MAD */
1719     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1720         while (s < PL_bufend && SPACE_OR_TAB(*s))
1721             s++;
1722     } else {
1723         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1724         PL_bufptr = s;
1725         lex_read_space(LEX_KEEP_PREVIOUS |
1726                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1727                     LEX_NO_NEXT_CHUNK : 0));
1728         s = PL_bufptr;
1729         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1730         if (PL_linestart > PL_bufptr)
1731             PL_bufptr = PL_linestart;
1732         return s;
1733     }
1734 #ifdef PERL_MAD
1735     if (PL_madskills)
1736         PL_skipwhite = newSVpvn(start, s-start);
1737 #endif /* PERL_MAD */
1738     return s;
1739 }
1740
1741 /*
1742  * S_check_uni
1743  * Check the unary operators to ensure there's no ambiguity in how they're
1744  * used.  An ambiguous piece of code would be:
1745  *     rand + 5
1746  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1747  * the +5 is its argument.
1748  */
1749
1750 STATIC void
1751 S_check_uni(pTHX)
1752 {
1753     dVAR;
1754     const char *s;
1755     const char *t;
1756
1757     if (PL_oldoldbufptr != PL_last_uni)
1758         return;
1759     while (isSPACE(*PL_last_uni))
1760         PL_last_uni++;
1761     s = PL_last_uni;
1762     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1763         s++;
1764     if ((t = strchr(s, '(')) && t < PL_bufptr)
1765         return;
1766
1767     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1768                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1769                      (int)(s - PL_last_uni), PL_last_uni);
1770 }
1771
1772 /*
1773  * LOP : macro to build a list operator.  Its behaviour has been replaced
1774  * with a subroutine, S_lop() for which LOP is just another name.
1775  */
1776
1777 #define LOP(f,x) return lop(f,x,s)
1778
1779 /*
1780  * S_lop
1781  * Build a list operator (or something that might be one).  The rules:
1782  *  - if we have a next token, then it's a list operator [why?]
1783  *  - if the next thing is an opening paren, then it's a function
1784  *  - else it's a list operator
1785  */
1786
1787 STATIC I32
1788 S_lop(pTHX_ I32 f, int x, char *s)
1789 {
1790     dVAR;
1791
1792     PERL_ARGS_ASSERT_LOP;
1793
1794     pl_yylval.ival = f;
1795     CLINE;
1796     PL_expect = x;
1797     PL_bufptr = s;
1798     PL_last_lop = PL_oldbufptr;
1799     PL_last_lop_op = (OPCODE)f;
1800 #ifdef PERL_MAD
1801     if (PL_lasttoke)
1802         return REPORT(LSTOP);
1803 #else
1804     if (PL_nexttoke)
1805         return REPORT(LSTOP);
1806 #endif
1807     if (*s == '(')
1808         return REPORT(FUNC);
1809     s = PEEKSPACE(s);
1810     if (*s == '(')
1811         return REPORT(FUNC);
1812     else
1813         return REPORT(LSTOP);
1814 }
1815
1816 #ifdef PERL_MAD
1817  /*
1818  * S_start_force
1819  * Sets up for an eventual force_next().  start_force(0) basically does
1820  * an unshift, while start_force(-1) does a push.  yylex removes items
1821  * on the "pop" end.
1822  */
1823
1824 STATIC void
1825 S_start_force(pTHX_ int where)
1826 {
1827     int i;
1828
1829     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1830         where = PL_lasttoke;
1831     assert(PL_curforce < 0 || PL_curforce == where);
1832     if (PL_curforce != where) {
1833         for (i = PL_lasttoke; i > where; --i) {
1834             PL_nexttoke[i] = PL_nexttoke[i-1];
1835         }
1836         PL_lasttoke++;
1837     }
1838     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1839         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1840     PL_curforce = where;
1841     if (PL_nextwhite) {
1842         if (PL_madskills)
1843             curmad('^', newSVpvs(""));
1844         CURMAD('_', PL_nextwhite);
1845     }
1846 }
1847
1848 STATIC void
1849 S_curmad(pTHX_ char slot, SV *sv)
1850 {
1851     MADPROP **where;
1852
1853     if (!sv)
1854         return;
1855     if (PL_curforce < 0)
1856         where = &PL_thismad;
1857     else
1858         where = &PL_nexttoke[PL_curforce].next_mad;
1859
1860     if (PL_faketokens)
1861         sv_setpvs(sv, "");
1862     else {
1863         if (!IN_BYTES) {
1864             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1865                 SvUTF8_on(sv);
1866             else if (PL_encoding) {
1867                 sv_recode_to_utf8(sv, PL_encoding);
1868             }
1869         }
1870     }
1871
1872     /* keep a slot open for the head of the list? */
1873     if (slot != '_' && *where && (*where)->mad_key == '^') {
1874         (*where)->mad_key = slot;
1875         sv_free(MUTABLE_SV(((*where)->mad_val)));
1876         (*where)->mad_val = (void*)sv;
1877     }
1878     else
1879         addmad(newMADsv(slot, sv), where, 0);
1880 }
1881 #else
1882 #  define start_force(where)    NOOP
1883 #  define curmad(slot, sv)      NOOP
1884 #endif
1885
1886 /*
1887  * S_force_next
1888  * When the lexer realizes it knows the next token (for instance,
1889  * it is reordering tokens for the parser) then it can call S_force_next
1890  * to know what token to return the next time the lexer is called.  Caller
1891  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1892  * and possibly PL_expect to ensure the lexer handles the token correctly.
1893  */
1894
1895 STATIC void
1896 S_force_next(pTHX_ I32 type)
1897 {
1898     dVAR;
1899 #ifdef DEBUGGING
1900     if (DEBUG_T_TEST) {
1901         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1902         tokereport(type, &NEXTVAL_NEXTTOKE);
1903     }
1904 #endif
1905 #ifdef PERL_MAD
1906     if (PL_curforce < 0)
1907         start_force(PL_lasttoke);
1908     PL_nexttoke[PL_curforce].next_type = type;
1909     if (PL_lex_state != LEX_KNOWNEXT)
1910         PL_lex_defer = PL_lex_state;
1911     PL_lex_state = LEX_KNOWNEXT;
1912     PL_lex_expect = PL_expect;
1913     PL_curforce = -1;
1914 #else
1915     PL_nexttype[PL_nexttoke] = type;
1916     PL_nexttoke++;
1917     if (PL_lex_state != LEX_KNOWNEXT) {
1918         PL_lex_defer = PL_lex_state;
1919         PL_lex_expect = PL_expect;
1920         PL_lex_state = LEX_KNOWNEXT;
1921     }
1922 #endif
1923 }
1924
1925 void
1926 Perl_yyunlex(pTHX)
1927 {
1928     if (PL_parser->yychar != YYEMPTY) {
1929         start_force(-1);
1930         NEXTVAL_NEXTTOKE = PL_parser->yylval;
1931         force_next(PL_parser->yychar);
1932         PL_parser->yychar = YYEMPTY;
1933     }
1934 }
1935
1936 STATIC SV *
1937 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1938 {
1939     dVAR;
1940     SV * const sv = newSVpvn_utf8(start, len,
1941                                   !IN_BYTES
1942                                   && UTF
1943                                   && !is_ascii_string((const U8*)start, len)
1944                                   && is_utf8_string((const U8*)start, len));
1945     return sv;
1946 }
1947
1948 /*
1949  * S_force_word
1950  * When the lexer knows the next thing is a word (for instance, it has
1951  * just seen -> and it knows that the next char is a word char, then
1952  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1953  * lookahead.
1954  *
1955  * Arguments:
1956  *   char *start : buffer position (must be within PL_linestr)
1957  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1958  *   int check_keyword : if true, Perl checks to make sure the word isn't
1959  *       a keyword (do this if the word is a label, e.g. goto FOO)
1960  *   int allow_pack : if true, : characters will also be allowed (require,
1961  *       use, etc. do this)
1962  *   int allow_initial_tick : used by the "sub" lexer only.
1963  */
1964
1965 STATIC char *
1966 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1967 {
1968     dVAR;
1969     register char *s;
1970     STRLEN len;
1971
1972     PERL_ARGS_ASSERT_FORCE_WORD;
1973
1974     start = SKIPSPACE1(start);
1975     s = start;
1976     if (isIDFIRST_lazy_if(s,UTF) ||
1977         (allow_pack && *s == ':') ||
1978         (allow_initial_tick && *s == '\'') )
1979     {
1980         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1981         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1982             return start;
1983         start_force(PL_curforce);
1984         if (PL_madskills)
1985             curmad('X', newSVpvn(start,s-start));
1986         if (token == METHOD) {
1987             s = SKIPSPACE1(s);
1988             if (*s == '(')
1989                 PL_expect = XTERM;
1990             else {
1991                 PL_expect = XOPERATOR;
1992             }
1993         }
1994         if (PL_madskills)
1995             curmad('g', newSVpvs( "forced" ));
1996         NEXTVAL_NEXTTOKE.opval
1997             = (OP*)newSVOP(OP_CONST,0,
1998                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1999         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2000         force_next(token);
2001     }
2002     return s;
2003 }
2004
2005 /*
2006  * S_force_ident
2007  * Called when the lexer wants $foo *foo &foo etc, but the program
2008  * text only contains the "foo" portion.  The first argument is a pointer
2009  * to the "foo", and the second argument is the type symbol to prefix.
2010  * Forces the next token to be a "WORD".
2011  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2012  */
2013
2014 STATIC void
2015 S_force_ident(pTHX_ register const char *s, int kind)
2016 {
2017     dVAR;
2018
2019     PERL_ARGS_ASSERT_FORCE_IDENT;
2020
2021     if (*s) {
2022         const STRLEN len = strlen(s);
2023         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2024         start_force(PL_curforce);
2025         NEXTVAL_NEXTTOKE.opval = o;
2026         force_next(WORD);
2027         if (kind) {
2028             o->op_private = OPpCONST_ENTERED;
2029             /* XXX see note in pp_entereval() for why we forgo typo
2030                warnings if the symbol must be introduced in an eval.
2031                GSAR 96-10-12 */
2032             gv_fetchpvn_flags(s, len,
2033                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2034                               : GV_ADD,
2035                               kind == '$' ? SVt_PV :
2036                               kind == '@' ? SVt_PVAV :
2037                               kind == '%' ? SVt_PVHV :
2038                               SVt_PVGV
2039                               );
2040         }
2041     }
2042 }
2043
2044 NV
2045 Perl_str_to_version(pTHX_ SV *sv)
2046 {
2047     NV retval = 0.0;
2048     NV nshift = 1.0;
2049     STRLEN len;
2050     const char *start = SvPV_const(sv,len);
2051     const char * const end = start + len;
2052     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2053
2054     PERL_ARGS_ASSERT_STR_TO_VERSION;
2055
2056     while (start < end) {
2057         STRLEN skip;
2058         UV n;
2059         if (utf)
2060             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2061         else {
2062             n = *(U8*)start;
2063             skip = 1;
2064         }
2065         retval += ((NV)n)/nshift;
2066         start += skip;
2067         nshift *= 1000;
2068     }
2069     return retval;
2070 }
2071
2072 /*
2073  * S_force_version
2074  * Forces the next token to be a version number.
2075  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2076  * and if "guessing" is TRUE, then no new token is created (and the caller
2077  * must use an alternative parsing method).
2078  */
2079
2080 STATIC char *
2081 S_force_version(pTHX_ char *s, int guessing)
2082 {
2083     dVAR;
2084     OP *version = NULL;
2085     char *d;
2086 #ifdef PERL_MAD
2087     I32 startoff = s - SvPVX(PL_linestr);
2088 #endif
2089
2090     PERL_ARGS_ASSERT_FORCE_VERSION;
2091
2092     s = SKIPSPACE1(s);
2093
2094     d = s;
2095     if (*d == 'v')
2096         d++;
2097     if (isDIGIT(*d)) {
2098         while (isDIGIT(*d) || *d == '_' || *d == '.')
2099             d++;
2100 #ifdef PERL_MAD
2101         if (PL_madskills) {
2102             start_force(PL_curforce);
2103             curmad('X', newSVpvn(s,d-s));
2104         }
2105 #endif
2106         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2107             SV *ver;
2108 #ifdef USE_LOCALE_NUMERIC
2109             char *loc = setlocale(LC_NUMERIC, "C");
2110 #endif
2111             s = scan_num(s, &pl_yylval);
2112 #ifdef USE_LOCALE_NUMERIC
2113             setlocale(LC_NUMERIC, loc);
2114 #endif
2115             version = pl_yylval.opval;
2116             ver = cSVOPx(version)->op_sv;
2117             if (SvPOK(ver) && !SvNIOK(ver)) {
2118                 SvUPGRADE(ver, SVt_PVNV);
2119                 SvNV_set(ver, str_to_version(ver));
2120                 SvNOK_on(ver);          /* hint that it is a version */
2121             }
2122         }
2123         else if (guessing) {
2124 #ifdef PERL_MAD
2125             if (PL_madskills) {
2126                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2127                 PL_nextwhite = 0;
2128                 s = SvPVX(PL_linestr) + startoff;
2129             }
2130 #endif
2131             return s;
2132         }
2133     }
2134
2135 #ifdef PERL_MAD
2136     if (PL_madskills && !version) {
2137         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2138         PL_nextwhite = 0;
2139         s = SvPVX(PL_linestr) + startoff;
2140     }
2141 #endif
2142     /* NOTE: The parser sees the package name and the VERSION swapped */
2143     start_force(PL_curforce);
2144     NEXTVAL_NEXTTOKE.opval = version;
2145     force_next(WORD);
2146
2147     return s;
2148 }
2149
2150 /*
2151  * S_force_strict_version
2152  * Forces the next token to be a version number using strict syntax rules.
2153  */
2154
2155 STATIC char *
2156 S_force_strict_version(pTHX_ char *s)
2157 {
2158     dVAR;
2159     OP *version = NULL;
2160 #ifdef PERL_MAD
2161     I32 startoff = s - SvPVX(PL_linestr);
2162 #endif
2163     const char *errstr = NULL;
2164
2165     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2166
2167     while (isSPACE(*s)) /* leading whitespace */
2168         s++;
2169
2170     if (is_STRICT_VERSION(s,&errstr)) {
2171         SV *ver = newSV(0);
2172         s = (char *)scan_version(s, ver, 0);
2173         version = newSVOP(OP_CONST, 0, ver);
2174     }
2175     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2176             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2177     {
2178         PL_bufptr = s;
2179         if (errstr)
2180             yyerror(errstr); /* version required */
2181         return s;
2182     }
2183
2184 #ifdef PERL_MAD
2185     if (PL_madskills && !version) {
2186         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2187         PL_nextwhite = 0;
2188         s = SvPVX(PL_linestr) + startoff;
2189     }
2190 #endif
2191     /* NOTE: The parser sees the package name and the VERSION swapped */
2192     start_force(PL_curforce);
2193     NEXTVAL_NEXTTOKE.opval = version;
2194     force_next(WORD);
2195
2196     return s;
2197 }
2198
2199 /*
2200  * S_tokeq
2201  * Tokenize a quoted string passed in as an SV.  It finds the next
2202  * chunk, up to end of string or a backslash.  It may make a new
2203  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2204  * turns \\ into \.
2205  */
2206
2207 STATIC SV *
2208 S_tokeq(pTHX_ SV *sv)
2209 {
2210     dVAR;
2211     register char *s;
2212     register char *send;
2213     register char *d;
2214     STRLEN len = 0;
2215     SV *pv = sv;
2216
2217     PERL_ARGS_ASSERT_TOKEQ;
2218
2219     if (!SvLEN(sv))
2220         goto finish;
2221
2222     s = SvPV_force(sv, len);
2223     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2224         goto finish;
2225     send = s + len;
2226     while (s < send && *s != '\\')
2227         s++;
2228     if (s == send)
2229         goto finish;
2230     d = s;
2231     if ( PL_hints & HINT_NEW_STRING ) {
2232         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2233     }
2234     while (s < send) {
2235         if (*s == '\\') {
2236             if (s + 1 < send && (s[1] == '\\'))
2237                 s++;            /* all that, just for this */
2238         }
2239         *d++ = *s++;
2240     }
2241     *d = '\0';
2242     SvCUR_set(sv, d - SvPVX_const(sv));
2243   finish:
2244     if ( PL_hints & HINT_NEW_STRING )
2245        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2246     return sv;
2247 }
2248
2249 /*
2250  * Now come three functions related to double-quote context,
2251  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2252  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2253  * interact with PL_lex_state, and create fake ( ... ) argument lists
2254  * to handle functions and concatenation.
2255  * They assume that whoever calls them will be setting up a fake
2256  * join call, because each subthing puts a ',' after it.  This lets
2257  *   "lower \luPpEr"
2258  * become
2259  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2260  *
2261  * (I'm not sure whether the spurious commas at the end of lcfirst's
2262  * arguments and join's arguments are created or not).
2263  */
2264
2265 /*
2266  * S_sublex_start
2267  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2268  *
2269  * Pattern matching will set PL_lex_op to the pattern-matching op to
2270  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2271  *
2272  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2273  *
2274  * Everything else becomes a FUNC.
2275  *
2276  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2277  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2278  * call to S_sublex_push().
2279  */
2280
2281 STATIC I32
2282 S_sublex_start(pTHX)
2283 {
2284     dVAR;
2285     register const I32 op_type = pl_yylval.ival;
2286
2287     if (op_type == OP_NULL) {
2288         pl_yylval.opval = PL_lex_op;
2289         PL_lex_op = NULL;
2290         return THING;
2291     }
2292     if (op_type == OP_CONST || op_type == OP_READLINE) {
2293         SV *sv = tokeq(PL_lex_stuff);
2294
2295         if (SvTYPE(sv) == SVt_PVIV) {
2296             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2297             STRLEN len;
2298             const char * const p = SvPV_const(sv, len);
2299             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2300             SvREFCNT_dec(sv);
2301             sv = nsv;
2302         }
2303         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2304         PL_lex_stuff = NULL;
2305         /* Allow <FH> // "foo" */
2306         if (op_type == OP_READLINE)
2307             PL_expect = XTERMORDORDOR;
2308         return THING;
2309     }
2310     else if (op_type == OP_BACKTICK && PL_lex_op) {
2311         /* readpipe() vas overriden */
2312         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2313         pl_yylval.opval = PL_lex_op;
2314         PL_lex_op = NULL;
2315         PL_lex_stuff = NULL;
2316         return THING;
2317     }
2318
2319     PL_sublex_info.super_state = PL_lex_state;
2320     PL_sublex_info.sub_inwhat = (U16)op_type;
2321     PL_sublex_info.sub_op = PL_lex_op;
2322     PL_lex_state = LEX_INTERPPUSH;
2323
2324     PL_expect = XTERM;
2325     if (PL_lex_op) {
2326         pl_yylval.opval = PL_lex_op;
2327         PL_lex_op = NULL;
2328         return PMFUNC;
2329     }
2330     else
2331         return FUNC;
2332 }
2333
2334 /*
2335  * S_sublex_push
2336  * Create a new scope to save the lexing state.  The scope will be
2337  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2338  * to the uc, lc, etc. found before.
2339  * Sets PL_lex_state to LEX_INTERPCONCAT.
2340  */
2341
2342 STATIC I32
2343 S_sublex_push(pTHX)
2344 {
2345     dVAR;
2346     ENTER;
2347
2348     PL_lex_state = PL_sublex_info.super_state;
2349     SAVEBOOL(PL_lex_dojoin);
2350     SAVEI32(PL_lex_brackets);
2351     SAVEI32(PL_lex_casemods);
2352     SAVEI32(PL_lex_starts);
2353     SAVEI8(PL_lex_state);
2354     SAVEVPTR(PL_lex_inpat);
2355     SAVEI16(PL_lex_inwhat);
2356     SAVECOPLINE(PL_curcop);
2357     SAVEPPTR(PL_bufptr);
2358     SAVEPPTR(PL_bufend);
2359     SAVEPPTR(PL_oldbufptr);
2360     SAVEPPTR(PL_oldoldbufptr);
2361     SAVEPPTR(PL_last_lop);
2362     SAVEPPTR(PL_last_uni);
2363     SAVEPPTR(PL_linestart);
2364     SAVESPTR(PL_linestr);
2365     SAVEGENERICPV(PL_lex_brackstack);
2366     SAVEGENERICPV(PL_lex_casestack);
2367
2368     PL_linestr = PL_lex_stuff;
2369     PL_lex_stuff = NULL;
2370
2371     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2372         = SvPVX(PL_linestr);
2373     PL_bufend += SvCUR(PL_linestr);
2374     PL_last_lop = PL_last_uni = NULL;
2375     SAVEFREESV(PL_linestr);
2376
2377     PL_lex_dojoin = FALSE;
2378     PL_lex_brackets = 0;
2379     Newx(PL_lex_brackstack, 120, char);
2380     Newx(PL_lex_casestack, 12, char);
2381     PL_lex_casemods = 0;
2382     *PL_lex_casestack = '\0';
2383     PL_lex_starts = 0;
2384     PL_lex_state = LEX_INTERPCONCAT;
2385     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2386
2387     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2388     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2389         PL_lex_inpat = PL_sublex_info.sub_op;
2390     else
2391         PL_lex_inpat = NULL;
2392
2393     return '(';
2394 }
2395
2396 /*
2397  * S_sublex_done
2398  * Restores lexer state after a S_sublex_push.
2399  */
2400
2401 STATIC I32
2402 S_sublex_done(pTHX)
2403 {
2404     dVAR;
2405     if (!PL_lex_starts++) {
2406         SV * const sv = newSVpvs("");
2407         if (SvUTF8(PL_linestr))
2408             SvUTF8_on(sv);
2409         PL_expect = XOPERATOR;
2410         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2411         return THING;
2412     }
2413
2414     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2415         PL_lex_state = LEX_INTERPCASEMOD;
2416         return yylex();
2417     }
2418
2419     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2420     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2421         PL_linestr = PL_lex_repl;
2422         PL_lex_inpat = 0;
2423         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2424         PL_bufend += SvCUR(PL_linestr);
2425         PL_last_lop = PL_last_uni = NULL;
2426         SAVEFREESV(PL_linestr);
2427         PL_lex_dojoin = FALSE;
2428         PL_lex_brackets = 0;
2429         PL_lex_casemods = 0;
2430         *PL_lex_casestack = '\0';
2431         PL_lex_starts = 0;
2432         if (SvEVALED(PL_lex_repl)) {
2433             PL_lex_state = LEX_INTERPNORMAL;
2434             PL_lex_starts++;
2435             /*  we don't clear PL_lex_repl here, so that we can check later
2436                 whether this is an evalled subst; that means we rely on the
2437                 logic to ensure sublex_done() is called again only via the
2438                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2439         }
2440         else {
2441             PL_lex_state = LEX_INTERPCONCAT;
2442             PL_lex_repl = NULL;
2443         }
2444         return ',';
2445     }
2446     else {
2447 #ifdef PERL_MAD
2448         if (PL_madskills) {
2449             if (PL_thiswhite) {
2450                 if (!PL_endwhite)
2451                     PL_endwhite = newSVpvs("");
2452                 sv_catsv(PL_endwhite, PL_thiswhite);
2453                 PL_thiswhite = 0;
2454             }
2455             if (PL_thistoken)
2456                 sv_setpvs(PL_thistoken,"");
2457             else
2458                 PL_realtokenstart = -1;
2459         }
2460 #endif
2461         LEAVE;
2462         PL_bufend = SvPVX(PL_linestr);
2463         PL_bufend += SvCUR(PL_linestr);
2464         PL_expect = XOPERATOR;
2465         PL_sublex_info.sub_inwhat = 0;
2466         return ')';
2467     }
2468 }
2469
2470 /*
2471   scan_const
2472
2473   Extracts a pattern, double-quoted string, or transliteration.  This
2474   is terrifying code.
2475
2476   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2477   processing a pattern (PL_lex_inpat is true), a transliteration
2478   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2479
2480   Returns a pointer to the character scanned up to. If this is
2481   advanced from the start pointer supplied (i.e. if anything was
2482   successfully parsed), will leave an OP for the substring scanned
2483   in pl_yylval. Caller must intuit reason for not parsing further
2484   by looking at the next characters herself.
2485
2486   In patterns:
2487     backslashes:
2488       constants: \N{NAME} only
2489       case and quoting: \U \Q \E
2490     stops on @ and $, but not for $ as tail anchor
2491
2492   In transliterations:
2493     characters are VERY literal, except for - not at the start or end
2494     of the string, which indicates a range. If the range is in bytes,
2495     scan_const expands the range to the full set of intermediate
2496     characters. If the range is in utf8, the hyphen is replaced with
2497     a certain range mark which will be handled by pmtrans() in op.c.
2498
2499   In double-quoted strings:
2500     backslashes:
2501       double-quoted style: \r and \n
2502       constants: \x31, etc.
2503       deprecated backrefs: \1 (in substitution replacements)
2504       case and quoting: \U \Q \E
2505     stops on @ and $
2506
2507   scan_const does *not* construct ops to handle interpolated strings.
2508   It stops processing as soon as it finds an embedded $ or @ variable
2509   and leaves it to the caller to work out what's going on.
2510
2511   embedded arrays (whether in pattern or not) could be:
2512       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2513
2514   $ in double-quoted strings must be the symbol of an embedded scalar.
2515
2516   $ in pattern could be $foo or could be tail anchor.  Assumption:
2517   it's a tail anchor if $ is the last thing in the string, or if it's
2518   followed by one of "()| \r\n\t"
2519
2520   \1 (backreferences) are turned into $1
2521
2522   The structure of the code is
2523       while (there's a character to process) {
2524           handle transliteration ranges
2525           skip regexp comments /(?#comment)/ and codes /(?{code})/
2526           skip #-initiated comments in //x patterns
2527           check for embedded arrays
2528           check for embedded scalars
2529           if (backslash) {
2530               deprecate \1 in substitution replacements
2531               handle string-changing backslashes \l \U \Q \E, etc.
2532               switch (what was escaped) {
2533                   handle \- in a transliteration (becomes a literal -)
2534                   if a pattern and not \N{, go treat as regular character
2535                   handle \132 (octal characters)
2536                   handle \x15 and \x{1234} (hex characters)
2537                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2538                   handle \cV (control characters)
2539                   handle printf-style backslashes (\f, \r, \n, etc)
2540               } (end switch)
2541               continue
2542           } (end if backslash)
2543           handle regular character
2544     } (end while character to read)
2545
2546 */
2547
2548 STATIC char *
2549 S_scan_const(pTHX_ char *start)
2550 {
2551     dVAR;
2552     register char *send = PL_bufend;            /* end of the constant */
2553     SV *sv = newSV(send - start);               /* sv for the constant.  See
2554                                                    note below on sizing. */
2555     register char *s = start;                   /* start of the constant */
2556     register char *d = SvPVX(sv);               /* destination for copies */
2557     bool dorange = FALSE;                       /* are we in a translit range? */
2558     bool didrange = FALSE;                      /* did we just finish a range? */
2559     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2560     I32  this_utf8 = UTF;                       /* Is the source string assumed
2561                                                    to be UTF8?  But, this can
2562                                                    show as true when the source
2563                                                    isn't utf8, as for example
2564                                                    when it is entirely composed
2565                                                    of hex constants */
2566
2567     /* Note on sizing:  The scanned constant is placed into sv, which is
2568      * initialized by newSV() assuming one byte of output for every byte of
2569      * input.  This routine expects newSV() to allocate an extra byte for a
2570      * trailing NUL, which this routine will append if it gets to the end of
2571      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2572      * CAPITAL LETTER A}), or more output than input if the constant ends up
2573      * recoded to utf8, but each time a construct is found that might increase
2574      * the needed size, SvGROW() is called.  Its size parameter each time is
2575      * based on the best guess estimate at the time, namely the length used so
2576      * far, plus the length the current construct will occupy, plus room for
2577      * the trailing NUL, plus one byte for every input byte still unscanned */
2578
2579     UV uv;
2580 #ifdef EBCDIC
2581     UV literal_endpoint = 0;
2582     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2583 #endif
2584
2585     PERL_ARGS_ASSERT_SCAN_CONST;
2586
2587     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2588         /* If we are doing a trans and we know we want UTF8 set expectation */
2589         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2590         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2591     }
2592
2593
2594     while (s < send || dorange) {
2595
2596         /* get transliterations out of the way (they're most literal) */
2597         if (PL_lex_inwhat == OP_TRANS) {
2598             /* expand a range A-Z to the full set of characters.  AIE! */
2599             if (dorange) {
2600                 I32 i;                          /* current expanded character */
2601                 I32 min;                        /* first character in range */
2602                 I32 max;                        /* last character in range */
2603
2604 #ifdef EBCDIC
2605                 UV uvmax = 0;
2606 #endif
2607
2608                 if (has_utf8
2609 #ifdef EBCDIC
2610                     && !native_range
2611 #endif
2612                     ) {
2613                     char * const c = (char*)utf8_hop((U8*)d, -1);
2614                     char *e = d++;
2615                     while (e-- > c)
2616                         *(e + 1) = *e;
2617                     *c = (char)UTF_TO_NATIVE(0xff);
2618                     /* mark the range as done, and continue */
2619                     dorange = FALSE;
2620                     didrange = TRUE;
2621                     continue;
2622                 }
2623
2624                 i = d - SvPVX_const(sv);                /* remember current offset */
2625 #ifdef EBCDIC
2626                 SvGROW(sv,
2627                        SvLEN(sv) + (has_utf8 ?
2628                                     (512 - UTF_CONTINUATION_MARK +
2629                                      UNISKIP(0x100))
2630                                     : 256));
2631                 /* How many two-byte within 0..255: 128 in UTF-8,
2632                  * 96 in UTF-8-mod. */
2633 #else
2634                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2635 #endif
2636                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2637 #ifdef EBCDIC
2638                 if (has_utf8) {
2639                     int j;
2640                     for (j = 0; j <= 1; j++) {
2641                         char * const c = (char*)utf8_hop((U8*)d, -1);
2642                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2643                         if (j)
2644                             min = (U8)uv;
2645                         else if (uv < 256)
2646                             max = (U8)uv;
2647                         else {
2648                             max = (U8)0xff; /* only to \xff */
2649                             uvmax = uv; /* \x{100} to uvmax */
2650                         }
2651                         d = c; /* eat endpoint chars */
2652                      }
2653                 }
2654                else {
2655 #endif
2656                    d -= 2;              /* eat the first char and the - */
2657                    min = (U8)*d;        /* first char in range */
2658                    max = (U8)d[1];      /* last char in range  */
2659 #ifdef EBCDIC
2660                }
2661 #endif
2662
2663                 if (min > max) {
2664                     Perl_croak(aTHX_
2665                                "Invalid range \"%c-%c\" in transliteration operator",
2666                                (char)min, (char)max);
2667                 }
2668
2669 #ifdef EBCDIC
2670                 if (literal_endpoint == 2 &&
2671                     ((isLOWER(min) && isLOWER(max)) ||
2672                      (isUPPER(min) && isUPPER(max)))) {
2673                     if (isLOWER(min)) {
2674                         for (i = min; i <= max; i++)
2675                             if (isLOWER(i))
2676                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2677                     } else {
2678                         for (i = min; i <= max; i++)
2679                             if (isUPPER(i))
2680                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2681                     }
2682                 }
2683                 else
2684 #endif
2685                     for (i = min; i <= max; i++)
2686 #ifdef EBCDIC
2687                         if (has_utf8) {
2688                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2689                             if (UNI_IS_INVARIANT(ch))
2690                                 *d++ = (U8)i;
2691                             else {
2692                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2693                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2694                             }
2695                         }
2696                         else
2697 #endif
2698                             *d++ = (char)i;
2699
2700 #ifdef EBCDIC
2701                 if (uvmax) {
2702                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2703                     if (uvmax > 0x101)
2704                         *d++ = (char)UTF_TO_NATIVE(0xff);
2705                     if (uvmax > 0x100)
2706                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2707                 }
2708 #endif
2709
2710                 /* mark the range as done, and continue */
2711                 dorange = FALSE;
2712                 didrange = TRUE;
2713 #ifdef EBCDIC
2714                 literal_endpoint = 0;
2715 #endif
2716                 continue;
2717             }
2718
2719             /* range begins (ignore - as first or last char) */
2720             else if (*s == '-' && s+1 < send  && s != start) {
2721                 if (didrange) {
2722                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2723                 }
2724                 if (has_utf8
2725 #ifdef EBCDIC
2726                     && !native_range
2727 #endif
2728                     ) {
2729                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2730                     s++;
2731                     continue;
2732                 }
2733                 dorange = TRUE;
2734                 s++;
2735             }
2736             else {
2737                 didrange = FALSE;
2738 #ifdef EBCDIC
2739                 literal_endpoint = 0;
2740                 native_range = TRUE;
2741 #endif
2742             }
2743         }
2744
2745         /* if we get here, we're not doing a transliteration */
2746
2747         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2748            except for the last char, which will be done separately. */
2749         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2750             if (s[2] == '#') {
2751                 while (s+1 < send && *s != ')')
2752                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2753             }
2754             else if (s[2] == '{' /* This should match regcomp.c */
2755                     || (s[2] == '?' && s[3] == '{'))
2756             {
2757                 I32 count = 1;
2758                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2759                 char c;
2760
2761                 while (count && (c = *regparse)) {
2762                     if (c == '\\' && regparse[1])
2763                         regparse++;
2764                     else if (c == '{')
2765                         count++;
2766                     else if (c == '}')
2767                         count--;
2768                     regparse++;
2769                 }
2770                 if (*regparse != ')')
2771                     regparse--;         /* Leave one char for continuation. */
2772                 while (s < regparse)
2773                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2774             }
2775         }
2776
2777         /* likewise skip #-initiated comments in //x patterns */
2778         else if (*s == '#' && PL_lex_inpat &&
2779           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2780             while (s+1 < send && *s != '\n')
2781                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2782         }
2783
2784         /* check for embedded arrays
2785            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2786            */
2787         else if (*s == '@' && s[1]) {
2788             if (isALNUM_lazy_if(s+1,UTF))
2789                 break;
2790             if (strchr(":'{$", s[1]))
2791                 break;
2792             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2793                 break; /* in regexp, neither @+ nor @- are interpolated */
2794         }
2795
2796         /* check for embedded scalars.  only stop if we're sure it's a
2797            variable.
2798         */
2799         else if (*s == '$') {
2800             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2801                 break;
2802             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2803                 if (s[1] == '\\') {
2804                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2805                                    "Possible unintended interpolation of $\\ in regex");
2806                 }
2807                 break;          /* in regexp, $ might be tail anchor */
2808             }
2809         }
2810
2811         /* End of else if chain - OP_TRANS rejoin rest */
2812
2813         /* backslashes */
2814         if (*s == '\\' && s+1 < send) {
2815             char* e;    /* Can be used for ending '}', etc. */
2816
2817             s++;
2818
2819             /* warn on \1 - \9 in substitution replacements, but note that \11
2820              * is an octal; and \19 is \1 followed by '9' */
2821             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2822                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2823             {
2824                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2825                 *--s = '$';
2826                 break;
2827             }
2828
2829             /* string-change backslash escapes */
2830             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2831                 --s;
2832                 break;
2833             }
2834             /* In a pattern, process \N, but skip any other backslash escapes.
2835              * This is because we don't want to translate an escape sequence
2836              * into a meta symbol and have the regex compiler use the meta
2837              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2838              * in spite of this, we do have to process \N here while the proper
2839              * charnames handler is in scope.  See bugs #56444 and #62056.
2840              * There is a complication because \N in a pattern may also stand
2841              * for 'match a non-nl', and not mean a charname, in which case its
2842              * processing should be deferred to the regex compiler.  To be a
2843              * charname it must be followed immediately by a '{', and not look
2844              * like \N followed by a curly quantifier, i.e., not something like
2845              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2846              * quantifier */
2847             else if (PL_lex_inpat
2848                     && (*s != 'N'
2849                         || s[1] != '{'
2850                         || regcurly(s + 1)))
2851             {
2852                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2853                 goto default_action;
2854             }
2855
2856             switch (*s) {
2857
2858             /* quoted - in transliterations */
2859             case '-':
2860                 if (PL_lex_inwhat == OP_TRANS) {
2861                     *d++ = *s++;
2862                     continue;
2863                 }
2864                 /* FALL THROUGH */
2865             default:
2866                 {
2867                     if ((isALPHA(*s) || isDIGIT(*s)))
2868                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2869                                        "Unrecognized escape \\%c passed through",
2870                                        *s);
2871                     /* default action is to copy the quoted character */
2872                     goto default_action;
2873                 }
2874
2875             /* eg. \132 indicates the octal constant 0132 */
2876             case '0': case '1': case '2': case '3':
2877             case '4': case '5': case '6': case '7':
2878                 {
2879                     I32 flags = 0;
2880                     STRLEN len = 3;
2881                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2882                     s += len;
2883                 }
2884                 goto NUM_ESCAPE_INSERT;
2885
2886             /* eg. \o{24} indicates the octal constant \024 */
2887             case 'o':
2888                 {
2889                     STRLEN len;
2890                     const char* error;
2891
2892                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2893                     s += len;
2894                     if (! valid) {
2895                         yyerror(error);
2896                         continue;
2897                     }
2898                     goto NUM_ESCAPE_INSERT;
2899                 }
2900
2901             /* eg. \x24 indicates the hex constant 0x24 */
2902             case 'x':
2903                 ++s;
2904                 if (*s == '{') {
2905                     char* const e = strchr(s, '}');
2906                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2907                       PERL_SCAN_DISALLOW_PREFIX;
2908                     STRLEN len;
2909
2910                     ++s;
2911                     if (!e) {
2912                         yyerror("Missing right brace on \\x{}");
2913                         continue;
2914                     }
2915                     len = e - s;
2916                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2917                     s = e + 1;
2918                 }
2919                 else {
2920                     {
2921                         STRLEN len = 2;
2922                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2923                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2924                         s += len;
2925                     }
2926                 }
2927
2928               NUM_ESCAPE_INSERT:
2929                 /* Insert oct or hex escaped character.  There will always be
2930                  * enough room in sv since such escapes will be longer than any
2931                  * UTF-8 sequence they can end up as, except if they force us
2932                  * to recode the rest of the string into utf8 */
2933
2934                 /* Here uv is the ordinal of the next character being added in
2935                  * unicode (converted from native). */
2936                 if (!UNI_IS_INVARIANT(uv)) {
2937                     if (!has_utf8 && uv > 255) {
2938                         /* Might need to recode whatever we have accumulated so
2939                          * far if it contains any chars variant in utf8 or
2940                          * utf-ebcdic. */
2941
2942                         SvCUR_set(sv, d - SvPVX_const(sv));
2943                         SvPOK_on(sv);
2944                         *d = '\0';
2945                         /* See Note on sizing above.  */
2946                         sv_utf8_upgrade_flags_grow(sv,
2947                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2948                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2949                         d = SvPVX(sv) + SvCUR(sv);
2950                         has_utf8 = TRUE;
2951                     }
2952
2953                     if (has_utf8) {
2954                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2955                         if (PL_lex_inwhat == OP_TRANS &&
2956                             PL_sublex_info.sub_op) {
2957                             PL_sublex_info.sub_op->op_private |=
2958                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2959                                              : OPpTRANS_TO_UTF);
2960                         }
2961 #ifdef EBCDIC
2962                         if (uv > 255 && !dorange)
2963                             native_range = FALSE;
2964 #endif
2965                     }
2966                     else {
2967                         *d++ = (char)uv;
2968                     }
2969                 }
2970                 else {
2971                     *d++ = (char) uv;
2972                 }
2973                 continue;
2974
2975             case 'N':
2976                 /* In a non-pattern \N must be a named character, like \N{LATIN
2977                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
2978                  * mean to match a non-newline.  For non-patterns, named
2979                  * characters are converted to their string equivalents. In
2980                  * patterns, named characters are not converted to their
2981                  * ultimate forms for the same reasons that other escapes
2982                  * aren't.  Instead, they are converted to the \N{U+...} form
2983                  * to get the value from the charnames that is in effect right
2984                  * now, while preserving the fact that it was a named character
2985                  * so that the regex compiler knows this */
2986
2987                 /* This section of code doesn't generally use the
2988                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
2989                  * a close examination of this macro and determined it is a
2990                  * no-op except on utfebcdic variant characters.  Every
2991                  * character generated by this that would normally need to be
2992                  * enclosed by this macro is invariant, so the macro is not
2993                  * needed, and would complicate use of copy(). There are other
2994                  * parts of this file where the macro is used inconsistently,
2995                  * but are saved by it being a no-op */
2996
2997                 /* The structure of this section of code (besides checking for
2998                  * errors and upgrading to utf8) is:
2999                  *  Further disambiguate between the two meanings of \N, and if
3000                  *      not a charname, go process it elsewhere
3001                  *  If of form \N{U+...}, pass it through if a pattern;
3002                  *      otherwise convert to utf8
3003                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3004                  *  pattern; otherwise convert to utf8 */
3005
3006                 /* Here, s points to the 'N'; the test below is guaranteed to
3007                  * succeed if we are being called on a pattern as we already
3008                  * know from a test above that the next character is a '{'.
3009                  * On a non-pattern \N must mean 'named sequence, which
3010                  * requires braces */
3011                 s++;
3012                 if (*s != '{') {
3013                     yyerror("Missing braces on \\N{}");
3014                     continue;
3015                 }
3016                 s++;
3017
3018                 /* If there is no matching '}', it is an error. */
3019                 if (! (e = strchr(s, '}'))) {
3020                     if (! PL_lex_inpat) {
3021                         yyerror("Missing right brace on \\N{}");
3022                     } else {
3023                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3024                     }
3025                     continue;
3026                 }
3027
3028                 /* Here it looks like a named character */
3029
3030                 if (PL_lex_inpat) {
3031
3032                     /* XXX This block is temporary code.  \N{} implies that the
3033                      * pattern is to have Unicode semantics, and therefore
3034                      * currently has to be encoded in utf8.  By putting it in
3035                      * utf8 now, we save a whole pass in the regular expression
3036                      * compiler.  Once that code is changed so Unicode
3037                      * semantics doesn't necessarily have to be in utf8, this
3038                      * block should be removed */
3039                     if (!has_utf8) {
3040                         SvCUR_set(sv, d - SvPVX_const(sv));
3041                         SvPOK_on(sv);
3042                         *d = '\0';
3043                         /* See Note on sizing above.  */
3044                         sv_utf8_upgrade_flags_grow(sv,
3045                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3046                                         /* 5 = '\N{' + cur char + NUL */
3047                                         (STRLEN)(send - s) + 5);
3048                         d = SvPVX(sv) + SvCUR(sv);
3049                         has_utf8 = TRUE;
3050                     }
3051                 }
3052
3053                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3054                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3055                                 | PERL_SCAN_DISALLOW_PREFIX;
3056                     STRLEN len;
3057
3058                     /* For \N{U+...}, the '...' is a unicode value even on
3059                      * EBCDIC machines */
3060                     s += 2;         /* Skip to next char after the 'U+' */
3061                     len = e - s;
3062                     uv = grok_hex(s, &len, &flags, NULL);
3063                     if (len == 0 || len != (STRLEN)(e - s)) {
3064                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3065                         s = e + 1;
3066                         continue;
3067                     }
3068
3069                     if (PL_lex_inpat) {
3070
3071                         /* Pass through to the regex compiler unchanged.  The
3072                          * reason we evaluated the number above is to make sure
3073                          * there wasn't a syntax error. */
3074                         s -= 5;     /* Include the '\N{U+' */
3075                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3076                         d += e - s + 1;
3077                     }
3078                     else {  /* Not a pattern: convert the hex to string */
3079
3080                          /* If destination is not in utf8, unconditionally
3081                           * recode it to be so.  This is because \N{} implies
3082                           * Unicode semantics, and scalars have to be in utf8
3083                           * to guarantee those semantics */
3084                         if (! has_utf8) {
3085                             SvCUR_set(sv, d - SvPVX_const(sv));
3086                             SvPOK_on(sv);
3087                             *d = '\0';
3088                             /* See Note on sizing above.  */
3089                             sv_utf8_upgrade_flags_grow(
3090                                         sv,
3091                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3092                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3093                             d = SvPVX(sv) + SvCUR(sv);
3094                             has_utf8 = TRUE;
3095                         }
3096
3097                         /* Add the string to the output */
3098                         if (UNI_IS_INVARIANT(uv)) {
3099                             *d++ = (char) uv;
3100                         }
3101                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3102                     }
3103                 }
3104                 else { /* Here is \N{NAME} but not \N{U+...}. */
3105
3106                     SV *res;            /* result from charnames */
3107                     const char *str;    /* the string in 'res' */
3108                     STRLEN len;         /* its length */
3109
3110                     /* Get the value for NAME */
3111                     res = newSVpvn(s, e - s);
3112                     res = new_constant( NULL, 0, "charnames",
3113                                         /* includes all of: \N{...} */
3114                                         res, NULL, s - 3, e - s + 4 );
3115
3116                     /* Most likely res will be in utf8 already since the
3117                      * standard charnames uses pack U, but a custom translator
3118                      * can leave it otherwise, so make sure.  XXX This can be
3119                      * revisited to not have charnames use utf8 for characters
3120                      * that don't need it when regexes don't have to be in utf8
3121                      * for Unicode semantics.  If doing so, remember EBCDIC */
3122                     sv_utf8_upgrade(res);
3123                     str = SvPV_const(res, len);
3124
3125                     /* Don't accept malformed input */
3126                     if (! is_utf8_string((U8 *) str, len)) {
3127                         yyerror("Malformed UTF-8 returned by \\N");
3128                     }
3129                     else if (PL_lex_inpat) {
3130
3131                         if (! len) { /* The name resolved to an empty string */
3132                             Copy("\\N{}", d, 4, char);
3133                             d += 4;
3134                         }
3135                         else {
3136                             /* In order to not lose information for the regex
3137                             * compiler, pass the result in the specially made
3138                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3139                             * the code points in hex of each character
3140                             * returned by charnames */
3141
3142                             const char *str_end = str + len;
3143                             STRLEN char_length;     /* cur char's byte length */
3144                             STRLEN output_length;   /* and the number of bytes
3145                                                        after this is translated
3146                                                        into hex digits */
3147                             const STRLEN off = d - SvPVX_const(sv);
3148
3149                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3150                              * max('U+', '.'); and 1 for NUL */
3151                             char hex_string[2 * UTF8_MAXBYTES + 5];
3152
3153                             /* Get the first character of the result. */
3154                             U32 uv = utf8n_to_uvuni((U8 *) str,
3155                                                     len,
3156                                                     &char_length,
3157                                                     UTF8_ALLOW_ANYUV);
3158
3159                             /* The call to is_utf8_string() above hopefully
3160                              * guarantees that there won't be an error.  But
3161                              * it's easy here to make sure.  The function just
3162                              * above warns and returns 0 if invalid utf8, but
3163                              * it can also return 0 if the input is validly a
3164                              * NUL. Disambiguate */
3165                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3166                                 uv = UNICODE_REPLACEMENT;
3167                             }
3168
3169                             /* Convert first code point to hex, including the
3170                              * boiler plate before it */
3171                             sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3172                             output_length = strlen(hex_string);
3173
3174                             /* Make sure there is enough space to hold it */
3175                             d = off + SvGROW(sv, off
3176                                                  + output_length
3177                                                  + (STRLEN)(send - e)
3178                                                  + 2);  /* '}' + NUL */
3179                             /* And output it */
3180                             Copy(hex_string, d, output_length, char);
3181                             d += output_length;
3182
3183                             /* For each subsequent character, append dot and
3184                              * its ordinal in hex */
3185                             while ((str += char_length) < str_end) {
3186                                 const STRLEN off = d - SvPVX_const(sv);
3187                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3188                                                         str_end - str,
3189                                                         &char_length,
3190                                                         UTF8_ALLOW_ANYUV);
3191                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3192                                     uv = UNICODE_REPLACEMENT;
3193                                 }
3194
3195                                 sprintf(hex_string, ".%X", (unsigned int) uv);
3196                                 output_length = strlen(hex_string);
3197
3198                                 d = off + SvGROW(sv, off
3199                                                      + output_length
3200                                                      + (STRLEN)(send - e)
3201                                                      + 2);      /* '}' +  NUL */
3202                                 Copy(hex_string, d, output_length, char);
3203                                 d += output_length;
3204                             }
3205
3206                             *d++ = '}'; /* Done.  Add the trailing brace */
3207                         }
3208                     }
3209                     else { /* Here, not in a pattern.  Convert the name to a
3210                             * string. */
3211
3212                          /* If destination is not in utf8, unconditionally
3213                           * recode it to be so.  This is because \N{} implies
3214                           * Unicode semantics, and scalars have to be in utf8
3215                           * to guarantee those semantics */
3216                         if (! has_utf8) {
3217                             SvCUR_set(sv, d - SvPVX_const(sv));
3218                             SvPOK_on(sv);
3219                             *d = '\0';
3220                             /* See Note on sizing above.  */
3221                             sv_utf8_upgrade_flags_grow(sv,
3222                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3223                                                 len + (STRLEN)(send - s) + 1);
3224                             d = SvPVX(sv) + SvCUR(sv);
3225                             has_utf8 = TRUE;
3226                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3227
3228                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3229                              * set correctly here). */
3230                             const STRLEN off = d - SvPVX_const(sv);
3231                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3232                         }
3233                         Copy(str, d, len, char);
3234                         d += len;
3235                     }
3236                     SvREFCNT_dec(res);
3237
3238                     /* Deprecate non-approved name syntax */
3239                     if (ckWARN_d(WARN_DEPRECATED)) {
3240                         bool problematic = FALSE;
3241                         char* i = s;
3242
3243                         /* For non-ut8 input, look to see that the first
3244                          * character is an alpha, then loop through the rest
3245                          * checking that each is a continuation */
3246                         if (! this_utf8) {
3247                             if (! isALPHAU(*i)) problematic = TRUE;
3248                             else for (i = s + 1; i < e; i++) {
3249                                 if (isCHARNAME_CONT(*i)) continue;
3250                                 problematic = TRUE;
3251                                 break;
3252                             }
3253                         }
3254                         else {
3255                             /* Similarly for utf8.  For invariants can check
3256                              * directly.  We accept anything above the latin1
3257                              * range because it is immaterial to Perl if it is
3258                              * correct or not, and is expensive to check.  But
3259                              * it is fairly easy in the latin1 range to convert
3260                              * the variants into a single character and check
3261                              * those */
3262                             if (UTF8_IS_INVARIANT(*i)) {
3263                                 if (! isALPHAU(*i)) problematic = TRUE;
3264                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3265                                 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3266                                                                             *(i+1)))))
3267                                 {
3268                                     problematic = TRUE;
3269                                 }
3270                             }
3271                             if (! problematic) for (i = s + UTF8SKIP(s);
3272                                                     i < e;
3273                                                     i+= UTF8SKIP(i))
3274                             {
3275                                 if (UTF8_IS_INVARIANT(*i)) {
3276                                     if (isCHARNAME_CONT(*i)) continue;
3277                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3278                                     continue;
3279                                 } else if (isCHARNAME_CONT(
3280                                             UNI_TO_NATIVE(
3281                                             UTF8_ACCUMULATE(*i, *(i+1)))))
3282                                 {
3283                                     continue;
3284                                 }
3285                                 problematic = TRUE;
3286                                 break;
3287                             }
3288                         }
3289                         if (problematic) {
3290                             /* The e-i passed to the final %.*s makes sure that
3291                              * should the trailing NUL be missing that this
3292                              * print won't run off the end of the string */
3293                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3294                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3295                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3296                         }
3297                     }
3298                 } /* End \N{NAME} */
3299 #ifdef EBCDIC
3300                 if (!dorange)
3301                     native_range = FALSE; /* \N{} is defined to be Unicode */
3302 #endif
3303                 s = e + 1;  /* Point to just after the '}' */
3304                 continue;
3305
3306             /* \c is a control character */
3307             case 'c':
3308                 s++;
3309                 if (s < send) {
3310                     *d++ = grok_bslash_c(*s++, 1);
3311                 }
3312                 else {
3313                     yyerror("Missing control char name in \\c");
3314                 }
3315                 continue;
3316
3317             /* printf-style backslashes, formfeeds, newlines, etc */
3318             case 'b':
3319                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3320                 break;
3321             case 'n':
3322                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3323                 break;
3324             case 'r':
3325                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3326                 break;
3327             case 'f':
3328                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3329                 break;
3330             case 't':
3331                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3332                 break;
3333             case 'e':
3334                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3335                 break;
3336             case 'a':
3337                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3338                 break;
3339             } /* end switch */
3340
3341             s++;
3342             continue;
3343         } /* end if (backslash) */
3344 #ifdef EBCDIC
3345         else
3346             literal_endpoint++;
3347 #endif
3348
3349     default_action:
3350         /* If we started with encoded form, or already know we want it,
3351            then encode the next character */
3352         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3353             STRLEN len  = 1;
3354
3355
3356             /* One might think that it is wasted effort in the case of the
3357              * source being utf8 (this_utf8 == TRUE) to take the next character
3358              * in the source, convert it to an unsigned value, and then convert
3359              * it back again.  But the source has not been validated here.  The
3360              * routine that does the conversion checks for errors like
3361              * malformed utf8 */
3362
3363             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3364             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3365             if (!has_utf8) {
3366                 SvCUR_set(sv, d - SvPVX_const(sv));
3367                 SvPOK_on(sv);
3368                 *d = '\0';
3369                 /* See Note on sizing above.  */
3370                 sv_utf8_upgrade_flags_grow(sv,
3371                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3372                                         need + (STRLEN)(send - s) + 1);
3373                 d = SvPVX(sv) + SvCUR(sv);
3374                 has_utf8 = TRUE;
3375             } else if (need > len) {
3376                 /* encoded value larger than old, may need extra space (NOTE:
3377                  * SvCUR() is not set correctly here).   See Note on sizing
3378                  * above.  */
3379                 const STRLEN off = d - SvPVX_const(sv);
3380                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3381             }
3382             s += len;
3383
3384             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3385 #ifdef EBCDIC
3386             if (uv > 255 && !dorange)
3387                 native_range = FALSE;
3388 #endif
3389         }
3390         else {
3391             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3392         }
3393     } /* while loop to process each character */
3394
3395     /* terminate the string and set up the sv */
3396     *d = '\0';
3397     SvCUR_set(sv, d - SvPVX_const(sv));
3398     if (SvCUR(sv) >= SvLEN(sv))
3399         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3400
3401     SvPOK_on(sv);
3402     if (PL_encoding && !has_utf8) {
3403         sv_recode_to_utf8(sv, PL_encoding);
3404         if (SvUTF8(sv))
3405             has_utf8 = TRUE;
3406     }
3407     if (has_utf8) {
3408         SvUTF8_on(sv);
3409         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3410             PL_sublex_info.sub_op->op_private |=
3411                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3412         }
3413     }
3414
3415     /* shrink the sv if we allocated more than we used */
3416     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3417         SvPV_shrink_to_cur(sv);
3418     }
3419
3420     /* return the substring (via pl_yylval) only if we parsed anything */
3421     if (s > PL_bufptr) {
3422         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3423             const char *const key = PL_lex_inpat ? "qr" : "q";
3424             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3425             const char *type;
3426             STRLEN typelen;
3427
3428             if (PL_lex_inwhat == OP_TRANS) {
3429                 type = "tr";
3430                 typelen = 2;
3431             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3432                 type = "s";
3433                 typelen = 1;
3434             } else  {
3435                 type = "qq";
3436                 typelen = 2;
3437             }
3438
3439             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3440                                 type, typelen);
3441         }
3442         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3443     } else
3444         SvREFCNT_dec(sv);
3445     return s;
3446 }
3447
3448 /* S_intuit_more
3449  * Returns TRUE if there's more to the expression (e.g., a subscript),
3450  * FALSE otherwise.
3451  *
3452  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3453  *
3454  * ->[ and ->{ return TRUE
3455  * { and [ outside a pattern are always subscripts, so return TRUE
3456  * if we're outside a pattern and it's not { or [, then return FALSE
3457  * if we're in a pattern and the first char is a {
3458  *   {4,5} (any digits around the comma) returns FALSE
3459  * if we're in a pattern and the first char is a [
3460  *   [] returns FALSE
3461  *   [SOMETHING] has a funky algorithm to decide whether it's a
3462  *      character class or not.  It has to deal with things like
3463  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3464  * anything else returns TRUE
3465  */
3466
3467 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3468
3469 STATIC int
3470 S_intuit_more(pTHX_ register char *s)
3471 {
3472     dVAR;
3473
3474     PERL_ARGS_ASSERT_INTUIT_MORE;
3475
3476     if (PL_lex_brackets)
3477         return TRUE;
3478     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3479         return TRUE;
3480     if (*s != '{' && *s != '[')
3481         return FALSE;
3482     if (!PL_lex_inpat)
3483         return TRUE;
3484
3485     /* In a pattern, so maybe we have {n,m}. */
3486     if (*s == '{') {
3487         s++;
3488         if (!isDIGIT(*s))
3489             return TRUE;
3490         while (isDIGIT(*s))
3491             s++;
3492         if (*s == ',')
3493             s++;
3494         while (isDIGIT(*s))
3495             s++;
3496         if (*s == '}')
3497             return FALSE;
3498         return TRUE;
3499
3500     }
3501
3502     /* On the other hand, maybe we have a character class */
3503
3504     s++;
3505     if (*s == ']' || *s == '^')
3506         return FALSE;
3507     else {
3508         /* this is terrifying, and it works */
3509         int weight = 2;         /* let's weigh the evidence */
3510         char seen[256];
3511         unsigned char un_char = 255, last_un_char;
3512         const char * const send = strchr(s,']');
3513         char tmpbuf[sizeof PL_tokenbuf * 4];
3514
3515         if (!send)              /* has to be an expression */
3516             return TRUE;
3517
3518         Zero(seen,256,char);
3519         if (*s == '$')
3520             weight -= 3;
3521         else if (isDIGIT(*s)) {
3522             if (s[1] != ']') {
3523                 if (isDIGIT(s[1]) && s[2] == ']')
3524                     weight -= 10;
3525             }
3526             else
3527                 weight -= 100;
3528         }
3529         for (; s < send; s++) {
3530             last_un_char = un_char;
3531             un_char = (unsigned char)*s;
3532             switch (*s) {
3533             case '@':
3534             case '&':
3535             case '$':
3536                 weight -= seen[un_char] * 10;
3537                 if (isALNUM_lazy_if(s+1,UTF)) {
3538                     int len;
3539                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3540                     len = (int)strlen(tmpbuf);
3541                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3542                         weight -= 100;
3543                     else
3544                         weight -= 10;
3545                 }
3546                 else if (*s == '$' && s[1] &&
3547                   strchr("[#!%*<>()-=",s[1])) {
3548                     if (/*{*/ strchr("])} =",s[2]))
3549                         weight -= 10;
3550                     else
3551                         weight -= 1;
3552                 }
3553                 break;
3554             case '\\':
3555                 un_char = 254;
3556                 if (s[1]) {
3557                     if (strchr("wds]",s[1]))
3558                         weight += 100;
3559                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3560                         weight += 1;
3561                     else if (strchr("rnftbxcav",s[1]))
3562                         weight += 40;
3563                     else if (isDIGIT(s[1])) {
3564                         weight += 40;
3565                         while (s[1] && isDIGIT(s[1]))
3566                             s++;
3567                     }
3568                 }
3569                 else
3570                     weight += 100;
3571                 break;
3572             case '-':
3573                 if (s[1] == '\\')
3574                     weight += 50;
3575                 if (strchr("aA01! ",last_un_char))
3576                     weight += 30;
3577                 if (strchr("zZ79~",s[1]))
3578                     weight += 30;
3579                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3580                     weight -= 5;        /* cope with negative subscript */
3581                 break;
3582             default:
3583                 if (!isALNUM(last_un_char)
3584                     && !(last_un_char == '$' || last_un_char == '@'
3585                          || last_un_char == '&')
3586                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3587                     char *d = tmpbuf;
3588                     while (isALPHA(*s))
3589                         *d++ = *s++;
3590                     *d = '\0';
3591                     if (keyword(tmpbuf, d - tmpbuf, 0))
3592                         weight -= 150;
3593                 }
3594                 if (un_char == last_un_char + 1)
3595                     weight += 5;
3596                 weight -= seen[un_char];
3597                 break;
3598             }
3599             seen[un_char]++;
3600         }
3601         if (weight >= 0)        /* probably a character class */
3602             return FALSE;
3603     }
3604
3605     return TRUE;
3606 }
3607
3608 /*
3609  * S_intuit_method
3610  *
3611  * Does all the checking to disambiguate
3612  *   foo bar
3613  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3614  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3615  *
3616  * First argument is the stuff after the first token, e.g. "bar".
3617  *
3618  * Not a method if bar is a filehandle.
3619  * Not a method if foo is a subroutine prototyped to take a filehandle.
3620  * Not a method if it's really "Foo $bar"
3621  * Method if it's "foo $bar"
3622  * Not a method if it's really "print foo $bar"
3623  * Method if it's really "foo package::" (interpreted as package->foo)
3624  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3625  * Not a method if bar is a filehandle or package, but is quoted with
3626  *   =>
3627  */
3628
3629 STATIC int
3630 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3631 {
3632     dVAR;
3633     char *s = start + (*start == '$');
3634     char tmpbuf[sizeof PL_tokenbuf];
3635     STRLEN len;
3636     GV* indirgv;
3637 #ifdef PERL_MAD
3638     int soff;
3639 #endif
3640
3641     PERL_ARGS_ASSERT_INTUIT_METHOD;
3642
3643     if (gv) {
3644         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3645             return 0;
3646         if (cv) {
3647             if (SvPOK(cv)) {
3648                 const char *proto = SvPVX_const(cv);
3649                 if (proto) {
3650                     if (*proto == ';')
3651                         proto++;
3652                     if (*proto == '*')
3653                         return 0;
3654                 }
3655             }
3656         } else
3657             gv = NULL;
3658     }
3659     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3660     /* start is the beginning of the possible filehandle/object,
3661      * and s is the end of it
3662      * tmpbuf is a copy of it
3663      */
3664
3665     if (*start == '$') {
3666         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3667                 isUPPER(*PL_tokenbuf))
3668             return 0;
3669 #ifdef PERL_MAD
3670         len = start - SvPVX(PL_linestr);
3671 #endif
3672         s = PEEKSPACE(s);
3673 #ifdef PERL_MAD
3674         start = SvPVX(PL_linestr) + len;
3675 #endif
3676         PL_bufptr = start;
3677         PL_expect = XREF;
3678         return *s == '(' ? FUNCMETH : METHOD;
3679     }
3680     if (!keyword(tmpbuf, len, 0)) {
3681         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3682             len -= 2;
3683             tmpbuf[len] = '\0';
3684 #ifdef PERL_MAD
3685             soff = s - SvPVX(PL_linestr);
3686 #endif
3687             goto bare_package;
3688         }
3689         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3690         if (indirgv && GvCVu(indirgv))
3691             return 0;
3692         /* filehandle or package name makes it a method */
3693         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3694 #ifdef PERL_MAD
3695             soff = s - SvPVX(PL_linestr);
3696 #endif
3697             s = PEEKSPACE(s);
3698             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3699                 return 0;       /* no assumptions -- "=>" quotes bearword */
3700       bare_package:
3701             start_force(PL_curforce);
3702             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3703                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3704             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3705             if (PL_madskills)
3706                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3707             PL_expect = XTERM;
3708             force_next(WORD);
3709             PL_bufptr = s;
3710 #ifdef PERL_MAD
3711             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3712 #endif
3713             return *s == '(' ? FUNCMETH : METHOD;
3714         }
3715     }
3716     return 0;
3717 }
3718
3719 /* Encoded script support. filter_add() effectively inserts a
3720  * 'pre-processing' function into the current source input stream.
3721  * Note that the filter function only applies to the current source file
3722  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3723  *
3724  * The datasv parameter (which may be NULL) can be used to pass
3725  * private data to this instance of the filter. The filter function
3726  * can recover the SV using the FILTER_DATA macro and use it to
3727  * store private buffers and state information.
3728  *
3729  * The supplied datasv parameter is upgraded to a PVIO type
3730  * and the IoDIRP/IoANY field is used to store the function pointer,
3731  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3732  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3733  * private use must be set using malloc'd pointers.
3734  */
3735
3736 SV *
3737 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3738 {
3739     dVAR;
3740     if (!funcp)
3741         return NULL;
3742
3743     if (!PL_parser)
3744         return NULL;
3745
3746     if (!PL_rsfp_filters)
3747         PL_rsfp_filters = newAV();
3748     if (!datasv)
3749         datasv = newSV(0);
3750     SvUPGRADE(datasv, SVt_PVIO);
3751     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3752     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3753     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3754                           FPTR2DPTR(void *, IoANY(datasv)),
3755                           SvPV_nolen(datasv)));
3756     av_unshift(PL_rsfp_filters, 1);
3757     av_store(PL_rsfp_filters, 0, datasv) ;
3758     return(datasv);
3759 }
3760
3761
3762 /* Delete most recently added instance of this filter function. */
3763 void
3764 Perl_filter_del(pTHX_ filter_t funcp)
3765 {
3766     dVAR;
3767     SV *datasv;
3768
3769     PERL_ARGS_ASSERT_FILTER_DEL;
3770
3771 #ifdef DEBUGGING
3772     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3773                           FPTR2DPTR(void*, funcp)));
3774 #endif
3775     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3776         return;
3777     /* if filter is on top of stack (usual case) just pop it off */
3778     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3779     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3780         sv_free(av_pop(PL_rsfp_filters));
3781
3782         return;
3783     }
3784     /* we need to search for the correct entry and clear it     */
3785     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3786 }
3787
3788
3789 /* Invoke the idxth filter function for the current rsfp.        */
3790 /* maxlen 0 = read one text line */
3791 I32
3792 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3793 {
3794     dVAR;
3795     filter_t funcp;
3796     SV *datasv = NULL;
3797     /* This API is bad. It should have been using unsigned int for maxlen.
3798        Not sure if we want to change the API, but if not we should sanity
3799        check the value here.  */
3800     const unsigned int correct_length
3801         = maxlen < 0 ?
3802 #ifdef PERL_MICRO
3803         0x7FFFFFFF
3804 #else
3805         INT_MAX
3806 #endif
3807         : maxlen;
3808
3809     PERL_ARGS_ASSERT_FILTER_READ;
3810
3811     if (!PL_parser || !PL_rsfp_filters)
3812         return -1;
3813     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3814         /* Provide a default input filter to make life easy.    */
3815         /* Note that we append to the line. This is handy.      */
3816         DEBUG_P(PerlIO_printf(Perl_debug_log,
3817                               "filter_read %d: from rsfp\n", idx));
3818         if (correct_length) {
3819             /* Want a block */
3820             int len ;
3821             const int old_len = SvCUR(buf_sv);
3822
3823             /* ensure buf_sv is large enough */
3824             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3825             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3826                                    correct_length)) <= 0) {
3827                 if (PerlIO_error(PL_rsfp))
3828                     return -1;          /* error */
3829                 else
3830                     return 0 ;          /* end of file */
3831             }
3832             SvCUR_set(buf_sv, old_len + len) ;
3833             SvPVX(buf_sv)[old_len + len] = '\0';
3834         } else {
3835             /* Want a line */
3836             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3837                 if (PerlIO_error(PL_rsfp))
3838                     return -1;          /* error */
3839                 else
3840                     return 0 ;          /* end of file */
3841             }
3842         }
3843         return SvCUR(buf_sv);
3844     }
3845     /* Skip this filter slot if filter has been deleted */
3846     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3847         DEBUG_P(PerlIO_printf(Perl_debug_log,
3848                               "filter_read %d: skipped (filter deleted)\n",
3849                               idx));
3850         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3851     }
3852     /* Get function pointer hidden within datasv        */
3853     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3854     DEBUG_P(PerlIO_printf(Perl_debug_log,
3855                           "filter_read %d: via function %p (%s)\n",
3856                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3857     /* Call function. The function is expected to       */
3858     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3859     /* Return: <0:error, =0:eof, >0:not eof             */
3860     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3861 }
3862
3863 STATIC char *
3864 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3865 {
3866     dVAR;
3867
3868     PERL_ARGS_ASSERT_FILTER_GETS;
3869
3870 #ifdef PERL_CR_FILTER
3871     if (!PL_rsfp_filters) {
3872         filter_add(S_cr_textfilter,NULL);
3873     }
3874 #endif
3875     if (PL_rsfp_filters) {
3876         if (!append)
3877             SvCUR_set(sv, 0);   /* start with empty line        */
3878         if (FILTER_READ(0, sv, 0) > 0)
3879             return ( SvPVX(sv) ) ;
3880         else
3881             return NULL ;
3882     }
3883     else
3884         return (sv_gets(sv, PL_rsfp, append));
3885 }
3886
3887 STATIC HV *
3888 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3889 {
3890     dVAR;
3891     GV *gv;
3892
3893     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3894
3895     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3896         return PL_curstash;
3897
3898     if (len > 2 &&
3899         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3900         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3901     {
3902         return GvHV(gv);                        /* Foo:: */
3903     }
3904
3905     /* use constant CLASS => 'MyClass' */
3906     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3907     if (gv && GvCV(gv)) {
3908         SV * const sv = cv_const_sv(GvCV(gv));
3909         if (sv)
3910             pkgname = SvPV_const(sv, len);
3911     }
3912
3913     return gv_stashpvn(pkgname, len, 0);
3914 }
3915
3916 /*
3917  * S_readpipe_override
3918  * Check whether readpipe() is overriden, and generates the appropriate
3919  * optree, provided sublex_start() is called afterwards.
3920  */
3921 STATIC void
3922 S_readpipe_override(pTHX)
3923 {
3924     GV **gvp;
3925     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3926     pl_yylval.ival = OP_BACKTICK;
3927     if ((gv_readpipe
3928                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3929             ||
3930             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3931              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3932              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3933     {
3934         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3935             append_elem(OP_LIST,
3936                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3937                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3938     }
3939 }
3940
3941 #ifdef PERL_MAD
3942  /*
3943  * Perl_madlex
3944  * The intent of this yylex wrapper is to minimize the changes to the
3945  * tokener when we aren't interested in collecting madprops.  It remains
3946  * to be seen how successful this strategy will be...
3947  */
3948
3949 int
3950 Perl_madlex(pTHX)
3951 {
3952     int optype;
3953     char *s = PL_bufptr;
3954
3955     /* make sure PL_thiswhite is initialized */
3956     PL_thiswhite = 0;
3957     PL_thismad = 0;
3958
3959     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3960     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
3961         return S_pending_ident(aTHX);
3962
3963     /* previous token ate up our whitespace? */
3964     if (!PL_lasttoke && PL_nextwhite) {
3965         PL_thiswhite = PL_nextwhite;
3966         PL_nextwhite = 0;
3967     }
3968
3969     /* isolate the token, and figure out where it is without whitespace */
3970     PL_realtokenstart = -1;
3971     PL_thistoken = 0;
3972     optype = yylex();
3973     s = PL_bufptr;
3974     assert(PL_curforce < 0);
3975
3976     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3977         if (!PL_thistoken) {
3978             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3979                 PL_thistoken = newSVpvs("");
3980             else {
3981                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3982                 PL_thistoken = newSVpvn(tstart, s - tstart);
3983             }
3984         }
3985         if (PL_thismad) /* install head */
3986             CURMAD('X', PL_thistoken);
3987     }
3988
3989     /* last whitespace of a sublex? */
3990     if (optype == ')' && PL_endwhite) {
3991         CURMAD('X', PL_endwhite);
3992     }
3993
3994     if (!PL_thismad) {
3995
3996         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3997         if (!PL_thiswhite && !PL_endwhite && !optype) {
3998             sv_free(PL_thistoken);
3999             PL_thistoken = 0;
4000             return 0;
4001         }
4002
4003         /* put off final whitespace till peg */
4004         if (optype == ';' && !PL_rsfp) {
4005             PL_nextwhite = PL_thiswhite;
4006             PL_thiswhite = 0;
4007         }
4008         else if (PL_thisopen) {
4009             CURMAD('q', PL_thisopen);
4010             if (PL_thistoken)
4011                 sv_free(PL_thistoken);
4012             PL_thistoken = 0;
4013         }
4014         else {
4015             /* Store actual token text as madprop X */
4016             CURMAD('X', PL_thistoken);
4017         }
4018
4019         if (PL_thiswhite) {
4020             /* add preceding whitespace as madprop _ */
4021             CURMAD('_', PL_thiswhite);
4022         }
4023
4024         if (PL_thisstuff) {
4025             /* add quoted material as madprop = */
4026             CURMAD('=', PL_thisstuff);
4027         }
4028
4029         if (PL_thisclose) {
4030             /* add terminating quote as madprop Q */
4031             CURMAD('Q', PL_thisclose);
4032         }
4033     }
4034
4035     /* special processing based on optype */
4036
4037     switch (optype) {
4038
4039     /* opval doesn't need a TOKEN since it can already store mp */
4040     case WORD:
4041     case METHOD:
4042     case FUNCMETH:
4043     case THING:
4044     case PMFUNC:
4045     case PRIVATEREF:
4046     case FUNC0SUB:
4047     case UNIOPSUB:
4048     case LSTOPSUB:
4049         if (pl_yylval.opval)
4050             append_madprops(PL_thismad, pl_yylval.opval, 0);
4051         PL_thismad = 0;
4052         return optype;
4053
4054     /* fake EOF */
4055     case 0:
4056         optype = PEG;
4057         if (PL_endwhite) {
4058             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4059             PL_endwhite = 0;
4060         }
4061         break;
4062
4063     case ']':
4064     case '}':
4065         if (PL_faketokens)
4066             break;
4067         /* remember any fake bracket that lexer is about to discard */
4068         if (PL_lex_brackets == 1 &&
4069             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4070         {
4071             s = PL_bufptr;
4072             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4073                 s++;
4074             if (*s == '}') {
4075                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4076                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4077                 PL_thiswhite = 0;
4078                 PL_bufptr = s - 1;
4079                 break;  /* don't bother looking for trailing comment */
4080             }
4081             else
4082                 s = PL_bufptr;
4083         }
4084         if (optype == ']')
4085             break;
4086         /* FALLTHROUGH */
4087
4088     /* attach a trailing comment to its statement instead of next token */
4089     case ';':
4090         if (PL_faketokens)
4091             break;
4092         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4093             s = PL_bufptr;
4094             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4095                 s++;
4096             if (*s == '\n' || *s == '#') {
4097                 while (s < PL_bufend && *s != '\n')
4098                     s++;
4099                 if (s < PL_bufend)
4100                     s++;
4101                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4102                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4103                 PL_thiswhite = 0;
4104                 PL_bufptr = s;
4105             }
4106         }
4107         break;
4108
4109     /* pval */
4110     case LABEL:
4111         break;
4112
4113     /* ival */
4114     default:
4115         break;
4116
4117     }
4118
4119     /* Create new token struct.  Note: opvals return early above. */
4120     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4121     PL_thismad = 0;
4122     return optype;
4123 }
4124 #endif
4125
4126 STATIC char *
4127 S_tokenize_use(pTHX_ int is_use, char *s) {
4128     dVAR;
4129
4130     PERL_ARGS_ASSERT_TOKENIZE_USE;
4131
4132     if (PL_expect != XSTATE)
4133         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4134                     is_use ? "use" : "no"));
4135     s = SKIPSPACE1(s);
4136     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4137         s = force_version(s, TRUE);
4138         if (*s == ';' || *s == '}'
4139                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4140             start_force(PL_curforce);
4141             NEXTVAL_NEXTTOKE.opval = NULL;
4142             force_next(WORD);
4143         }
4144         else if (*s == 'v') {
4145             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4146             s = force_version(s, FALSE);
4147         }
4148     }
4149     else {
4150         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4151         s = force_version(s, FALSE);
4152     }
4153     pl_yylval.ival = is_use;
4154     return s;
4155 }
4156 #ifdef DEBUGGING
4157     static const char* const exp_name[] =
4158         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4159           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4160         };
4161 #endif
4162
4163 /*
4164   yylex
4165
4166   Works out what to call the token just pulled out of the input
4167   stream.  The yacc parser takes care of taking the ops we return and
4168   stitching them into a tree.
4169
4170   Returns:
4171     PRIVATEREF
4172
4173   Structure:
4174       if read an identifier
4175           if we're in a my declaration
4176               croak if they tried to say my($foo::bar)
4177               build the ops for a my() declaration
4178           if it's an access to a my() variable
4179               are we in a sort block?
4180                   croak if my($a); $a <=> $b
4181               build ops for access to a my() variable
4182           if in a dq string, and they've said @foo and we can't find @foo
4183               croak
4184           build ops for a bareword
4185       if we already built the token before, use it.
4186 */
4187
4188
4189 #ifdef __SC__
4190 #pragma segment Perl_yylex
4191 #endif
4192 int
4193 Perl_yylex(pTHX)
4194 {
4195     dVAR;
4196     register char *s = PL_bufptr;
4197     register char *d;
4198     STRLEN len;
4199     bool bof = FALSE;
4200     U32 fake_eof = 0;
4201
4202     /* orig_keyword, gvp, and gv are initialized here because
4203      * jump to the label just_a_word_zero can bypass their
4204      * initialization later. */
4205     I32 orig_keyword = 0;
4206     GV *gv = NULL;
4207     GV **gvp = NULL;
4208
4209     DEBUG_T( {
4210         SV* tmp = newSVpvs("");
4211         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4212             (IV)CopLINE(PL_curcop),
4213             lex_state_names[PL_lex_state],
4214             exp_name[PL_expect],
4215             pv_display(tmp, s, strlen(s), 0, 60));
4216         SvREFCNT_dec(tmp);
4217     } );
4218     /* check if there's an identifier for us to look at */
4219     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4220         return REPORT(S_pending_ident(aTHX));
4221
4222     /* no identifier pending identification */
4223
4224     switch (PL_lex_state) {
4225 #ifdef COMMENTARY
4226     case LEX_NORMAL:            /* Some compilers will produce faster */
4227     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4228         break;
4229 #endif
4230
4231     /* when we've already built the next token, just pull it out of the queue */
4232     case LEX_KNOWNEXT:
4233 #ifdef PERL_MAD
4234         PL_lasttoke--;
4235         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4236         if (PL_madskills) {
4237             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4238             PL_nexttoke[PL_lasttoke].next_mad = 0;
4239             if (PL_thismad && PL_thismad->mad_key == '_') {
4240                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4241                 PL_thismad->mad_val = 0;
4242                 mad_free(PL_thismad);
4243                 PL_thismad = 0;
4244             }
4245         }
4246         if (!PL_lasttoke) {
4247             PL_lex_state = PL_lex_defer;
4248             PL_expect = PL_lex_expect;
4249             PL_lex_defer = LEX_NORMAL;
4250             if (!PL_nexttoke[PL_lasttoke].next_type)
4251                 return yylex();
4252         }
4253 #else
4254         PL_nexttoke--;
4255         pl_yylval = PL_nextval[PL_nexttoke];
4256         if (!PL_nexttoke) {
4257             PL_lex_state = PL_lex_defer;
4258             PL_expect = PL_lex_expect;
4259             PL_lex_defer = LEX_NORMAL;
4260         }
4261 #endif
4262 #ifdef PERL_MAD
4263         /* FIXME - can these be merged?  */
4264         return(PL_nexttoke[PL_lasttoke].next_type);
4265 #else
4266         return REPORT(PL_nexttype[PL_nexttoke]);
4267 #endif
4268
4269     /* interpolated case modifiers like \L \U, including \Q and \E.
4270        when we get here, PL_bufptr is at the \
4271     */
4272     case LEX_INTERPCASEMOD:
4273 #ifdef DEBUGGING
4274         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4275             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4276 #endif
4277         /* handle \E or end of string */
4278         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4279             /* if at a \E */
4280             if (PL_lex_casemods) {
4281                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4282                 PL_lex_casestack[PL_lex_casemods] = '\0';
4283
4284                 if (PL_bufptr != PL_bufend
4285                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4286                     PL_bufptr += 2;
4287                     PL_lex_state = LEX_INTERPCONCAT;
4288 #ifdef PERL_MAD
4289                     if (PL_madskills)
4290                         PL_thistoken = newSVpvs("\\E");
4291 #endif
4292                 }
4293                 return REPORT(')');
4294             }
4295 #ifdef PERL_MAD
4296             while (PL_bufptr != PL_bufend &&
4297               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4298                 if (!PL_thiswhite)
4299                     PL_thiswhite = newSVpvs("");
4300                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4301                 PL_bufptr += 2;
4302             }
4303 #else
4304             if (PL_bufptr != PL_bufend)
4305                 PL_bufptr += 2;
4306 #endif
4307             PL_lex_state = LEX_INTERPCONCAT;
4308             return yylex();
4309         }
4310         else {
4311             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4312               "### Saw case modifier\n"); });
4313             s = PL_bufptr + 1;
4314             if (s[1] == '\\' && s[2] == 'E') {
4315 #ifdef PERL_MAD
4316                 if (!PL_thiswhite)
4317                     PL_thiswhite = newSVpvs("");
4318                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4319 #endif
4320                 PL_bufptr = s + 3;
4321                 PL_lex_state = LEX_INTERPCONCAT;
4322                 return yylex();
4323             }
4324             else {
4325                 I32 tmp;
4326                 if (!PL_madskills) /* when just compiling don't need correct */
4327                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4328                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4329                 if ((*s == 'L' || *s == 'U') &&
4330                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4331                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4332                     return REPORT(')');
4333                 }
4334                 if (PL_lex_casemods > 10)
4335                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4336                 PL_lex_casestack[PL_lex_casemods++] = *s;
4337                 PL_lex_casestack[PL_lex_casemods] = '\0';
4338                 PL_lex_state = LEX_INTERPCONCAT;
4339                 start_force(PL_curforce);
4340                 NEXTVAL_NEXTTOKE.ival = 0;
4341                 force_next('(');
4342                 start_force(PL_curforce);
4343                 if (*s == 'l')
4344                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4345                 else if (*s == 'u')
4346                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4347                 else if (*s == 'L')
4348                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4349                 else if (*s == 'U')
4350                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4351                 else if (*s == 'Q')
4352                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4353                 else
4354                     Perl_croak(aTHX_ "panic: yylex");
4355                 if (PL_madskills) {
4356                     SV* const tmpsv = newSVpvs("\\ ");
4357                     /* replace the space with the character we want to escape
4358                      */
4359                     SvPVX(tmpsv)[1] = *s;
4360                     curmad('_', tmpsv);
4361                 }
4362                 PL_bufptr = s + 1;
4363             }
4364             force_next(FUNC);
4365             if (PL_lex_starts) {
4366                 s = PL_bufptr;
4367                 PL_lex_starts = 0;
4368 #ifdef PERL_MAD
4369                 if (PL_madskills) {
4370                     if (PL_thistoken)
4371                         sv_free(PL_thistoken);
4372                     PL_thistoken = newSVpvs("");
4373                 }
4374 #endif
4375                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4376                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4377                     OPERATOR(',');
4378                 else
4379                     Aop(OP_CONCAT);
4380             }
4381             else
4382                 return yylex();
4383         }
4384
4385     case LEX_INTERPPUSH:
4386         return REPORT(sublex_push());
4387
4388     case LEX_INTERPSTART:
4389         if (PL_bufptr == PL_bufend)
4390             return REPORT(sublex_done());
4391         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4392               "### Interpolated variable\n"); });
4393         PL_expect = XTERM;
4394         PL_lex_dojoin = (*PL_bufptr == '@');
4395         PL_lex_state = LEX_INTERPNORMAL;
4396         if (PL_lex_dojoin) {
4397             start_force(PL_curforce);
4398             NEXTVAL_NEXTTOKE.ival = 0;
4399             force_next(',');
4400             start_force(PL_curforce);
4401             force_ident("\"", '$');
4402             start_force(PL_curforce);
4403             NEXTVAL_NEXTTOKE.ival = 0;
4404             force_next('$');
4405             start_force(PL_curforce);
4406             NEXTVAL_NEXTTOKE.ival = 0;
4407             force_next('(');
4408             start_force(PL_curforce);
4409             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4410             force_next(FUNC);
4411         }
4412         if (PL_lex_starts++) {
4413             s = PL_bufptr;
4414 #ifdef PERL_MAD
4415             if (PL_madskills) {
4416                 if (PL_thistoken)
4417                     sv_free(PL_thistoken);
4418                 PL_thistoken = newSVpvs("");
4419             }
4420 #endif
4421             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4422             if (!PL_lex_casemods && PL_lex_inpat)
4423                 OPERATOR(',');
4424             else
4425                 Aop(OP_CONCAT);
4426         }
4427         return yylex();
4428
4429     case LEX_INTERPENDMAYBE:
4430         if (intuit_more(PL_bufptr)) {
4431             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4432             break;
4433         }
4434         /* FALL THROUGH */
4435
4436     case LEX_INTERPEND:
4437         if (PL_lex_dojoin) {
4438             PL_lex_dojoin = FALSE;
4439             PL_lex_state = LEX_INTERPCONCAT;
4440 #ifdef PERL_MAD
4441             if (PL_madskills) {
4442                 if (PL_thistoken)
4443                     sv_free(PL_thistoken);
4444                 PL_thistoken = newSVpvs("");
4445             }
4446 #endif
4447             return REPORT(')');
4448         }
4449         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4450             && SvEVALED(PL_lex_repl))
4451         {
4452             if (PL_bufptr != PL_bufend)
4453                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4454             PL_lex_repl = NULL;
4455         }
4456         /* FALLTHROUGH */
4457     case LEX_INTERPCONCAT:
4458 #ifdef DEBUGGING
4459         if (PL_lex_brackets)
4460             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4461 #endif
4462         if (PL_bufptr == PL_bufend)
4463             return REPORT(sublex_done());
4464
4465         if (SvIVX(PL_linestr) == '\'') {
4466             SV *sv = newSVsv(PL_linestr);
4467             if (!PL_lex_inpat)
4468                 sv = tokeq(sv);
4469             else if ( PL_hints & HINT_NEW_RE )
4470                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4471             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4472             s = PL_bufend;
4473         }
4474         else {
4475             s = scan_const(PL_bufptr);
4476             if (*s == '\\')
4477                 PL_lex_state = LEX_INTERPCASEMOD;
4478             else
4479                 PL_lex_state = LEX_INTERPSTART;
4480         }
4481
4482         if (s != PL_bufptr) {
4483             start_force(PL_curforce);
4484             if (PL_madskills) {
4485                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4486             }
4487             NEXTVAL_NEXTTOKE = pl_yylval;
4488             PL_expect = XTERM;
4489             force_next(THING);
4490             if (PL_lex_starts++) {
4491 #ifdef PERL_MAD
4492                 if (PL_madskills) {
4493                     if (PL_thistoken)
4494                         sv_free(PL_thistoken);
4495                     PL_thistoken = newSVpvs("");
4496                 }
4497 #endif
4498                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4499                 if (!PL_lex_casemods && PL_lex_inpat)
4500                     OPERATOR(',');
4501                 else
4502                     Aop(OP_CONCAT);
4503             }
4504             else {
4505                 PL_bufptr = s;
4506                 return yylex();
4507             }
4508         }
4509
4510         return yylex();
4511     case LEX_FORMLINE:
4512         PL_lex_state = LEX_NORMAL;
4513         s = scan_formline(PL_bufptr);
4514         if (!PL_lex_formbrack)
4515             goto rightbracket;
4516         OPERATOR(';');
4517     }
4518
4519     s = PL_bufptr;
4520     PL_oldoldbufptr = PL_oldbufptr;
4521     PL_oldbufptr = s;
4522
4523   retry:
4524 #ifdef PERL_MAD
4525     if (PL_thistoken) {
4526         sv_free(PL_thistoken);
4527         PL_thistoken = 0;
4528     }
4529     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4530 #endif
4531     switch (*s) {
4532     default:
4533         if (isIDFIRST_lazy_if(s,UTF))
4534             goto keylookup;
4535         {
4536         unsigned char c = *s;
4537         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4538         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4539             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4540         } else {
4541             d = PL_linestart;
4542         }
4543         *s = '\0';
4544         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4545     }
4546     case 4:
4547     case 26:
4548         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4549     case 0:
4550 #ifdef PERL_MAD
4551         if (PL_madskills)
4552             PL_faketokens = 0;
4553 #endif
4554         if (!PL_rsfp) {
4555             PL_last_uni = 0;
4556             PL_last_lop = 0;
4557             if (PL_lex_brackets) {
4558                 yyerror((const char *)
4559                         (PL_lex_formbrack
4560                          ? "Format not terminated"
4561                          : "Missing right curly or square bracket"));
4562             }
4563             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4564                         "### Tokener got EOF\n");
4565             } );
4566             TOKEN(0);
4567         }
4568         if (s++ < PL_bufend)
4569             goto retry;                 /* ignore stray nulls */
4570         PL_last_uni = 0;
4571         PL_last_lop = 0;
4572         if (!PL_in_eval && !PL_preambled) {
4573             PL_preambled = TRUE;
4574 #ifdef PERL_MAD
4575             if (PL_madskills)
4576                 PL_faketokens = 1;
4577 #endif
4578             if (PL_perldb) {
4579                 /* Generate a string of Perl code to load the debugger.
4580                  * If PERL5DB is set, it will return the contents of that,
4581                  * otherwise a compile-time require of perl5db.pl.  */
4582
4583                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4584
4585                 if (pdb) {
4586                     sv_setpv(PL_linestr, pdb);
4587                     sv_catpvs(PL_linestr,";");
4588                 } else {
4589                     SETERRNO(0,SS_NORMAL);
4590                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4591                 }
4592             } else
4593                 sv_setpvs(PL_linestr,"");
4594             if (PL_preambleav) {
4595                 SV **svp = AvARRAY(PL_preambleav);
4596                 SV **const end = svp + AvFILLp(PL_preambleav);
4597                 while(svp <= end) {
4598                     sv_catsv(PL_linestr, *svp);
4599                     ++svp;
4600                     sv_catpvs(PL_linestr, ";");
4601                 }
4602                 sv_free(MUTABLE_SV(PL_preambleav));
4603                 PL_preambleav = NULL;
4604             }
4605             if (PL_minus_E)
4606                 sv_catpvs(PL_linestr,
4607                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4608             if (PL_minus_n || PL_minus_p) {
4609                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4610                 if (PL_minus_l)
4611                     sv_catpvs(PL_linestr,"chomp;");
4612                 if (PL_minus_a) {
4613                     if (PL_minus_F) {
4614                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4615                              || *PL_splitstr == '"')
4616                               && strchr(PL_splitstr + 1, *PL_splitstr))
4617                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4618                         else {
4619                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4620                                bytes can be used as quoting characters.  :-) */
4621                             const char *splits = PL_splitstr;
4622                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4623                             do {
4624                                 /* Need to \ \s  */
4625                                 if (*splits == '\\')
4626                                     sv_catpvn(PL_linestr, splits, 1);
4627                                 sv_catpvn(PL_linestr, splits, 1);
4628                             } while (*splits++);
4629                             /* This loop will embed the trailing NUL of
4630                                PL_linestr as the last thing it does before
4631                                terminating.  */
4632                             sv_catpvs(PL_linestr, ");");
4633                         }
4634                     }
4635                     else
4636                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4637                 }
4638             }
4639             sv_catpvs(PL_linestr, "\n");
4640             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4641             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4642             PL_last_lop = PL_last_uni = NULL;
4643             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4644                 update_debugger_info(PL_linestr, NULL, 0);
4645             goto retry;
4646         }
4647         do {
4648             fake_eof = 0;
4649             bof = PL_rsfp ? TRUE : FALSE;
4650             if (0) {
4651               fake_eof:
4652                 fake_eof = LEX_FAKE_EOF;
4653             }
4654             PL_bufptr = PL_bufend;
4655             CopLINE_inc(PL_curcop);
4656             if (!lex_next_chunk(fake_eof)) {
4657                 CopLINE_dec(PL_curcop);
4658                 s = PL_bufptr;
4659                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4660             }
4661             CopLINE_dec(PL_curcop);
4662 #ifdef PERL_MAD
4663             if (!PL_rsfp)
4664                 PL_realtokenstart = -1;
4665 #endif
4666             s = PL_bufptr;
4667             /* If it looks like the start of a BOM or raw UTF-16,
4668              * check if it in fact is. */
4669             if (bof && PL_rsfp &&
4670                      (*s == 0 ||
4671                       *(U8*)s == 0xEF ||
4672                       *(U8*)s >= 0xFE ||
4673                       s[1] == 0)) {
4674                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4675                 if (bof) {
4676                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4677                     s = swallow_bom((U8*)s);
4678                 }
4679             }
4680             if (PL_doextract) {
4681                 /* Incest with pod. */
4682 #ifdef PERL_MAD
4683                 if (PL_madskills)
4684                     sv_catsv(PL_thiswhite, PL_linestr);
4685 #endif
4686                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4687                     sv_setpvs(PL_linestr, "");
4688                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4689                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4690                     PL_last_lop = PL_last_uni = NULL;
4691                     PL_doextract = FALSE;
4692                 }
4693             }
4694             if (PL_rsfp)
4695                 incline(s);
4696         } while (PL_doextract);
4697         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4698         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4699         PL_last_lop = PL_last_uni = NULL;
4700         if (CopLINE(PL_curcop) == 1) {
4701             while (s < PL_bufend && isSPACE(*s))
4702                 s++;
4703             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4704                 s++;
4705 #ifdef PERL_MAD
4706             if (PL_madskills)
4707                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4708 #endif
4709             d = NULL;
4710             if (!PL_in_eval) {
4711                 if (*s == '#' && *(s+1) == '!')
4712                     d = s + 2;
4713 #ifdef ALTERNATE_SHEBANG
4714                 else {
4715                     static char const as[] = ALTERNATE_SHEBANG;
4716                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4717                         d = s + (sizeof(as) - 1);
4718                 }
4719 #endif /* ALTERNATE_SHEBANG */
4720             }
4721             if (d) {
4722                 char *ipath;
4723                 char *ipathend;
4724
4725                 while (isSPACE(*d))
4726                     d++;
4727                 ipath = d;
4728                 while (*d && !isSPACE(*d))
4729                     d++;
4730                 ipathend = d;
4731
4732 #ifdef ARG_ZERO_IS_SCRIPT
4733                 if (ipathend > ipath) {
4734                     /*
4735                      * HP-UX (at least) sets argv[0] to the script name,
4736                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4737                      * at least, set argv[0] to the basename of the Perl
4738                      * interpreter. So, having found "#!", we'll set it right.
4739                      */
4740                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4741                                                     SVt_PV)); /* $^X */
4742                     assert(SvPOK(x) || SvGMAGICAL(x));
4743                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4744                         sv_setpvn(x, ipath, ipathend - ipath);
4745                         SvSETMAGIC(x);
4746                     }
4747                     else {
4748                         STRLEN blen;
4749                         STRLEN llen;
4750                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4751                         const char * const lstart = SvPV_const(x,llen);
4752                         if (llen < blen) {
4753                             bstart += blen - llen;
4754                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4755                                 sv_setpvn(x, ipath, ipathend - ipath);
4756                                 SvSETMAGIC(x);
4757                             }
4758                         }
4759                     }
4760                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4761                 }
4762 #endif /* ARG_ZERO_IS_SCRIPT */
4763
4764                 /*
4765                  * Look for options.
4766                  */
4767                 d = instr(s,"perl -");
4768                 if (!d) {
4769                     d = instr(s,"perl");
4770 #if defined(DOSISH)
4771                     /* avoid getting into infinite loops when shebang
4772                      * line contains "Perl" rather than "perl" */
4773                     if (!d) {
4774                         for (d = ipathend-4; d >= ipath; --d) {
4775                             if ((*d == 'p' || *d == 'P')
4776                                 && !ibcmp(d, "perl", 4))
4777                             {
4778                                 break;
4779                             }
4780                         }
4781                         if (d < ipath)
4782                             d = NULL;
4783                     }
4784 #endif
4785                 }
4786 #ifdef ALTERNATE_SHEBANG
4787                 /*
4788                  * If the ALTERNATE_SHEBANG on this system starts with a
4789                  * character that can be part of a Perl expression, then if
4790                  * we see it but not "perl", we're probably looking at the
4791                  * start of Perl code, not a request to hand off to some
4792                  * other interpreter.  Similarly, if "perl" is there, but
4793                  * not in the first 'word' of the line, we assume the line
4794                  * contains the start of the Perl program.
4795                  */
4796                 if (d && *s != '#') {
4797                     const char *c = ipath;
4798                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4799                         c++;
4800                     if (c < d)
4801                         d = NULL;       /* "perl" not in first word; ignore */
4802                     else
4803                         *s = '#';       /* Don't try to parse shebang line */
4804                 }
4805 #endif /* ALTERNATE_SHEBANG */
4806                 if (!d &&
4807                     *s == '#' &&
4808                     ipathend > ipath &&
4809                     !PL_minus_c &&
4810                     !instr(s,"indir") &&
4811                     instr(PL_origargv[0],"perl"))
4812                 {
4813                     dVAR;
4814                     char **newargv;
4815
4816                     *ipathend = '\0';
4817                     s = ipathend + 1;
4818                     while (s < PL_bufend && isSPACE(*s))
4819                         s++;
4820                     if (s < PL_bufend) {
4821                         Newx(newargv,PL_origargc+3,char*);
4822                         newargv[1] = s;
4823                         while (s < PL_bufend && !isSPACE(*s))
4824                             s++;
4825                         *s = '\0';
4826                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4827                     }
4828                     else
4829                         newargv = PL_origargv;
4830                     newargv[0] = ipath;
4831                     PERL_FPU_PRE_EXEC
4832                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4833                     PERL_FPU_POST_EXEC
4834                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4835                 }
4836                 if (d) {
4837                     while (*d && !isSPACE(*d))
4838                         d++;
4839                     while (SPACE_OR_TAB(*d))
4840                         d++;
4841
4842                     if (*d++ == '-') {
4843                         const bool switches_done = PL_doswitches;
4844                         const U32 oldpdb = PL_perldb;
4845                         const bool oldn = PL_minus_n;
4846                         const bool oldp = PL_minus_p;
4847                         const char *d1 = d;
4848
4849                         do {
4850                             bool baduni = FALSE;
4851                             if (*d1 == 'C') {
4852                                 const char *d2 = d1 + 1;
4853                                 if (parse_unicode_opts((const char **)&d2)
4854                                     != PL_unicode)
4855                                     baduni = TRUE;
4856                             }
4857                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4858                                 const char * const m = d1;
4859                                 while (*d1 && !isSPACE(*d1))
4860                                     d1++;
4861                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4862                                       (int)(d1 - m), m);
4863                             }
4864                             d1 = moreswitches(d1);
4865                         } while (d1);
4866                         if (PL_doswitches && !switches_done) {
4867                             int argc = PL_origargc;
4868                             char **argv = PL_origargv;
4869                             do {
4870                                 argc--,argv++;
4871                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4872                             init_argv_symbols(argc,argv);
4873                         }
4874                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4875                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4876                               /* if we have already added "LINE: while (<>) {",
4877                                  we must not do it again */
4878                         {
4879                             sv_setpvs(PL_linestr, "");
4880                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4881                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4882                             PL_last_lop = PL_last_uni = NULL;
4883                             PL_preambled = FALSE;
4884                             if (PERLDB_LINE || PERLDB_SAVESRC)
4885                                 (void)gv_fetchfile(PL_origfilename);
4886                             goto retry;
4887                         }
4888                     }
4889                 }
4890             }
4891         }
4892         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4893             PL_bufptr = s;
4894             PL_lex_state = LEX_FORMLINE;
4895             return yylex();
4896         }
4897         goto retry;
4898     case '\r':
4899 #ifdef PERL_STRICT_CR
4900         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4901         Perl_croak(aTHX_
4902       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4903 #endif
4904     case ' ': case '\t': case '\f': case 013:
4905 #ifdef PERL_MAD
4906         PL_realtokenstart = -1;
4907         if (!PL_thiswhite)
4908             PL_thiswhite = newSVpvs("");
4909         sv_catpvn(PL_thiswhite, s, 1);
4910 #endif
4911         s++;
4912         goto retry;
4913     case '#':
4914     case '\n':
4915 #ifdef PERL_MAD
4916         PL_realtokenstart = -1;
4917         if (PL_madskills)
4918             PL_faketokens = 0;
4919 #endif
4920         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4921             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4922                 /* handle eval qq[#line 1 "foo"\n ...] */
4923                 CopLINE_dec(PL_curcop);
4924                 incline(s);
4925             }
4926             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4927                 s = SKIPSPACE0(s);
4928                 if (!PL_in_eval || PL_rsfp)
4929                     incline(s);
4930             }
4931             else {
4932                 d = s;
4933                 while (d < PL_bufend && *d != '\n')
4934                     d++;
4935                 if (d < PL_bufend)
4936                     d++;
4937                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4938                   Perl_croak(aTHX_ "panic: input overflow");
4939 #ifdef PERL_MAD
4940                 if (PL_madskills)
4941                     PL_thiswhite = newSVpvn(s, d - s);
4942 #endif
4943                 s = d;
4944                 incline(s);
4945             }
4946             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4947                 PL_bufptr = s;
4948                 PL_lex_state = LEX_FORMLINE;
4949                 return yylex();
4950             }
4951         }
4952         else {
4953 #ifdef PERL_MAD
4954             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4955                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4956                     PL_faketokens = 0;
4957                     s = SKIPSPACE0(s);
4958                     TOKEN(PEG); /* make sure any #! line is accessible */
4959                 }
4960                 s = SKIPSPACE0(s);
4961             }
4962             else {
4963 /*              if (PL_madskills && PL_lex_formbrack) { */
4964                     d = s;
4965                     while (d < PL_bufend && *d != '\n')
4966                         d++;
4967                     if (d < PL_bufend)
4968                         d++;
4969                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4970                       Perl_croak(aTHX_ "panic: input overflow");
4971                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4972                         if (!PL_thiswhite)
4973                             PL_thiswhite = newSVpvs("");
4974                         if (CopLINE(PL_curcop) == 1) {
4975                             sv_setpvs(PL_thiswhite, "");
4976                             PL_faketokens = 0;
4977                         }
4978                         sv_catpvn(PL_thiswhite, s, d - s);
4979                     }
4980                     s = d;
4981 /*              }
4982                 *s = '\0';
4983                 PL_bufend = s; */
4984             }
4985 #else
4986             *s = '\0';
4987             PL_bufend = s;
4988 #endif
4989         }
4990         goto retry;
4991     case '-':
4992         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4993             I32 ftst = 0;
4994             char tmp;
4995
4996             s++;
4997             PL_bufptr = s;
4998             tmp = *s++;
4999
5000             while (s < PL_bufend && SPACE_OR_TAB(*s))
5001                 s++;
5002
5003             if (strnEQ(s,"=>",2)) {
5004                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5005                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5006                 OPERATOR('-');          /* unary minus */
5007             }
5008             PL_last_uni = PL_oldbufptr;
5009             switch (tmp) {
5010             case 'r': ftst = OP_FTEREAD;        break;
5011             case 'w': ftst = OP_FTEWRITE;       break;
5012             case 'x': ftst = OP_FTEEXEC;        break;
5013             case 'o': ftst = OP_FTEOWNED;       break;
5014             case 'R': ftst = OP_FTRREAD;        break;
5015             case 'W': ftst = OP_FTRWRITE;       break;
5016             case 'X': ftst = OP_FTREXEC;        break;
5017             case 'O': ftst = OP_FTROWNED;       break;
5018             case 'e': ftst = OP_FTIS;           break;
5019             case 'z': ftst = OP_FTZERO;         break;
5020             case 's': ftst = OP_FTSIZE;         break;
5021             case 'f': ftst = OP_FTFILE;         break;
5022             case 'd': ftst = OP_FTDIR;          break;
5023             case 'l': ftst = OP_FTLINK;         break;
5024             case 'p': ftst = OP_FTPIPE;         break;
5025             case 'S': ftst = OP_FTSOCK;         break;
5026             case 'u': ftst = OP_FTSUID;         break;
5027             case 'g': ftst = OP_FTSGID;         break;
5028             case 'k': ftst = OP_FTSVTX;         break;
5029             case 'b': ftst = OP_FTBLK;          break;
5030             case 'c': ftst = OP_FTCHR;          break;
5031             case 't': ftst = OP_FTTTY;          break;
5032             case 'T': ftst = OP_FTTEXT;         break;
5033             case 'B': ftst = OP_FTBINARY;       break;
5034             case 'M': case 'A': case 'C':
5035                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5036                 switch (tmp) {
5037                 case 'M': ftst = OP_FTMTIME;    break;
5038                 case 'A': ftst = OP_FTATIME;    break;
5039                 case 'C': ftst = OP_FTCTIME;    break;
5040                 default:                        break;
5041                 }
5042                 break;
5043             default:
5044                 break;
5045             }
5046             if (ftst) {
5047                 PL_last_lop_op = (OPCODE)ftst;
5048                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5049                         "### Saw file test %c\n", (int)tmp);
5050                 } );
5051                 FTST(ftst);
5052             }
5053             else {
5054                 /* Assume it was a minus followed by a one-letter named
5055                  * subroutine call (or a -bareword), then. */
5056                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5057                         "### '-%c' looked like a file test but was not\n",
5058                         (int) tmp);
5059                 } );
5060                 s = --PL_bufptr;
5061             }
5062         }
5063         {
5064             const char tmp = *s++;
5065             if (*s == tmp) {
5066                 s++;
5067                 if (PL_expect == XOPERATOR)
5068                     TERM(POSTDEC);
5069                 else
5070                     OPERATOR(PREDEC);
5071             }
5072             else if (*s == '>') {
5073                 s++;
5074                 s = SKIPSPACE1(s);
5075                 if (isIDFIRST_lazy_if(s,UTF)) {
5076                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5077                     TOKEN(ARROW);
5078                 }
5079                 else if (*s == '$')
5080                     OPERATOR(ARROW);
5081                 else
5082                     TERM(ARROW);
5083             }
5084             if (PL_expect == XOPERATOR)
5085                 Aop(OP_SUBTRACT);
5086             else {
5087                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5088                     check_uni();
5089                 OPERATOR('-');          /* unary minus */
5090             }
5091         }
5092
5093     case '+':
5094         {
5095             const char tmp = *s++;
5096             if (*s == tmp) {
5097                 s++;
5098                 if (PL_expect == XOPERATOR)
5099                     TERM(POSTINC);
5100                 else
5101                     OPERATOR(PREINC);
5102             }
5103             if (PL_expect == XOPERATOR)
5104                 Aop(OP_ADD);
5105             else {
5106                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5107                     check_uni();
5108                 OPERATOR('+');
5109             }
5110         }
5111
5112     case '*':
5113         if (PL_expect != XOPERATOR) {
5114             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5115             PL_expect = XOPERATOR;
5116             force_ident(PL_tokenbuf, '*');
5117             if (!*PL_tokenbuf)
5118                 PREREF('*');
5119             TERM('*');
5120         }
5121         s++;
5122         if (*s == '*') {
5123             s++;
5124             PWop(OP_POW);
5125         }
5126         Mop(OP_MULTIPLY);
5127
5128     case '%':
5129         if (PL_expect == XOPERATOR) {
5130             ++s;
5131             Mop(OP_MODULO);
5132         }
5133         PL_tokenbuf[0] = '%';
5134         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5135                 sizeof PL_tokenbuf - 1, FALSE);
5136         if (!PL_tokenbuf[1]) {
5137             PREREF('%');
5138         }
5139         PL_pending_ident = '%';
5140         TERM('%');
5141
5142     case '^':
5143         s++;
5144         BOop(OP_BIT_XOR);
5145     case '[':
5146         PL_lex_brackets++;
5147         {
5148             const char tmp = *s++;
5149             OPERATOR(tmp);
5150         }
5151     case '~':
5152         if (s[1] == '~'
5153             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5154         {
5155             s += 2;
5156             Eop(OP_SMARTMATCH);
5157         }
5158     case ',':
5159         {
5160             const char tmp = *s++;
5161             OPERATOR(tmp);
5162         }
5163     case ':':
5164         if (s[1] == ':') {
5165             len = 0;
5166             goto just_a_word_zero_gv;
5167         }
5168         s++;
5169         switch (PL_expect) {
5170             OP *attrs;
5171 #ifdef PERL_MAD
5172             I32 stuffstart;
5173 #endif
5174         case XOPERATOR:
5175             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5176                 break;
5177             PL_bufptr = s;      /* update in case we back off */
5178             if (*s == '=') {
5179                 deprecate(":= for an empty attribute list");
5180             }
5181             goto grabattrs;
5182         case XATTRBLOCK:
5183             PL_expect = XBLOCK;
5184             goto grabattrs;
5185         case XATTRTERM:
5186             PL_expect = XTERMBLOCK;
5187          grabattrs:
5188 #ifdef PERL_MAD
5189             stuffstart = s - SvPVX(PL_linestr) - 1;
5190 #endif
5191             s = PEEKSPACE(s);
5192             attrs = NULL;
5193             while (isIDFIRST_lazy_if(s,UTF)) {
5194                 I32 tmp;
5195                 SV *sv;
5196                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5197                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5198                     if (tmp < 0) tmp = -tmp;
5199                     switch (tmp) {
5200                     case KEY_or:
5201                     case KEY_and:
5202                     case KEY_for:
5203                     case KEY_foreach:
5204                     case KEY_unless:
5205                     case KEY_if:
5206                     case KEY_while:
5207                     case KEY_until:
5208                         goto got_attrs;
5209                     default:
5210                         break;
5211                     }
5212                 }
5213                 sv = newSVpvn(s, len);
5214                 if (*d == '(') {
5215                     d = scan_str(d,TRUE,TRUE);
5216                     if (!d) {
5217                         /* MUST advance bufptr here to avoid bogus
5218                            "at end of line" context messages from yyerror().
5219                          */
5220                         PL_bufptr = s + len;
5221                         yyerror("Unterminated attribute parameter in attribute list");
5222                         if (attrs)
5223                             op_free(attrs);
5224                         sv_free(sv);
5225                         return REPORT(0);       /* EOF indicator */
5226                     }
5227                 }
5228                 if (PL_lex_stuff) {
5229                     sv_catsv(sv, PL_lex_stuff);
5230                     attrs = append_elem(OP_LIST, attrs,
5231                                         newSVOP(OP_CONST, 0, sv));
5232                     SvREFCNT_dec(PL_lex_stuff);
5233                     PL_lex_stuff = NULL;
5234                 }
5235                 else {
5236                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5237                         sv_free(sv);
5238                         if (PL_in_my == KEY_our) {
5239                             deprecate(":unique");
5240                         }
5241                         else
5242                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5243                     }
5244
5245                     /* NOTE: any CV attrs applied here need to be part of
5246                        the CVf_BUILTIN_ATTRS define in cv.h! */
5247                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5248                         sv_free(sv);
5249                         CvLVALUE_on(PL_compcv);
5250                     }
5251                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5252                         sv_free(sv);
5253                         deprecate(":locked");
5254                     }
5255                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5256                         sv_free(sv);
5257                         CvMETHOD_on(PL_compcv);
5258                     }
5259                     /* After we've set the flags, it could be argued that
5260                        we don't need to do the attributes.pm-based setting
5261                        process, and shouldn't bother appending recognized
5262                        flags.  To experiment with that, uncomment the
5263                        following "else".  (Note that's already been
5264                        uncommented.  That keeps the above-applied built-in
5265                        attributes from being intercepted (and possibly
5266                        rejected) by a package's attribute routines, but is
5267                        justified by the performance win for the common case
5268                        of applying only built-in attributes.) */
5269                     else
5270                         attrs = append_elem(OP_LIST, attrs,
5271                                             newSVOP(OP_CONST, 0,
5272                                                     sv));
5273                 }
5274                 s = PEEKSPACE(d);
5275                 if (*s == ':' && s[1] != ':')
5276                     s = PEEKSPACE(s+1);
5277                 else if (s == d)
5278                     break;      /* require real whitespace or :'s */
5279                 /* XXX losing whitespace on sequential attributes here */
5280             }
5281             {
5282                 const char tmp
5283                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5284                 if (*s != ';' && *s != '}' && *s != tmp
5285                     && (tmp != '=' || *s != ')')) {
5286                     const char q = ((*s == '\'') ? '"' : '\'');
5287                     /* If here for an expression, and parsed no attrs, back
5288                        off. */
5289                     if (tmp == '=' && !attrs) {
5290                         s = PL_bufptr;
5291                         break;
5292                     }
5293                     /* MUST advance bufptr here to avoid bogus "at end of line"
5294                        context messages from yyerror().
5295                     */
5296                     PL_bufptr = s;
5297                     yyerror( (const char *)
5298                              (*s
5299                               ? Perl_form(aTHX_ "Invalid separator character "
5300                                           "%c%c%c in attribute list", q, *s, q)
5301                               : "Unterminated attribute list" ) );
5302                     if (attrs)
5303                         op_free(attrs);
5304                     OPERATOR(':');
5305                 }
5306             }
5307         got_attrs:
5308             if (attrs) {
5309                 start_force(PL_curforce);
5310                 NEXTVAL_NEXTTOKE.opval = attrs;
5311                 CURMAD('_', PL_nextwhite);
5312                 force_next(THING);
5313             }
5314 #ifdef PERL_MAD
5315             if (PL_madskills) {
5316                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5317                                      (s - SvPVX(PL_linestr)) - stuffstart);
5318             }
5319 #endif
5320             TOKEN(COLONATTR);
5321         }
5322         OPERATOR(':');
5323     case '(':
5324         s++;
5325         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5326             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5327         else
5328             PL_expect = XTERM;
5329         s = SKIPSPACE1(s);
5330         TOKEN('(');
5331     case ';':
5332         CLINE;
5333         {
5334             const char tmp = *s++;
5335             OPERATOR(tmp);
5336         }
5337     case ')':
5338         {
5339             const char tmp = *s++;
5340             s = SKIPSPACE1(s);
5341             if (*s == '{')
5342                 PREBLOCK(tmp);
5343             TERM(tmp);
5344         }
5345     case ']':
5346         s++;
5347         if (PL_lex_brackets <= 0)
5348             yyerror("Unmatched right square bracket");
5349         else
5350             --PL_lex_brackets;
5351         if (PL_lex_state == LEX_INTERPNORMAL) {
5352             if (PL_lex_brackets == 0) {
5353                 if (*s == '-' && s[1] == '>')
5354                     PL_lex_state = LEX_INTERPENDMAYBE;
5355                 else if (*s != '[' && *s != '{')
5356                     PL_lex_state = LEX_INTERPEND;
5357             }
5358         }
5359         TERM(']');
5360     case '{':
5361       leftbracket:
5362         s++;
5363         if (PL_lex_brackets > 100) {
5364             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5365         }
5366         switch (PL_expect) {
5367         case XTERM:
5368             if (PL_lex_formbrack) {
5369                 s--;
5370                 PRETERMBLOCK(DO);
5371             }
5372             if (PL_oldoldbufptr == PL_last_lop)
5373                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5374             else
5375                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5376             OPERATOR(HASHBRACK);
5377         case XOPERATOR:
5378             while (s < PL_bufend && SPACE_OR_TAB(*s))
5379                 s++;
5380             d = s;
5381             PL_tokenbuf[0] = '\0';
5382             if (d < PL_bufend && *d == '-') {
5383                 PL_tokenbuf[0] = '-';
5384                 d++;
5385                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5386                     d++;
5387             }
5388             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5389                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5390                               FALSE, &len);
5391                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5392                     d++;
5393                 if (*d == '}') {
5394                     const char minus = (PL_tokenbuf[0] == '-');
5395                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5396                     if (minus)
5397                         force_next('-');
5398                 }
5399             }
5400             /* FALL THROUGH */
5401         case XATTRBLOCK:
5402         case XBLOCK:
5403             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5404             PL_expect = XSTATE;
5405             break;
5406         case XATTRTERM:
5407         case XTERMBLOCK:
5408             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5409             PL_expect = XSTATE;
5410             break;
5411         default: {
5412                 const char *t;
5413                 if (PL_oldoldbufptr == PL_last_lop)
5414                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5415                 else
5416                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5417                 s = SKIPSPACE1(s);
5418                 if (*s == '}') {
5419                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5420                         PL_expect = XTERM;
5421                         /* This hack is to get the ${} in the message. */
5422                         PL_bufptr = s+1;
5423                         yyerror("syntax error");
5424                         break;
5425                     }
5426                     OPERATOR(HASHBRACK);
5427                 }
5428                 /* This hack serves to disambiguate a pair of curlies
5429                  * as being a block or an anon hash.  Normally, expectation
5430                  * determines that, but in cases where we're not in a
5431                  * position to expect anything in particular (like inside
5432                  * eval"") we have to resolve the ambiguity.  This code
5433                  * covers the case where the first term in the curlies is a
5434                  * quoted string.  Most other cases need to be explicitly
5435                  * disambiguated by prepending a "+" before the opening
5436                  * curly in order to force resolution as an anon hash.
5437                  *
5438                  * XXX should probably propagate the outer expectation
5439                  * into eval"" to rely less on this hack, but that could
5440                  * potentially break current behavior of eval"".
5441                  * GSAR 97-07-21
5442                  */
5443                 t = s;
5444                 if (*s == '\'' || *s == '"' || *s == '`') {
5445                     /* common case: get past first string, handling escapes */
5446                     for (t++; t < PL_bufend && *t != *s;)
5447                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5448                             t++;
5449                     t++;
5450                 }
5451                 else if (*s == 'q') {
5452                     if (++t < PL_bufend
5453                         && (!isALNUM(*t)
5454                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5455                                 && !isALNUM(*t))))
5456                     {
5457                         /* skip q//-like construct */
5458                         const char *tmps;
5459                         char open, close, term;
5460                         I32 brackets = 1;
5461
5462                         while (t < PL_bufend && isSPACE(*t))
5463                             t++;
5464                         /* check for q => */
5465                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5466                             OPERATOR(HASHBRACK);
5467                         }
5468                         term = *t;
5469                         open = term;
5470                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5471                             term = tmps[5];
5472                         close = term;
5473                         if (open == close)
5474                             for (t++; t < PL_bufend; t++) {
5475                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5476                                     t++;
5477                                 else if (*t == open)
5478                                     break;
5479                             }
5480                         else {
5481                             for (t++; t < PL_bufend; t++) {
5482                                 if (*t == '\\' && t+1 < PL_bufend)
5483                                     t++;
5484                                 else if (*t == close && --brackets <= 0)
5485                                     break;
5486                                 else if (*t == open)
5487                                     brackets++;
5488                             }
5489                         }
5490                         t++;
5491                     }
5492                     else
5493                         /* skip plain q word */
5494                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5495                              t += UTF8SKIP(t);
5496                 }
5497                 else if (isALNUM_lazy_if(t,UTF)) {
5498                     t += UTF8SKIP(t);
5499                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5500                          t += UTF8SKIP(t);
5501                 }
5502                 while (t < PL_bufend && isSPACE(*t))
5503                     t++;
5504                 /* if comma follows first term, call it an anon hash */
5505                 /* XXX it could be a comma expression with loop modifiers */
5506                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5507                                    || (*t == '=' && t[1] == '>')))
5508                     OPERATOR(HASHBRACK);
5509                 if (PL_expect == XREF)
5510                     PL_expect = XTERM;
5511                 else {
5512                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5513                     PL_expect = XSTATE;
5514                 }
5515             }
5516             break;
5517         }
5518         pl_yylval.ival = CopLINE(PL_curcop);
5519         if (isSPACE(*s) || *s == '#')
5520             PL_copline = NOLINE;   /* invalidate current command line number */
5521         TOKEN('{');
5522     case '}':
5523       rightbracket:
5524         s++;
5525         if (PL_lex_brackets <= 0)
5526             yyerror("Unmatched right curly bracket");
5527         else
5528             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5529         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5530             PL_lex_formbrack = 0;
5531         if (PL_lex_state == LEX_INTERPNORMAL) {
5532             if (PL_lex_brackets == 0) {
5533                 if (PL_expect & XFAKEBRACK) {
5534                     PL_expect &= XENUMMASK;
5535                     PL_lex_state = LEX_INTERPEND;
5536                     PL_bufptr = s;
5537 #if 0
5538                     if (PL_madskills) {
5539                         if (!PL_thiswhite)
5540                             PL_thiswhite = newSVpvs("");
5541                         sv_catpvs(PL_thiswhite,"}");
5542                     }
5543 #endif
5544                     return yylex();     /* ignore fake brackets */
5545                 }
5546                 if (*s == '-' && s[1] == '>')
5547                     PL_lex_state = LEX_INTERPENDMAYBE;
5548                 else if (*s != '[' && *s != '{')
5549                     PL_lex_state = LEX_INTERPEND;
5550             }
5551         }
5552         if (PL_expect & XFAKEBRACK) {
5553             PL_expect &= XENUMMASK;
5554             PL_bufptr = s;
5555             return yylex();             /* ignore fake brackets */
5556         }
5557         start_force(PL_curforce);
5558         if (PL_madskills) {
5559             curmad('X', newSVpvn(s-1,1));
5560             CURMAD('_', PL_thiswhite);
5561         }
5562         force_next('}');
5563 #ifdef PERL_MAD
5564         if (!PL_thistoken)
5565             PL_thistoken = newSVpvs("");
5566 #endif
5567         TOKEN(';');
5568     case '&':
5569         s++;
5570         if (*s++ == '&')
5571             AOPERATOR(ANDAND);
5572         s--;
5573         if (PL_expect == XOPERATOR) {
5574             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5575                 && isIDFIRST_lazy_if(s,UTF))
5576             {
5577                 CopLINE_dec(PL_curcop);
5578                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5579                 CopLINE_inc(PL_curcop);
5580             }
5581             BAop(OP_BIT_AND);
5582         }
5583
5584         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5585         if (*PL_tokenbuf) {
5586             PL_expect = XOPERATOR;
5587             force_ident(PL_tokenbuf, '&');
5588         }
5589         else
5590             PREREF('&');
5591         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5592         TERM('&');
5593
5594     case '|':
5595         s++;
5596         if (*s++ == '|')
5597             AOPERATOR(OROR);
5598         s--;
5599         BOop(OP_BIT_OR);
5600     case '=':
5601         s++;
5602         {
5603             const char tmp = *s++;
5604             if (tmp == '=')
5605                 Eop(OP_EQ);
5606             if (tmp == '>')
5607                 OPERATOR(',');
5608             if (tmp == '~')
5609                 PMop(OP_MATCH);
5610             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5611                 && strchr("+-*/%.^&|<",tmp))
5612                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5613                             "Reversed %c= operator",(int)tmp);
5614             s--;
5615             if (PL_expect == XSTATE && isALPHA(tmp) &&
5616                 (s == PL_linestart+1 || s[-2] == '\n') )
5617                 {
5618                     if (PL_in_eval && !PL_rsfp) {
5619                         d = PL_bufend;
5620                         while (s < d) {
5621                             if (*s++ == '\n') {
5622                                 incline(s);
5623                                 if (strnEQ(s,"=cut",4)) {
5624                                     s = strchr(s,'\n');
5625                                     if (s)
5626                                         s++;
5627                                     else
5628                                         s = d;
5629                                     incline(s);
5630                                     goto retry;
5631                                 }
5632                             }
5633                         }
5634                         goto retry;
5635                     }
5636 #ifdef PERL_MAD
5637                     if (PL_madskills) {
5638                         if (!PL_thiswhite)
5639                             PL_thiswhite = newSVpvs("");
5640                         sv_catpvn(PL_thiswhite, PL_linestart,
5641                                   PL_bufend - PL_linestart);
5642                     }
5643 #endif
5644                     s = PL_bufend;
5645                     PL_doextract = TRUE;
5646                     goto retry;
5647                 }
5648         }
5649         if (PL_lex_brackets < PL_lex_formbrack) {
5650             const char *t = s;
5651 #ifdef PERL_STRICT_CR
5652             while (SPACE_OR_TAB(*t))
5653 #else
5654             while (SPACE_OR_TAB(*t) || *t == '\r')
5655 #endif
5656                 t++;
5657             if (*t == '\n' || *t == '#') {
5658                 s--;
5659                 PL_expect = XBLOCK;
5660                 goto leftbracket;
5661             }
5662         }
5663         pl_yylval.ival = 0;
5664         OPERATOR(ASSIGNOP);
5665     case '!':
5666         s++;
5667         {
5668             const char tmp = *s++;
5669             if (tmp == '=') {
5670                 /* was this !=~ where !~ was meant?
5671                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5672
5673                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5674                     const char *t = s+1;
5675
5676                     while (t < PL_bufend && isSPACE(*t))
5677                         ++t;
5678
5679                     if (*t == '/' || *t == '?' ||
5680                         ((*t == 'm' || *t == 's' || *t == 'y')
5681                          && !isALNUM(t[1])) ||
5682                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5683                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5684                                     "!=~ should be !~");
5685                 }
5686                 Eop(OP_NE);
5687             }
5688             if (tmp == '~')
5689                 PMop(OP_NOT);
5690         }
5691         s--;
5692         OPERATOR('!');
5693     case '<':
5694         if (PL_expect != XOPERATOR) {
5695             if (s[1] != '<' && !strchr(s,'>'))
5696                 check_uni();
5697             if (s[1] == '<')
5698                 s = scan_heredoc(s);
5699             else
5700                 s = scan_inputsymbol(s);
5701             TERM(sublex_start());
5702         }
5703         s++;
5704         {
5705             char tmp = *s++;
5706             if (tmp == '<')
5707                 SHop(OP_LEFT_SHIFT);
5708             if (tmp == '=') {
5709                 tmp = *s++;
5710                 if (tmp == '>')
5711                     Eop(OP_NCMP);
5712                 s--;
5713                 Rop(OP_LE);
5714             }
5715         }
5716         s--;
5717         Rop(OP_LT);
5718     case '>':
5719         s++;
5720         {
5721             const char tmp = *s++;
5722             if (tmp == '>')
5723                 SHop(OP_RIGHT_SHIFT);
5724             else if (tmp == '=')
5725                 Rop(OP_GE);
5726         }
5727         s--;
5728         Rop(OP_GT);
5729
5730     case '$':
5731         CLINE;
5732
5733         if (PL_expect == XOPERATOR) {
5734             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5735                 return deprecate_commaless_var_list();
5736             }
5737         }
5738
5739         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5740             PL_tokenbuf[0] = '@';
5741             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5742                            sizeof PL_tokenbuf - 1, FALSE);
5743             if (PL_expect == XOPERATOR)
5744                 no_op("Array length", s);
5745             if (!PL_tokenbuf[1])
5746                 PREREF(DOLSHARP);
5747             PL_expect = XOPERATOR;
5748             PL_pending_ident = '#';
5749             TOKEN(DOLSHARP);
5750         }
5751
5752         PL_tokenbuf[0] = '$';
5753         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5754                        sizeof PL_tokenbuf - 1, FALSE);
5755         if (PL_expect == XOPERATOR)
5756             no_op("Scalar", s);
5757         if (!PL_tokenbuf[1]) {
5758             if (s == PL_bufend)
5759                 yyerror("Final $ should be \\$ or $name");
5760             PREREF('$');
5761         }
5762
5763         /* This kludge not intended to be bulletproof. */
5764         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5765             pl_yylval.opval = newSVOP(OP_CONST, 0,
5766                                    newSViv(CopARYBASE_get(&PL_compiling)));
5767             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5768             TERM(THING);
5769         }
5770
5771         d = s;
5772         {
5773             const char tmp = *s;
5774             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5775                 s = SKIPSPACE1(s);
5776
5777             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5778                 && intuit_more(s)) {
5779                 if (*s == '[') {
5780                     PL_tokenbuf[0] = '@';
5781                     if (ckWARN(WARN_SYNTAX)) {
5782                         char *t = s+1;
5783
5784                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5785                             t++;
5786                         if (*t++ == ',') {
5787                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5788                             while (t < PL_bufend && *t != ']')
5789                                 t++;
5790                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5791                                         "Multidimensional syntax %.*s not supported",
5792                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5793                         }
5794                     }
5795                 }
5796                 else if (*s == '{') {
5797                     char *t;
5798                     PL_tokenbuf[0] = '%';
5799                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5800                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5801                         {
5802                             char tmpbuf[sizeof PL_tokenbuf];
5803                             do {
5804                                 t++;
5805                             } while (isSPACE(*t));
5806                             if (isIDFIRST_lazy_if(t,UTF)) {
5807                                 STRLEN len;
5808                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5809                                               &len);
5810                                 while (isSPACE(*t))
5811                                     t++;
5812                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5813                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5814                                                 "You need to quote \"%s\"",
5815                                                 tmpbuf);
5816                             }
5817                         }
5818                 }
5819             }
5820
5821             PL_expect = XOPERATOR;
5822             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5823                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5824                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5825                     PL_expect = XOPERATOR;
5826                 else if (strchr("$@\"'`q", *s))
5827                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5828                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5829                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5830                 else if (isIDFIRST_lazy_if(s,UTF)) {
5831                     char tmpbuf[sizeof PL_tokenbuf];
5832                     int t2;
5833                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5834                     if ((t2 = keyword(tmpbuf, len, 0))) {
5835                         /* binary operators exclude handle interpretations */
5836                         switch (t2) {
5837                         case -KEY_x:
5838                         case -KEY_eq:
5839                         case -KEY_ne:
5840                         case -KEY_gt:
5841                         case -KEY_lt:
5842                         case -KEY_ge:
5843                         case -KEY_le:
5844                         case -KEY_cmp:
5845                             break;
5846                         default:
5847                             PL_expect = XTERM;  /* e.g. print $fh length() */
5848                             break;
5849                         }
5850                     }
5851                     else {
5852                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5853                     }
5854                 }
5855                 else if (isDIGIT(*s))
5856                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5857                 else if (*s == '.' && isDIGIT(s[1]))
5858                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5859                 else if ((*s == '?' || *s == '-' || *s == '+')
5860                          && !isSPACE(s[1]) && s[1] != '=')
5861                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5862                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5863                          && s[1] != '/')
5864                     PL_expect = XTERM;          /* e.g. print $fh /.../
5865                                                    XXX except DORDOR operator
5866                                                 */
5867                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5868                          && s[2] != '=')
5869                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5870             }
5871         }
5872         PL_pending_ident = '$';
5873         TOKEN('$');
5874
5875     case '@':
5876         if (PL_expect == XOPERATOR)
5877             no_op("Array", s);
5878         PL_tokenbuf[0] = '@';
5879         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5880         if (!PL_tokenbuf[1]) {
5881             PREREF('@');
5882         }
5883         if (PL_lex_state == LEX_NORMAL)
5884             s = SKIPSPACE1(s);
5885         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5886             if (*s == '{')
5887                 PL_tokenbuf[0] = '%';
5888
5889             /* Warn about @ where they meant $. */
5890             if (*s == '[' || *s == '{') {
5891                 if (ckWARN(WARN_SYNTAX)) {
5892                     const char *t = s + 1;
5893                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5894                         t++;
5895                     if (*t == '}' || *t == ']') {
5896                         t++;
5897                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5898                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5899                             "Scalar value %.*s better written as $%.*s",
5900                             (int)(t-PL_bufptr), PL_bufptr,
5901                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5902                     }
5903                 }
5904             }
5905         }
5906         PL_pending_ident = '@';
5907         TERM('@');
5908
5909      case '/':                  /* may be division, defined-or, or pattern */
5910         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5911             s += 2;
5912             AOPERATOR(DORDOR);
5913         }
5914      case '?':                  /* may either be conditional or pattern */
5915         if (PL_expect == XOPERATOR) {
5916              char tmp = *s++;
5917              if(tmp == '?') {
5918                 OPERATOR('?');
5919              }
5920              else {
5921                  tmp = *s++;
5922                  if(tmp == '/') {
5923                      /* A // operator. */
5924                     AOPERATOR(DORDOR);
5925                  }
5926                  else {
5927                      s--;
5928                      Mop(OP_DIVIDE);
5929                  }
5930              }
5931          }
5932          else {
5933              /* Disable warning on "study /blah/" */
5934              if (PL_oldoldbufptr == PL_last_uni
5935               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5936                   || memNE(PL_last_uni, "study", 5)
5937                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5938               ))
5939                  check_uni();
5940              s = scan_pat(s,OP_MATCH);
5941              TERM(sublex_start());
5942          }
5943
5944     case '.':
5945         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5946 #ifdef PERL_STRICT_CR
5947             && s[1] == '\n'
5948 #else
5949             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5950 #endif
5951             && (s == PL_linestart || s[-1] == '\n') )
5952         {
5953             PL_lex_formbrack = 0;
5954             PL_expect = XSTATE;
5955             goto rightbracket;
5956         }
5957         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5958             s += 3;
5959             OPERATOR(YADAYADA);
5960         }
5961         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5962             char tmp = *s++;
5963             if (*s == tmp) {
5964                 s++;
5965                 if (*s == tmp) {
5966                     s++;
5967                     pl_yylval.ival = OPf_SPECIAL;
5968                 }
5969                 else
5970                     pl_yylval.ival = 0;
5971                 OPERATOR(DOTDOT);
5972             }
5973             Aop(OP_CONCAT);
5974         }
5975         /* FALL THROUGH */
5976     case '0': case '1': case '2': case '3': case '4':
5977     case '5': case '6': case '7': case '8': case '9':
5978         s = scan_num(s, &pl_yylval);
5979         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5980         if (PL_expect == XOPERATOR)
5981             no_op("Number",s);
5982         TERM(THING);
5983
5984     case '\'':
5985         s = scan_str(s,!!PL_madskills,FALSE);
5986         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5987         if (PL_expect == XOPERATOR) {
5988             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5989                 return deprecate_commaless_var_list();
5990             }
5991             else
5992                 no_op("String",s);
5993         }
5994         if (!s)
5995             missingterm(NULL);
5996         pl_yylval.ival = OP_CONST;
5997         TERM(sublex_start());
5998
5999     case '"':
6000         s = scan_str(s,!!PL_madskills,FALSE);
6001         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6002         if (PL_expect == XOPERATOR) {
6003             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6004                 return deprecate_commaless_var_list();
6005             }
6006             else
6007                 no_op("String",s);
6008         }
6009         if (!s)
6010             missingterm(NULL);
6011         pl_yylval.ival = OP_CONST;
6012         /* FIXME. I think that this can be const if char *d is replaced by
6013            more localised variables.  */
6014         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6015             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6016                 pl_yylval.ival = OP_STRINGIFY;
6017                 break;
6018             }
6019         }
6020         TERM(sublex_start());
6021
6022     case '`':
6023         s = scan_str(s,!!PL_madskills,FALSE);
6024         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6025         if (PL_expect == XOPERATOR)
6026             no_op("Backticks",s);
6027         if (!s)
6028             missingterm(NULL);
6029         readpipe_override();
6030         TERM(sublex_start());
6031
6032     case '\\':
6033         s++;
6034         if (PL_lex_inwhat && isDIGIT(*s))
6035             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6036                            *s, *s);
6037         if (PL_expect == XOPERATOR)
6038             no_op("Backslash",s);
6039         OPERATOR(REFGEN);
6040
6041     case 'v':
6042         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6043             char *start = s + 2;
6044             while (isDIGIT(*start) || *start == '_')
6045                 start++;
6046             if (*start == '.' && isDIGIT(start[1])) {
6047                 s = scan_num(s, &pl_yylval);
6048                 TERM(THING);
6049             }
6050             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6051             else if (!isALPHA(*start) && (PL_expect == XTERM
6052                         || PL_expect == XREF || PL_expect == XSTATE
6053                         || PL_expect == XTERMORDORDOR)) {
6054                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6055                 if (!gv) {
6056                     s = scan_num(s, &pl_yylval);
6057                     TERM(THING);
6058                 }
6059             }
6060         }
6061         goto keylookup;
6062     case 'x':
6063         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6064             s++;
6065             Mop(OP_REPEAT);
6066         }
6067         goto keylookup;
6068
6069     case '_':
6070     case 'a': case 'A':
6071     case 'b': case 'B':
6072     case 'c': case 'C':
6073     case 'd': case 'D':
6074     case 'e': case 'E':
6075     case 'f': case 'F':
6076     case 'g': case 'G':
6077     case 'h': case 'H':
6078     case 'i': case 'I':
6079     case 'j': case 'J':
6080     case 'k': case 'K':
6081     case 'l': case 'L':
6082     case 'm': case 'M':
6083     case 'n': case 'N':
6084     case 'o': case 'O':
6085     case 'p': case 'P':
6086     case 'q': case 'Q':
6087     case 'r': case 'R':
6088     case 's': case 'S':
6089     case 't': case 'T':
6090     case 'u': case 'U':
6091               case 'V':
6092     case 'w': case 'W':
6093               case 'X':
6094     case 'y': case 'Y':
6095     case 'z': case 'Z':
6096
6097       keylookup: {
6098         bool anydelim;
6099         I32 tmp;
6100
6101         orig_keyword = 0;
6102         gv = NULL;
6103         gvp = NULL;
6104
6105         PL_bufptr = s;
6106         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6107
6108         /* Some keywords can be followed by any delimiter, including ':' */
6109         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6110                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6111                              (PL_tokenbuf[0] == 'q' &&
6112                               strchr("qwxr", PL_tokenbuf[1])))));
6113
6114         /* x::* is just a word, unless x is "CORE" */
6115         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6116             goto just_a_word;
6117
6118         d = s;
6119         while (d < PL_bufend && isSPACE(*d))
6120                 d++;    /* no comments skipped here, or s### is misparsed */
6121
6122         /* Is this a word before a => operator? */
6123         if (*d == '=' && d[1] == '>') {
6124             CLINE;
6125             pl_yylval.opval
6126                 = (OP*)newSVOP(OP_CONST, 0,
6127                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6128             pl_yylval.opval->op_private = OPpCONST_BARE;
6129             TERM(WORD);
6130         }
6131
6132         /* Check for plugged-in keyword */
6133         {
6134             OP *o;
6135             int result;
6136             char *saved_bufptr = PL_bufptr;
6137             PL_bufptr = s;
6138             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6139             s = PL_bufptr;
6140             if (result == KEYWORD_PLUGIN_DECLINE) {
6141                 /* not a plugged-in keyword */
6142                 PL_bufptr = saved_bufptr;
6143             } else if (result == KEYWORD_PLUGIN_STMT) {
6144                 pl_yylval.opval = o;
6145                 CLINE;
6146                 PL_expect = XSTATE;
6147                 return REPORT(PLUGSTMT);
6148             } else if (result == KEYWORD_PLUGIN_EXPR) {
6149                 pl_yylval.opval = o;
6150                 CLINE;
6151                 PL_expect = XOPERATOR;
6152                 return REPORT(PLUGEXPR);
6153             } else {
6154                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6155                                         PL_tokenbuf);
6156             }
6157         }
6158
6159         /* Check for built-in keyword */
6160         tmp = keyword(PL_tokenbuf, len, 0);
6161
6162         /* Is this a label? */
6163         if (!anydelim && PL_expect == XSTATE
6164               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6165             s = d + 1;
6166             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6167             CLINE;
6168             TOKEN(LABEL);
6169         }
6170
6171         if (tmp < 0) {                  /* second-class keyword? */
6172             GV *ogv = NULL;     /* override (winner) */
6173             GV *hgv = NULL;     /* hidden (loser) */
6174             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6175                 CV *cv;
6176                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6177                     (cv = GvCVu(gv)))
6178                 {
6179                     if (GvIMPORTED_CV(gv))
6180                         ogv = gv;
6181                     else if (! CvMETHOD(cv))
6182                         hgv = gv;
6183                 }
6184                 if (!ogv &&
6185                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6186                     (gv = *gvp) && isGV_with_GP(gv) &&
6187                     GvCVu(gv) && GvIMPORTED_CV(gv))
6188                 {
6189                     ogv = gv;
6190                 }
6191             }
6192             if (ogv) {
6193                 orig_keyword = tmp;
6194                 tmp = 0;                /* overridden by import or by GLOBAL */
6195             }
6196             else if (gv && !gvp
6197                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6198                      && GvCVu(gv))
6199             {
6200                 tmp = 0;                /* any sub overrides "weak" keyword */
6201             }
6202             else {                      /* no override */
6203                 tmp = -tmp;
6204                 if (tmp == KEY_dump) {
6205                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6206                                    "dump() better written as CORE::dump()");
6207                 }
6208                 gv = NULL;
6209                 gvp = 0;
6210                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6211                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6212                                    "Ambiguous call resolved as CORE::%s(), "
6213                                    "qualify as such or use &",
6214                                    GvENAME(hgv));
6215             }
6216         }
6217
6218       reserved_word:
6219         switch (tmp) {
6220
6221         default:                        /* not a keyword */
6222             /* Trade off - by using this evil construction we can pull the
6223                variable gv into the block labelled keylookup. If not, then
6224                we have to give it function scope so that the goto from the
6225                earlier ':' case doesn't bypass the initialisation.  */
6226             if (0) {
6227             just_a_word_zero_gv:
6228                 gv = NULL;
6229                 gvp = NULL;
6230                 orig_keyword = 0;
6231             }
6232           just_a_word: {
6233                 SV *sv;
6234                 int pkgname = 0;
6235                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6236                 OP *rv2cv_op;
6237                 CV *cv;
6238 #ifdef PERL_MAD
6239                 SV *nextPL_nextwhite = 0;
6240 #endif
6241
6242
6243                 /* Get the rest if it looks like a package qualifier */
6244
6245                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6246                     STRLEN morelen;
6247                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6248                                   TRUE, &morelen);
6249                     if (!morelen)
6250                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6251                                 *s == '\'' ? "'" : "::");
6252                     len += morelen;
6253                     pkgname = 1;
6254                 }
6255
6256                 if (PL_expect == XOPERATOR) {
6257                     if (PL_bufptr == PL_linestart) {
6258                         CopLINE_dec(PL_curcop);
6259                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6260                         CopLINE_inc(PL_curcop);
6261                     }
6262                     else
6263                         no_op("Bareword",s);
6264                 }
6265
6266                 /* Look for a subroutine with this name in current package,
6267                    unless name is "Foo::", in which case Foo is a bearword
6268                    (and a package name). */
6269
6270                 if (len > 2 && !PL_madskills &&
6271                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6272                 {
6273                     if (ckWARN(WARN_BAREWORD)
6274                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6275                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6276                             "Bareword \"%s\" refers to nonexistent package",
6277                              PL_tokenbuf);
6278                     len -= 2;
6279                     PL_tokenbuf[len] = '\0';
6280                     gv = NULL;
6281                     gvp = 0;
6282                 }
6283                 else {
6284                     if (!gv) {
6285                         /* Mustn't actually add anything to a symbol table.
6286                            But also don't want to "initialise" any placeholder
6287                            constants that might already be there into full
6288                            blown PVGVs with attached PVCV.  */
6289                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6290                                                GV_NOADD_NOINIT, SVt_PVCV);
6291                     }
6292                     len = 0;
6293                 }
6294
6295                 /* if we saw a global override before, get the right name */
6296
6297                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6298                     len ? len : strlen(PL_tokenbuf));
6299                 if (gvp) {
6300                     SV * const tmp_sv = sv;
6301                     sv = newSVpvs("CORE::GLOBAL::");
6302                     sv_catsv(sv, tmp_sv);
6303                     SvREFCNT_dec(tmp_sv);
6304                 }
6305
6306 #ifdef PERL_MAD
6307                 if (PL_madskills && !PL_thistoken) {
6308                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6309                     PL_thistoken = newSVpvn(start,s - start);
6310                     PL_realtokenstart = s - SvPVX(PL_linestr);
6311                 }
6312 #endif
6313
6314                 /* Presume this is going to be a bareword of some sort. */
6315                 CLINE;
6316                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6317                 pl_yylval.opval->op_private = OPpCONST_BARE;
6318
6319                 /* And if "Foo::", then that's what it certainly is. */
6320                 if (len)
6321                     goto safe_bareword;
6322
6323                 cv = NULL;
6324                 {
6325                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6326                     const_op->op_private = OPpCONST_BARE;
6327                     rv2cv_op = newCVREF(0, const_op);
6328                 }
6329                 if (rv2cv_op->op_type == OP_RV2CV &&
6330                         (rv2cv_op->op_flags & OPf_KIDS)) {
6331                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6332                     switch (rv_op->op_type) {
6333                         case OP_CONST: {
6334                             SV *sv = cSVOPx_sv(rv_op);
6335                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6336                                 cv = (CV*)SvRV(sv);
6337                         } break;
6338                         case OP_GV: {
6339                             GV *gv = cGVOPx_gv(rv_op);
6340                             CV *maybe_cv = GvCVu(gv);
6341                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6342                                 cv = maybe_cv;
6343                         } break;
6344                     }
6345                 }
6346
6347                 /* See if it's the indirect object for a list operator. */
6348
6349                 if (PL_oldoldbufptr &&
6350                     PL_oldoldbufptr < PL_bufptr &&
6351                     (PL_oldoldbufptr == PL_last_lop
6352                      || PL_oldoldbufptr == PL_last_uni) &&
6353                     /* NO SKIPSPACE BEFORE HERE! */
6354                     (PL_expect == XREF ||
6355                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6356                 {
6357                     bool immediate_paren = *s == '(';
6358
6359                     /* (Now we can afford to cross potential line boundary.) */
6360                     s = SKIPSPACE2(s,nextPL_nextwhite);
6361 #ifdef PERL_MAD
6362                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6363 #endif
6364
6365                     /* Two barewords in a row may indicate method call. */
6366
6367                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6368                         (tmp = intuit_method(s, gv, cv))) {
6369                         op_free(rv2cv_op);
6370                         return REPORT(tmp);
6371                     }
6372
6373                     /* If not a declared subroutine, it's an indirect object. */
6374                     /* (But it's an indir obj regardless for sort.) */
6375                     /* Also, if "_" follows a filetest operator, it's a bareword */
6376
6377                     if (
6378                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6379                          (!cv &&
6380                         (PL_last_lop_op != OP_MAPSTART &&
6381                          PL_last_lop_op != OP_GREPSTART))))
6382                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6383                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6384                        )
6385                     {
6386                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6387                         goto bareword;
6388                     }
6389                 }
6390
6391                 PL_expect = XOPERATOR;
6392 #ifdef PERL_MAD
6393                 if (isSPACE(*s))
6394                     s = SKIPSPACE2(s,nextPL_nextwhite);
6395                 PL_nextwhite = nextPL_nextwhite;
6396 #else
6397                 s = skipspace(s);
6398 #endif
6399
6400                 /* Is this a word before a => operator? */
6401                 if (*s == '=' && s[1] == '>' && !pkgname) {
6402                     op_free(rv2cv_op);
6403                     CLINE;
6404                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6405                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6406                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6407                     TERM(WORD);
6408                 }
6409
6410                 /* If followed by a paren, it's certainly a subroutine. */
6411                 if (*s == '(') {
6412                     CLINE;
6413                     if (cv) {
6414                         d = s + 1;
6415                         while (SPACE_OR_TAB(*d))
6416                             d++;
6417                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6418                             s = d + 1;
6419                             goto its_constant;
6420                         }
6421                     }
6422 #ifdef PERL_MAD
6423                     if (PL_madskills) {
6424                         PL_nextwhite = PL_thiswhite;
6425                         PL_thiswhite = 0;
6426                     }
6427                     start_force(PL_curforce);
6428 #endif
6429                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6430                     PL_expect = XOPERATOR;
6431 #ifdef PERL_MAD
6432                     if (PL_madskills) {
6433                         PL_nextwhite = nextPL_nextwhite;
6434                         curmad('X', PL_thistoken);
6435                         PL_thistoken = newSVpvs("");
6436                     }
6437 #endif
6438                     op_free(rv2cv_op);
6439                     force_next(WORD);
6440                     pl_yylval.ival = 0;
6441                     TOKEN('&');
6442                 }
6443
6444                 /* If followed by var or block, call it a method (unless sub) */
6445
6446                 if ((*s == '$' || *s == '{') && !cv) {
6447                     op_free(rv2cv_op);
6448                     PL_last_lop = PL_oldbufptr;
6449                     PL_last_lop_op = OP_METHOD;
6450                     PREBLOCK(METHOD);
6451                 }
6452
6453                 /* If followed by a bareword, see if it looks like indir obj. */
6454
6455                 if (!orig_keyword
6456                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6457                         && (tmp = intuit_method(s, gv, cv))) {
6458                     op_free(rv2cv_op);
6459                     return REPORT(tmp);
6460                 }
6461
6462                 /* Not a method, so call it a subroutine (if defined) */
6463
6464                 if (cv) {
6465                     if (lastchar == '-')
6466                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6467                                          "Ambiguous use of -%s resolved as -&%s()",
6468                                          PL_tokenbuf, PL_tokenbuf);
6469                     /* Check for a constant sub */
6470                     if ((sv = cv_const_sv(cv))) {
6471                   its_constant:
6472                         op_free(rv2cv_op);
6473                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6474                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6475                         pl_yylval.opval->op_private = 0;
6476                         TOKEN(WORD);
6477                     }
6478
6479                     op_free(pl_yylval.opval);
6480                     pl_yylval.opval = rv2cv_op;
6481                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6482                     PL_last_lop = PL_oldbufptr;
6483                     PL_last_lop_op = OP_ENTERSUB;
6484                     /* Is there a prototype? */
6485                     if (
6486 #ifdef PERL_MAD
6487                         cv &&
6488 #endif
6489                         SvPOK(cv))
6490                     {
6491                         STRLEN protolen;
6492                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6493                         if (!protolen)
6494                             TERM(FUNC0SUB);
6495                         while (*proto == ';')
6496                             proto++;
6497                         if (
6498                             (
6499                                 (
6500                                     *proto == '$' || *proto == '_'
6501                                  || *proto == '*'
6502                                 )
6503                              && proto[1] == '\0'
6504                             )
6505                          || (
6506                              *proto == '\\' && proto[1] && proto[2] == '\0'
6507                             )
6508                         )
6509                             OPERATOR(UNIOPSUB);
6510                         if (*proto == '\\' && proto[1] == '[') {
6511                             const char *p = proto + 2;
6512                             while(*p && *p != ']')
6513                                 ++p;
6514                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6515                         }
6516                         if (*proto == '&' && *s == '{') {
6517                             if (PL_curstash)
6518                                 sv_setpvs(PL_subname, "__ANON__");
6519                             else
6520                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6521                             PREBLOCK(LSTOPSUB);
6522                         }
6523                     }
6524 #ifdef PERL_MAD
6525                     {
6526                         if (PL_madskills) {
6527                             PL_nextwhite = PL_thiswhite;
6528                             PL_thiswhite = 0;
6529                         }
6530                         start_force(PL_curforce);
6531                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6532                         PL_expect = XTERM;
6533                         if (PL_madskills) {
6534                             PL_nextwhite = nextPL_nextwhite;
6535                             curmad('X', PL_thistoken);
6536                             PL_thistoken = newSVpvs("");
6537                         }
6538                         force_next(WORD);
6539                         TOKEN(NOAMP);
6540                     }
6541                 }
6542
6543                 /* Guess harder when madskills require "best effort". */
6544                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6545                     int probable_sub = 0;
6546                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6547                         probable_sub = 1;
6548                     else if (isALPHA(*s)) {
6549                         char tmpbuf[1024];
6550                         STRLEN tmplen;
6551                         d = s;
6552                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6553                         if (!keyword(tmpbuf, tmplen, 0))
6554                             probable_sub = 1;
6555                         else {
6556                             while (d < PL_bufend && isSPACE(*d))
6557                                 d++;
6558                             if (*d == '=' && d[1] == '>')
6559                                 probable_sub = 1;
6560                         }
6561                     }
6562                     if (probable_sub) {
6563                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6564                         op_free(pl_yylval.opval);
6565                         pl_yylval.opval = rv2cv_op;
6566                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6567                         PL_last_lop = PL_oldbufptr;
6568                         PL_last_lop_op = OP_ENTERSUB;
6569                         PL_nextwhite = PL_thiswhite;
6570                         PL_thiswhite = 0;
6571                         start_force(PL_curforce);
6572                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6573                         PL_expect = XTERM;
6574                         PL_nextwhite = nextPL_nextwhite;
6575                         curmad('X', PL_thistoken);
6576                         PL_thistoken = newSVpvs("");
6577                         force_next(WORD);
6578                         TOKEN(NOAMP);
6579                     }
6580 #else
6581                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6582                     PL_expect = XTERM;
6583                     force_next(WORD);
6584                     TOKEN(NOAMP);
6585 #endif
6586                 }
6587
6588                 /* Call it a bare word */
6589
6590                 if (PL_hints & HINT_STRICT_SUBS)
6591                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6592                 else {
6593                 bareword:
6594                     /* after "print" and similar functions (corresponding to
6595                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6596                      * a filehandle should be subject to "strict subs".
6597                      * Likewise for the optional indirect-object argument to system
6598                      * or exec, which can't be a bareword */
6599                     if ((PL_last_lop_op == OP_PRINT
6600                             || PL_last_lop_op == OP_PRTF
6601                             || PL_last_lop_op == OP_SAY
6602                             || PL_last_lop_op == OP_SYSTEM
6603                             || PL_last_lop_op == OP_EXEC)
6604                             && (PL_hints & HINT_STRICT_SUBS))
6605                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6606                     if (lastchar != '-') {
6607                         if (ckWARN(WARN_RESERVED)) {
6608                             d = PL_tokenbuf;
6609                             while (isLOWER(*d))
6610                                 d++;
6611                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6612                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6613                                        PL_tokenbuf);
6614                         }
6615                     }
6616                 }
6617                 op_free(rv2cv_op);
6618
6619             safe_bareword:
6620                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6621                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6622                                      "Operator or semicolon missing before %c%s",
6623                                      lastchar, PL_tokenbuf);
6624                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6625                                      "Ambiguous use of %c resolved as operator %c",
6626                                      lastchar, lastchar);
6627                 }
6628                 TOKEN(WORD);
6629             }
6630
6631         case KEY___FILE__:
6632             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6633                                         newSVpv(CopFILE(PL_curcop),0));
6634             TERM(THING);
6635
6636         case KEY___LINE__:
6637             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6638                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6639             TERM(THING);
6640
6641         case KEY___PACKAGE__:
6642             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6643                                         (PL_curstash
6644                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6645                                          : &PL_sv_undef));
6646             TERM(THING);
6647
6648         case KEY___DATA__:
6649         case KEY___END__: {
6650             GV *gv;
6651             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6652                 const char *pname = "main";
6653                 if (PL_tokenbuf[2] == 'D')
6654                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6655                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6656                                 SVt_PVIO);
6657                 GvMULTI_on(gv);
6658                 if (!GvIO(gv))
6659                     GvIOp(gv) = newIO();
6660                 IoIFP(GvIOp(gv)) = PL_rsfp;
6661 #if defined(HAS_FCNTL) && defined(F_SETFD)
6662                 {
6663                     const int fd = PerlIO_fileno(PL_rsfp);
6664                     fcntl(fd,F_SETFD,fd >= 3);
6665                 }
6666 #endif
6667                 /* Mark this internal pseudo-handle as clean */
6668                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6669                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6670                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6671                 else
6672                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6673 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6674                 /* if the script was opened in binmode, we need to revert
6675                  * it to text mode for compatibility; but only iff it has CRs
6676                  * XXX this is a questionable hack at best. */
6677                 if (PL_bufend-PL_bufptr > 2
6678                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6679                 {
6680                     Off_t loc = 0;
6681                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6682                         loc = PerlIO_tell(PL_rsfp);
6683                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6684                     }
6685 #ifdef NETWARE
6686                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6687 #else
6688                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6689 #endif  /* NETWARE */
6690 #ifdef PERLIO_IS_STDIO /* really? */
6691 #  if defined(__BORLANDC__)
6692                         /* XXX see note in do_binmode() */
6693                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6694 #  endif
6695 #endif
6696                         if (loc > 0)
6697                             PerlIO_seek(PL_rsfp, loc, 0);
6698                     }
6699                 }
6700 #endif
6701 #ifdef PERLIO_LAYERS
6702                 if (!IN_BYTES) {
6703                     if (UTF)
6704                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6705                     else if (PL_encoding) {
6706                         SV *name;
6707                         dSP;
6708                         ENTER;
6709                         SAVETMPS;
6710                         PUSHMARK(sp);
6711                         EXTEND(SP, 1);
6712                         XPUSHs(PL_encoding);
6713                         PUTBACK;
6714                         call_method("name", G_SCALAR);
6715                         SPAGAIN;
6716                         name = POPs;
6717                         PUTBACK;
6718                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6719                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6720                                                       SVfARG(name)));
6721                         FREETMPS;
6722                         LEAVE;
6723                     }
6724                 }
6725 #endif
6726 #ifdef PERL_MAD
6727                 if (PL_madskills) {
6728                     if (PL_realtokenstart >= 0) {
6729                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6730                         if (!PL_endwhite)
6731                             PL_endwhite = newSVpvs("");
6732                         sv_catsv(PL_endwhite, PL_thiswhite);
6733                         PL_thiswhite = 0;
6734                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6735                         PL_realtokenstart = -1;
6736                     }
6737                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6738                            != NULL) ;
6739                 }
6740 #endif
6741                 PL_rsfp = NULL;
6742             }
6743             goto fake_eof;
6744         }
6745
6746         case KEY_AUTOLOAD:
6747         case KEY_DESTROY:
6748         case KEY_BEGIN:
6749         case KEY_UNITCHECK:
6750         case KEY_CHECK:
6751         case KEY_INIT:
6752         case KEY_END:
6753             if (PL_expect == XSTATE) {
6754                 s = PL_bufptr;
6755                 goto really_sub;
6756             }
6757             goto just_a_word;
6758
6759         case KEY_CORE:
6760             if (*s == ':' && s[1] == ':') {
6761                 s += 2;
6762                 d = s;
6763                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6764                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6765                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6766                 if (tmp < 0)
6767                     tmp = -tmp;
6768                 else if (tmp == KEY_require || tmp == KEY_do)
6769                     /* that's a way to remember we saw "CORE::" */
6770                     orig_keyword = tmp;
6771                 goto reserved_word;
6772             }
6773             goto just_a_word;
6774
6775         case KEY_abs:
6776             UNI(OP_ABS);
6777
6778         case KEY_alarm:
6779             UNI(OP_ALARM);
6780
6781         case KEY_accept:
6782             LOP(OP_ACCEPT,XTERM);
6783
6784         case KEY_and:
6785             OPERATOR(ANDOP);
6786
6787         case KEY_atan2:
6788             LOP(OP_ATAN2,XTERM);
6789
6790         case KEY_bind:
6791             LOP(OP_BIND,XTERM);
6792
6793         case KEY_binmode:
6794             LOP(OP_BINMODE,XTERM);
6795
6796         case KEY_bless:
6797             LOP(OP_BLESS,XTERM);
6798
6799         case KEY_break:
6800             FUN0(OP_BREAK);
6801
6802         case KEY_chop:
6803             UNI(OP_CHOP);
6804
6805         case KEY_continue:
6806             /* When 'use switch' is in effect, continue has a dual
6807                life as a control operator. */
6808             {
6809                 if (!FEATURE_IS_ENABLED("switch"))
6810                     PREBLOCK(CONTINUE);
6811                 else {
6812                     /* We have to disambiguate the two senses of
6813                       "continue". If the next token is a '{' then
6814                       treat it as the start of a continue block;
6815                       otherwise treat it as a control operator.
6816                      */
6817                     s = skipspace(s);
6818                     if (*s == '{')
6819             PREBLOCK(CONTINUE);
6820                     else
6821                         FUN0(OP_CONTINUE);
6822                 }
6823             }
6824
6825         case KEY_chdir:
6826             /* may use HOME */
6827             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6828             UNI(OP_CHDIR);
6829
6830         case KEY_close:
6831             UNI(OP_CLOSE);
6832
6833         case KEY_closedir:
6834             UNI(OP_CLOSEDIR);
6835
6836         case KEY_cmp:
6837             Eop(OP_SCMP);
6838
6839         case KEY_caller:
6840             UNI(OP_CALLER);
6841
6842         case KEY_crypt:
6843 #ifdef FCRYPT
6844             if (!PL_cryptseen) {
6845                 PL_cryptseen = TRUE;
6846                 init_des();
6847             }
6848 #endif
6849             LOP(OP_CRYPT,XTERM);
6850
6851         case KEY_chmod:
6852             LOP(OP_CHMOD,XTERM);
6853
6854         case KEY_chown:
6855             LOP(OP_CHOWN,XTERM);
6856
6857         case KEY_connect:
6858             LOP(OP_CONNECT,XTERM);
6859
6860         case KEY_chr:
6861             UNI(OP_CHR);
6862
6863         case KEY_cos:
6864             UNI(OP_COS);
6865
6866         case KEY_chroot:
6867             UNI(OP_CHROOT);
6868
6869         case KEY_default:
6870             PREBLOCK(DEFAULT);
6871
6872         case KEY_do:
6873             s = SKIPSPACE1(s);
6874             if (*s == '{')
6875                 PRETERMBLOCK(DO);
6876             if (*s != '\'')
6877                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6878             if (orig_keyword == KEY_do) {
6879                 orig_keyword = 0;
6880                 pl_yylval.ival = 1;
6881             }
6882             else
6883                 pl_yylval.ival = 0;
6884             OPERATOR(DO);
6885
6886         case KEY_die:
6887             PL_hints |= HINT_BLOCK_SCOPE;
6888             LOP(OP_DIE,XTERM);
6889
6890         case KEY_defined:
6891             UNI(OP_DEFINED);
6892
6893         case KEY_delete:
6894             UNI(OP_DELETE);
6895
6896         case KEY_dbmopen:
6897             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6898             LOP(OP_DBMOPEN,XTERM);
6899
6900         case KEY_dbmclose:
6901             UNI(OP_DBMCLOSE);
6902
6903         case KEY_dump:
6904             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6905             LOOPX(OP_DUMP);
6906
6907         case KEY_else:
6908             PREBLOCK(ELSE);
6909
6910         case KEY_elsif:
6911             pl_yylval.ival = CopLINE(PL_curcop);
6912             OPERATOR(ELSIF);
6913
6914         case KEY_eq:
6915             Eop(OP_SEQ);
6916
6917         case KEY_exists:
6918             UNI(OP_EXISTS);
6919
6920         case KEY_exit:
6921             if (PL_madskills)
6922                 UNI(OP_INT);
6923             UNI(OP_EXIT);
6924
6925         case KEY_eval:
6926             s = SKIPSPACE1(s);
6927             if (*s == '{') { /* block eval */
6928                 PL_expect = XTERMBLOCK;
6929                 UNIBRACK(OP_ENTERTRY);
6930             }
6931             else { /* string eval */
6932                 PL_expect = XTERM;
6933                 UNIBRACK(OP_ENTEREVAL);
6934             }
6935
6936         case KEY_eof:
6937             UNI(OP_EOF);
6938
6939         case KEY_exp:
6940             UNI(OP_EXP);
6941
6942         case KEY_each:
6943             UNI(OP_EACH);
6944
6945         case KEY_exec:
6946             LOP(OP_EXEC,XREF);
6947
6948         case KEY_endhostent:
6949             FUN0(OP_EHOSTENT);
6950
6951         case KEY_endnetent:
6952             FUN0(OP_ENETENT);
6953
6954         case KEY_endservent:
6955             FUN0(OP_ESERVENT);
6956
6957         case KEY_endprotoent:
6958             FUN0(OP_EPROTOENT);
6959
6960         case KEY_endpwent:
6961             FUN0(OP_EPWENT);
6962
6963         case KEY_endgrent:
6964             FUN0(OP_EGRENT);
6965
6966         case KEY_for:
6967         case KEY_foreach:
6968             pl_yylval.ival = CopLINE(PL_curcop);
6969             s = SKIPSPACE1(s);
6970             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6971                 char *p = s;
6972 #ifdef PERL_MAD
6973                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6974 #endif
6975
6976                 if ((PL_bufend - p) >= 3 &&
6977                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6978                     p += 2;
6979                 else if ((PL_bufend - p) >= 4 &&
6980                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6981                     p += 3;
6982                 p = PEEKSPACE(p);
6983                 if (isIDFIRST_lazy_if(p,UTF)) {
6984                     p = scan_ident(p, PL_bufend,
6985                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6986                     p = PEEKSPACE(p);
6987                 }
6988                 if (*p != '$')
6989                     Perl_croak(aTHX_ "Missing $ on loop variable");
6990 #ifdef PERL_MAD
6991                 s = SvPVX(PL_linestr) + soff;
6992 #endif
6993             }
6994             OPERATOR(FOR);
6995
6996         case KEY_formline:
6997             LOP(OP_FORMLINE,XTERM);
6998
6999         case KEY_fork:
7000             FUN0(OP_FORK);
7001
7002         case KEY_fcntl:
7003             LOP(OP_FCNTL,XTERM);
7004
7005         case KEY_fileno:
7006             UNI(OP_FILENO);
7007
7008         case KEY_flock:
7009             LOP(OP_FLOCK,XTERM);
7010
7011         case KEY_gt:
7012             Rop(OP_SGT);
7013
7014         case KEY_ge:
7015             Rop(OP_SGE);
7016
7017         case KEY_grep:
7018             LOP(OP_GREPSTART, XREF);
7019
7020         case KEY_goto:
7021             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7022             LOOPX(OP_GOTO);
7023
7024         case KEY_gmtime:
7025             UNI(OP_GMTIME);
7026
7027         case KEY_getc:
7028             UNIDOR(OP_GETC);
7029
7030         case KEY_getppid:
7031             FUN0(OP_GETPPID);
7032
7033         case KEY_getpgrp:
7034             UNI(OP_GETPGRP);
7035
7036         case KEY_getpriority:
7037             LOP(OP_GETPRIORITY,XTERM);
7038
7039         case KEY_getprotobyname:
7040             UNI(OP_GPBYNAME);
7041
7042         case KEY_getprotobynumber:
7043             LOP(OP_GPBYNUMBER,XTERM);
7044
7045         case KEY_getprotoent:
7046             FUN0(OP_GPROTOENT);
7047
7048         case KEY_getpwent:
7049             FUN0(OP_GPWENT);
7050
7051         case KEY_getpwnam:
7052             UNI(OP_GPWNAM);
7053
7054         case KEY_getpwuid:
7055             UNI(OP_GPWUID);
7056
7057         case KEY_getpeername:
7058             UNI(OP_GETPEERNAME);
7059
7060         case KEY_gethostbyname:
7061             UNI(OP_GHBYNAME);
7062
7063         case KEY_gethostbyaddr:
7064             LOP(OP_GHBYADDR,XTERM);
7065
7066         case KEY_gethostent:
7067             FUN0(OP_GHOSTENT);
7068
7069         case KEY_getnetbyname:
7070             UNI(OP_GNBYNAME);
7071
7072         case KEY_getnetbyaddr:
7073             LOP(OP_GNBYADDR,XTERM);
7074
7075         case KEY_getnetent:
7076             FUN0(OP_GNETENT);
7077
7078         case KEY_getservbyname:
7079             LOP(OP_GSBYNAME,XTERM);
7080
7081         case KEY_getservbyport:
7082             LOP(OP_GSBYPORT,XTERM);
7083
7084         case KEY_getservent:
7085             FUN0(OP_GSERVENT);
7086
7087         case KEY_getsockname:
7088             UNI(OP_GETSOCKNAME);
7089
7090         case KEY_getsockopt:
7091             LOP(OP_GSOCKOPT,XTERM);
7092
7093         case KEY_getgrent:
7094             FUN0(OP_GGRENT);
7095
7096         case KEY_getgrnam:
7097             UNI(OP_GGRNAM);
7098
7099         case KEY_getgrgid:
7100             UNI(OP_GGRGID);
7101
7102         case KEY_getlogin:
7103             FUN0(OP_GETLOGIN);
7104
7105         case KEY_given:
7106             pl_yylval.ival = CopLINE(PL_curcop);
7107             OPERATOR(GIVEN);
7108
7109         case KEY_glob:
7110             LOP(OP_GLOB,XTERM);
7111
7112         case KEY_hex:
7113             UNI(OP_HEX);
7114
7115         case KEY_if:
7116             pl_yylval.ival = CopLINE(PL_curcop);
7117             OPERATOR(IF);
7118
7119         case KEY_index:
7120             LOP(OP_INDEX,XTERM);
7121
7122         case KEY_int:
7123             UNI(OP_INT);
7124
7125         case KEY_ioctl:
7126             LOP(OP_IOCTL,XTERM);
7127
7128         case KEY_join:
7129             LOP(OP_JOIN,XTERM);
7130
7131         case KEY_keys:
7132             UNI(OP_KEYS);
7133
7134         case KEY_kill:
7135             LOP(OP_KILL,XTERM);
7136
7137         case KEY_last:
7138             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7139             LOOPX(OP_LAST);
7140
7141         case KEY_lc:
7142             UNI(OP_LC);
7143
7144         case KEY_lcfirst:
7145             UNI(OP_LCFIRST);
7146
7147         case KEY_local:
7148             pl_yylval.ival = 0;
7149             OPERATOR(LOCAL);
7150
7151         case KEY_length:
7152             UNI(OP_LENGTH);
7153
7154         case KEY_lt:
7155             Rop(OP_SLT);
7156
7157         case KEY_le:
7158             Rop(OP_SLE);
7159
7160         case KEY_localtime:
7161             UNI(OP_LOCALTIME);
7162
7163         case KEY_log:
7164             UNI(OP_LOG);
7165
7166         case KEY_link:
7167             LOP(OP_LINK,XTERM);
7168
7169         case KEY_listen:
7170             LOP(OP_LISTEN,XTERM);
7171
7172         case KEY_lock:
7173             UNI(OP_LOCK);
7174
7175         case KEY_lstat:
7176             UNI(OP_LSTAT);
7177
7178         case KEY_m:
7179             s = scan_pat(s,OP_MATCH);
7180             TERM(sublex_start());
7181
7182         case KEY_map:
7183             LOP(OP_MAPSTART, XREF);
7184
7185         case KEY_mkdir:
7186             LOP(OP_MKDIR,XTERM);
7187
7188         case KEY_msgctl:
7189             LOP(OP_MSGCTL,XTERM);
7190
7191         case KEY_msgget:
7192             LOP(OP_MSGGET,XTERM);
7193
7194         case KEY_msgrcv:
7195             LOP(OP_MSGRCV,XTERM);
7196
7197         case KEY_msgsnd:
7198             LOP(OP_MSGSND,XTERM);
7199
7200         case KEY_our:
7201         case KEY_my:
7202         case KEY_state:
7203             PL_in_my = (U16)tmp;
7204             s = SKIPSPACE1(s);
7205             if (isIDFIRST_lazy_if(s,UTF)) {
7206 #ifdef PERL_MAD
7207                 char* start = s;
7208 #endif
7209                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7210                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7211                     goto really_sub;
7212                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7213                 if (!PL_in_my_stash) {
7214                     char tmpbuf[1024];
7215                     PL_bufptr = s;
7216                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7217                     yyerror(tmpbuf);
7218                 }
7219 #ifdef PERL_MAD
7220                 if (PL_madskills) {     /* just add type to declarator token */
7221                     sv_catsv(PL_thistoken, PL_nextwhite);
7222                     PL_nextwhite = 0;
7223                     sv_catpvn(PL_thistoken, start, s - start);
7224                 }
7225 #endif
7226             }
7227             pl_yylval.ival = 1;
7228             OPERATOR(MY);
7229
7230         case KEY_next:
7231             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7232             LOOPX(OP_NEXT);
7233
7234         case KEY_ne:
7235             Eop(OP_SNE);
7236
7237         case KEY_no:
7238             s = tokenize_use(0, s);
7239             OPERATOR(USE);
7240
7241         case KEY_not:
7242             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7243                 FUN1(OP_NOT);
7244             else
7245                 OPERATOR(NOTOP);
7246
7247         case KEY_open:
7248             s = SKIPSPACE1(s);
7249             if (isIDFIRST_lazy_if(s,UTF)) {
7250                 const char *t;
7251                 for (d = s; isALNUM_lazy_if(d,UTF);)
7252                     d++;
7253                 for (t=d; isSPACE(*t);)
7254                     t++;
7255                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7256                     /* [perl #16184] */
7257                     && !(t[0] == '=' && t[1] == '>')
7258                 ) {
7259                     int parms_len = (int)(d-s);
7260                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7261                            "Precedence problem: open %.*s should be open(%.*s)",
7262                             parms_len, s, parms_len, s);
7263                 }
7264             }
7265             LOP(OP_OPEN,XTERM);
7266
7267         case KEY_or:
7268             pl_yylval.ival = OP_OR;
7269             OPERATOR(OROP);
7270
7271         case KEY_ord:
7272             UNI(OP_ORD);
7273
7274         case KEY_oct:
7275             UNI(OP_OCT);
7276
7277         case KEY_opendir:
7278             LOP(OP_OPEN_DIR,XTERM);
7279
7280         case KEY_print:
7281             checkcomma(s,PL_tokenbuf,"filehandle");
7282             LOP(OP_PRINT,XREF);
7283
7284         case KEY_printf:
7285             checkcomma(s,PL_tokenbuf,"filehandle");
7286             LOP(OP_PRTF,XREF);
7287
7288         case KEY_prototype:
7289             UNI(OP_PROTOTYPE);
7290
7291         case KEY_push:
7292             LOP(OP_PUSH,XTERM);
7293
7294         case KEY_pop:
7295             UNIDOR(OP_POP);
7296
7297         case KEY_pos:
7298             UNIDOR(OP_POS);
7299
7300         case KEY_pack:
7301             LOP(OP_PACK,XTERM);
7302
7303         case KEY_package:
7304             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7305             s = SKIPSPACE1(s);
7306             s = force_strict_version(s);
7307             PL_lex_expect = XBLOCK;
7308             OPERATOR(PACKAGE);
7309
7310         case KEY_pipe:
7311             LOP(OP_PIPE_OP,XTERM);
7312
7313         case KEY_q:
7314             s = scan_str(s,!!PL_madskills,FALSE);
7315             if (!s)
7316                 missingterm(NULL);
7317             pl_yylval.ival = OP_CONST;
7318             TERM(sublex_start());
7319
7320         case KEY_quotemeta:
7321             UNI(OP_QUOTEMETA);
7322
7323         case KEY_qw: {
7324             OP *words = NULL;
7325             s = scan_str(s,!!PL_madskills,FALSE);
7326             if (!s)
7327                 missingterm(NULL);
7328             PL_expect = XOPERATOR;
7329             if (SvCUR(PL_lex_stuff)) {
7330                 int warned = 0;
7331                 d = SvPV_force(PL_lex_stuff, len);
7332                 while (len) {
7333                     for (; isSPACE(*d) && len; --len, ++d)
7334                         /**/;
7335                     if (len) {
7336                         SV *sv;
7337                         const char *b = d;
7338                         if (!warned && ckWARN(WARN_QW)) {
7339                             for (; !isSPACE(*d) && len; --len, ++d) {
7340                                 if (*d == ',') {
7341                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7342                                         "Possible attempt to separate words with commas");
7343                                     ++warned;
7344                                 }
7345                                 else if (*d == '#') {
7346                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7347                                         "Possible attempt to put comments in qw() list");
7348                                     ++warned;
7349                                 }
7350                             }
7351                         }
7352                         else {
7353                             for (; !isSPACE(*d) && len; --len, ++d)
7354                                 /**/;
7355                         }
7356                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7357                         words = append_elem(OP_LIST, words,
7358                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7359                     }
7360                 }
7361             }
7362             if (!words)
7363                 words = newNULLLIST();
7364             if (PL_lex_stuff) {
7365                 SvREFCNT_dec(PL_lex_stuff);
7366                 PL_lex_stuff = NULL;
7367             }
7368             PL_expect = XOPERATOR;
7369             pl_yylval.opval = sawparens(words);
7370             TOKEN(QWLIST);
7371         }
7372
7373         case KEY_qq:
7374             s = scan_str(s,!!PL_madskills,FALSE);
7375             if (!s)
7376                 missingterm(NULL);
7377             pl_yylval.ival = OP_STRINGIFY;
7378             if (SvIVX(PL_lex_stuff) == '\'')
7379                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7380             TERM(sublex_start());
7381
7382         case KEY_qr:
7383             s = scan_pat(s,OP_QR);
7384             TERM(sublex_start());
7385
7386         case KEY_qx:
7387             s = scan_str(s,!!PL_madskills,FALSE);
7388             if (!s)
7389                 missingterm(NULL);
7390             readpipe_override();
7391             TERM(sublex_start());
7392
7393         case KEY_return:
7394             OLDLOP(OP_RETURN);
7395
7396         case KEY_require:
7397             s = SKIPSPACE1(s);
7398             if (isDIGIT(*s)) {
7399                 s = force_version(s, FALSE);
7400             }
7401             else if (*s != 'v' || !isDIGIT(s[1])
7402                     || (s = force_version(s, TRUE), *s == 'v'))
7403             {
7404                 *PL_tokenbuf = '\0';
7405                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7406                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7407                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7408                 else if (*s == '<')
7409                     yyerror("<> should be quotes");
7410             }
7411             if (orig_keyword == KEY_require) {
7412                 orig_keyword = 0;
7413                 pl_yylval.ival = 1;
7414             }
7415             else
7416                 pl_yylval.ival = 0;
7417             PL_expect = XTERM;
7418             PL_bufptr = s;
7419             PL_last_uni = PL_oldbufptr;
7420             PL_last_lop_op = OP_REQUIRE;
7421             s = skipspace(s);
7422             return REPORT( (int)REQUIRE );
7423
7424         case KEY_reset:
7425             UNI(OP_RESET);
7426
7427         case KEY_redo:
7428             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7429             LOOPX(OP_REDO);
7430
7431         case KEY_rename:
7432             LOP(OP_RENAME,XTERM);
7433
7434         case KEY_rand:
7435             UNI(OP_RAND);
7436
7437         case KEY_rmdir:
7438             UNI(OP_RMDIR);
7439
7440         case KEY_rindex:
7441             LOP(OP_RINDEX,XTERM);
7442
7443         case KEY_read:
7444             LOP(OP_READ,XTERM);
7445
7446         case KEY_readdir:
7447             UNI(OP_READDIR);
7448
7449         case KEY_readline:
7450             UNIDOR(OP_READLINE);
7451
7452         case KEY_readpipe:
7453             UNIDOR(OP_BACKTICK);
7454
7455         case KEY_rewinddir:
7456             UNI(OP_REWINDDIR);
7457
7458         case KEY_recv:
7459             LOP(OP_RECV,XTERM);
7460
7461         case KEY_reverse:
7462             LOP(OP_REVERSE,XTERM);
7463
7464         case KEY_readlink:
7465             UNIDOR(OP_READLINK);
7466
7467         case KEY_ref:
7468             UNI(OP_REF);
7469
7470         case KEY_s:
7471             s = scan_subst(s);
7472             if (pl_yylval.opval)
7473                 TERM(sublex_start());
7474             else
7475                 TOKEN(1);       /* force error */
7476
7477         case KEY_say:
7478             checkcomma(s,PL_tokenbuf,"filehandle");
7479             LOP(OP_SAY,XREF);
7480
7481         case KEY_chomp:
7482             UNI(OP_CHOMP);
7483
7484         case KEY_scalar:
7485             UNI(OP_SCALAR);
7486
7487         case KEY_select:
7488             LOP(OP_SELECT,XTERM);
7489
7490         case KEY_seek:
7491             LOP(OP_SEEK,XTERM);
7492
7493         case KEY_semctl:
7494             LOP(OP_SEMCTL,XTERM);
7495
7496         case KEY_semget:
7497             LOP(OP_SEMGET,XTERM);
7498
7499         case KEY_semop:
7500             LOP(OP_SEMOP,XTERM);
7501
7502         case KEY_send:
7503             LOP(OP_SEND,XTERM);
7504
7505         case KEY_setpgrp:
7506             LOP(OP_SETPGRP,XTERM);
7507
7508         case KEY_setpriority:
7509             LOP(OP_SETPRIORITY,XTERM);
7510
7511         case KEY_sethostent:
7512             UNI(OP_SHOSTENT);
7513
7514         case KEY_setnetent:
7515             UNI(OP_SNETENT);
7516
7517         case KEY_setservent:
7518             UNI(OP_SSERVENT);
7519
7520         case KEY_setprotoent:
7521             UNI(OP_SPROTOENT);
7522
7523         case KEY_setpwent:
7524             FUN0(OP_SPWENT);
7525
7526         case KEY_setgrent:
7527             FUN0(OP_SGRENT);
7528
7529         case KEY_seekdir:
7530             LOP(OP_SEEKDIR,XTERM);
7531
7532         case KEY_setsockopt:
7533             LOP(OP_SSOCKOPT,XTERM);
7534
7535         case KEY_shift:
7536             UNIDOR(OP_SHIFT);
7537
7538         case KEY_shmctl:
7539             LOP(OP_SHMCTL,XTERM);
7540
7541         case KEY_shmget:
7542             LOP(OP_SHMGET,XTERM);
7543
7544         case KEY_shmread:
7545             LOP(OP_SHMREAD,XTERM);
7546
7547         case KEY_shmwrite:
7548             LOP(OP_SHMWRITE,XTERM);
7549
7550         case KEY_shutdown:
7551             LOP(OP_SHUTDOWN,XTERM);
7552
7553         case KEY_sin:
7554             UNI(OP_SIN);
7555
7556         case KEY_sleep:
7557             UNI(OP_SLEEP);
7558
7559         case KEY_socket:
7560             LOP(OP_SOCKET,XTERM);
7561
7562         case KEY_socketpair:
7563             LOP(OP_SOCKPAIR,XTERM);
7564
7565         case KEY_sort:
7566             checkcomma(s,PL_tokenbuf,"subroutine name");
7567             s = SKIPSPACE1(s);
7568             if (*s == ';' || *s == ')')         /* probably a close */
7569                 Perl_croak(aTHX_ "sort is now a reserved word");
7570             PL_expect = XTERM;
7571             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7572             LOP(OP_SORT,XREF);
7573
7574         case KEY_split:
7575             LOP(OP_SPLIT,XTERM);
7576
7577         case KEY_sprintf:
7578             LOP(OP_SPRINTF,XTERM);
7579
7580         case KEY_splice:
7581             LOP(OP_SPLICE,XTERM);
7582
7583         case KEY_sqrt:
7584             UNI(OP_SQRT);
7585
7586         case KEY_srand:
7587             UNI(OP_SRAND);
7588
7589         case KEY_stat:
7590             UNI(OP_STAT);
7591
7592         case KEY_study:
7593             UNI(OP_STUDY);
7594
7595         case KEY_substr:
7596             LOP(OP_SUBSTR,XTERM);
7597
7598         case KEY_format:
7599         case KEY_sub:
7600           really_sub:
7601             {
7602                 char tmpbuf[sizeof PL_tokenbuf];
7603                 SSize_t tboffset = 0;
7604                 expectation attrful;
7605                 bool have_name, have_proto;
7606                 const int key = tmp;
7607
7608 #ifdef PERL_MAD
7609                 SV *tmpwhite = 0;
7610
7611                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7612                 SV *subtoken = newSVpvn(tstart, s - tstart);
7613                 PL_thistoken = 0;
7614
7615                 d = s;
7616                 s = SKIPSPACE2(s,tmpwhite);
7617 #else
7618                 s = skipspace(s);
7619 #endif
7620
7621                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7622                     (*s == ':' && s[1] == ':'))
7623                 {
7624 #ifdef PERL_MAD
7625                     SV *nametoke = NULL;
7626 #endif
7627
7628                     PL_expect = XBLOCK;
7629                     attrful = XATTRBLOCK;
7630                     /* remember buffer pos'n for later force_word */
7631                     tboffset = s - PL_oldbufptr;
7632                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7633 #ifdef PERL_MAD
7634                     if (PL_madskills)
7635                         nametoke = newSVpvn(s, d - s);
7636 #endif
7637                     if (memchr(tmpbuf, ':', len))
7638                         sv_setpvn(PL_subname, tmpbuf, len);
7639                     else {
7640                         sv_setsv(PL_subname,PL_curstname);
7641                         sv_catpvs(PL_subname,"::");
7642                         sv_catpvn(PL_subname,tmpbuf,len);
7643                     }
7644                     have_name = TRUE;
7645
7646 #ifdef PERL_MAD
7647
7648                     start_force(0);
7649                     CURMAD('X', nametoke);
7650                     CURMAD('_', tmpwhite);
7651                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7652                                       FALSE, TRUE, TRUE);
7653
7654                     s = SKIPSPACE2(d,tmpwhite);
7655 #else
7656                     s = skipspace(d);
7657 #endif
7658                 }
7659                 else {
7660                     if (key == KEY_my)
7661                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7662                     PL_expect = XTERMBLOCK;
7663                     attrful = XATTRTERM;
7664                     sv_setpvs(PL_subname,"?");
7665                     have_name = FALSE;
7666                 }
7667
7668                 if (key == KEY_format) {
7669                     if (*s == '=')
7670                         PL_lex_formbrack = PL_lex_brackets + 1;
7671 #ifdef PERL_MAD
7672                     PL_thistoken = subtoken;
7673                     s = d;
7674 #else
7675                     if (have_name)
7676                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7677                                           FALSE, TRUE, TRUE);
7678 #endif
7679                     OPERATOR(FORMAT);
7680                 }
7681
7682                 /* Look for a prototype */
7683                 if (*s == '(') {
7684                     char *p;
7685                     bool bad_proto = FALSE;
7686                     bool in_brackets = FALSE;
7687                     char greedy_proto = ' ';
7688                     bool proto_after_greedy_proto = FALSE;
7689                     bool must_be_last = FALSE;
7690                     bool underscore = FALSE;
7691                     bool seen_underscore = FALSE;
7692                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7693
7694                     s = scan_str(s,!!PL_madskills,FALSE);
7695                     if (!s)
7696                         Perl_croak(aTHX_ "Prototype not terminated");
7697                     /* strip spaces and check for bad characters */
7698                     d = SvPVX(PL_lex_stuff);
7699                     tmp = 0;
7700                     for (p = d; *p; ++p) {
7701                         if (!isSPACE(*p)) {
7702                             d[tmp++] = *p;
7703
7704                             if (warnillegalproto) {
7705                                 if (must_be_last)
7706                                     proto_after_greedy_proto = TRUE;
7707                                 if (!strchr("$@%*;[]&\\_", *p)) {
7708                                     bad_proto = TRUE;
7709                                 }
7710                                 else {
7711                                     if ( underscore ) {
7712                                         if ( *p != ';' )
7713                                             bad_proto = TRUE;
7714                                         underscore = FALSE;
7715                                     }
7716                                     if ( *p == '[' ) {
7717                                         in_brackets = TRUE;
7718                                     }
7719                                     else if ( *p == ']' ) {
7720                                         in_brackets = FALSE;
7721                                     }
7722                                     else if ( (*p == '@' || *p == '%') &&
7723                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7724                                          !in_brackets ) {
7725                                         must_be_last = TRUE;
7726                                         greedy_proto = *p;
7727                                     }
7728                                     else if ( *p == '_' ) {
7729                                         underscore = seen_underscore = TRUE;
7730                                     }
7731                                 }
7732                             }
7733                         }
7734                     }
7735                     d[tmp] = '\0';
7736                     if (proto_after_greedy_proto)
7737                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7738                                     "Prototype after '%c' for %"SVf" : %s",
7739                                     greedy_proto, SVfARG(PL_subname), d);
7740                     if (bad_proto)
7741                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7742                                     "Illegal character %sin prototype for %"SVf" : %s",
7743                                     seen_underscore ? "after '_' " : "",
7744                                     SVfARG(PL_subname), d);
7745                     SvCUR_set(PL_lex_stuff, tmp);
7746                     have_proto = TRUE;
7747
7748 #ifdef PERL_MAD
7749                     start_force(0);
7750                     CURMAD('q', PL_thisopen);
7751                     CURMAD('_', tmpwhite);
7752                     CURMAD('=', PL_thisstuff);
7753                     CURMAD('Q', PL_thisclose);
7754                     NEXTVAL_NEXTTOKE.opval =
7755                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7756                     PL_lex_stuff = NULL;
7757                     force_next(THING);
7758
7759                     s = SKIPSPACE2(s,tmpwhite);
7760 #else
7761                     s = skipspace(s);
7762 #endif
7763                 }
7764                 else
7765                     have_proto = FALSE;
7766
7767                 if (*s == ':' && s[1] != ':')
7768                     PL_expect = attrful;
7769                 else if (*s != '{' && key == KEY_sub) {
7770                     if (!have_name)
7771                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7772                     else if (*s != ';' && *s != '}')
7773                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7774                 }
7775
7776 #ifdef PERL_MAD
7777                 start_force(0);
7778                 if (tmpwhite) {
7779                     if (PL_madskills)
7780                         curmad('^', newSVpvs(""));
7781                     CURMAD('_', tmpwhite);
7782                 }
7783                 force_next(0);
7784
7785                 PL_thistoken = subtoken;
7786 #else
7787                 if (have_proto) {
7788                     NEXTVAL_NEXTTOKE.opval =
7789                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7790                     PL_lex_stuff = NULL;
7791                     force_next(THING);
7792                 }
7793 #endif
7794                 if (!have_name) {
7795                     if (PL_curstash)
7796                         sv_setpvs(PL_subname, "__ANON__");
7797                     else
7798                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7799                     TOKEN(ANONSUB);
7800                 }
7801 #ifndef PERL_MAD
7802                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7803                                   FALSE, TRUE, TRUE);
7804 #endif
7805                 if (key == KEY_my)
7806                     TOKEN(MYSUB);
7807                 TOKEN(SUB);
7808             }
7809
7810         case KEY_system:
7811             LOP(OP_SYSTEM,XREF);
7812
7813         case KEY_symlink:
7814             LOP(OP_SYMLINK,XTERM);
7815
7816         case KEY_syscall:
7817             LOP(OP_SYSCALL,XTERM);
7818
7819         case KEY_sysopen:
7820             LOP(OP_SYSOPEN,XTERM);
7821
7822         case KEY_sysseek:
7823             LOP(OP_SYSSEEK,XTERM);
7824
7825         case KEY_sysread:
7826             LOP(OP_SYSREAD,XTERM);
7827
7828         case KEY_syswrite:
7829             LOP(OP_SYSWRITE,XTERM);
7830
7831         case KEY_tr:
7832             s = scan_trans(s);
7833             TERM(sublex_start());
7834
7835         case KEY_tell:
7836             UNI(OP_TELL);
7837
7838         case KEY_telldir:
7839             UNI(OP_TELLDIR);
7840
7841         case KEY_tie:
7842             LOP(OP_TIE,XTERM);
7843
7844         case KEY_tied:
7845             UNI(OP_TIED);
7846
7847         case KEY_time:
7848             FUN0(OP_TIME);
7849
7850         case KEY_times:
7851             FUN0(OP_TMS);
7852
7853         case KEY_truncate:
7854             LOP(OP_TRUNCATE,XTERM);
7855
7856         case KEY_uc:
7857             UNI(OP_UC);
7858
7859         case KEY_ucfirst:
7860             UNI(OP_UCFIRST);
7861
7862         case KEY_untie:
7863             UNI(OP_UNTIE);
7864
7865         case KEY_until:
7866             pl_yylval.ival = CopLINE(PL_curcop);
7867             OPERATOR(UNTIL);
7868
7869         case KEY_unless:
7870             pl_yylval.ival = CopLINE(PL_curcop);
7871             OPERATOR(UNLESS);
7872
7873         case KEY_unlink:
7874             LOP(OP_UNLINK,XTERM);
7875
7876         case KEY_undef:
7877             UNIDOR(OP_UNDEF);
7878
7879         case KEY_unpack:
7880             LOP(OP_UNPACK,XTERM);
7881
7882         case KEY_utime:
7883             LOP(OP_UTIME,XTERM);
7884
7885         case KEY_umask:
7886             UNIDOR(OP_UMASK);
7887
7888         case KEY_unshift:
7889             LOP(OP_UNSHIFT,XTERM);
7890
7891         case KEY_use:
7892             s = tokenize_use(1, s);
7893             OPERATOR(USE);
7894
7895         case KEY_values:
7896             UNI(OP_VALUES);
7897
7898         case KEY_vec:
7899             LOP(OP_VEC,XTERM);
7900
7901         case KEY_when:
7902             pl_yylval.ival = CopLINE(PL_curcop);
7903             OPERATOR(WHEN);
7904
7905         case KEY_while:
7906             pl_yylval.ival = CopLINE(PL_curcop);
7907             OPERATOR(WHILE);
7908
7909         case KEY_warn:
7910             PL_hints |= HINT_BLOCK_SCOPE;
7911             LOP(OP_WARN,XTERM);
7912
7913         case KEY_wait:
7914             FUN0(OP_WAIT);
7915
7916         case KEY_waitpid:
7917             LOP(OP_WAITPID,XTERM);
7918
7919         case KEY_wantarray:
7920             FUN0(OP_WANTARRAY);
7921
7922         case KEY_write:
7923 #ifdef EBCDIC
7924         {
7925             char ctl_l[2];
7926             ctl_l[0] = toCTRL('L');
7927             ctl_l[1] = '\0';
7928             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7929         }
7930 #else
7931             /* Make sure $^L is defined */
7932             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7933 #endif
7934             UNI(OP_ENTERWRITE);
7935
7936         case KEY_x:
7937             if (PL_expect == XOPERATOR)
7938                 Mop(OP_REPEAT);
7939             check_uni();
7940             goto just_a_word;
7941
7942         case KEY_xor:
7943             pl_yylval.ival = OP_XOR;
7944             OPERATOR(OROP);
7945
7946         case KEY_y:
7947             s = scan_trans(s);
7948             TERM(sublex_start());
7949         }
7950     }}
7951 }
7952 #ifdef __SC__
7953 #pragma segment Main
7954 #endif
7955
7956 static int
7957 S_pending_ident(pTHX)
7958 {
7959     dVAR;
7960     register char *d;
7961     PADOFFSET tmp = 0;
7962     /* pit holds the identifier we read and pending_ident is reset */
7963     char pit = PL_pending_ident;
7964     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7965     /* All routes through this function want to know if there is a colon.  */
7966     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7967     PL_pending_ident = 0;
7968
7969     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7970     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7971           "### Pending identifier '%s'\n", PL_tokenbuf); });
7972
7973     /* if we're in a my(), we can't allow dynamics here.
7974        $foo'bar has already been turned into $foo::bar, so
7975        just check for colons.
7976
7977        if it's a legal name, the OP is a PADANY.
7978     */
7979     if (PL_in_my) {
7980         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7981             if (has_colon)
7982                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7983                                   "variable %s in \"our\"",
7984                                   PL_tokenbuf));
7985             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7986         }
7987         else {
7988             if (has_colon)
7989                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7990                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7991
7992             pl_yylval.opval = newOP(OP_PADANY, 0);
7993             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7994             return PRIVATEREF;
7995         }
7996     }
7997
7998     /*
7999        build the ops for accesses to a my() variable.
8000
8001        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8002        then used in a comparison.  This catches most, but not
8003        all cases.  For instance, it catches
8004            sort { my($a); $a <=> $b }
8005        but not
8006            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8007        (although why you'd do that is anyone's guess).
8008     */
8009
8010     if (!has_colon) {
8011         if (!PL_in_my)
8012             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8013         if (tmp != NOT_IN_PAD) {
8014             /* might be an "our" variable" */
8015             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8016                 /* build ops for a bareword */
8017                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8018                 HEK * const stashname = HvNAME_HEK(stash);
8019                 SV *  const sym = newSVhek(stashname);
8020                 sv_catpvs(sym, "::");
8021                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8022                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8023                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8024                 gv_fetchsv(sym,
8025                     (PL_in_eval
8026                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8027                         : GV_ADDMULTI
8028                     ),
8029                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8030                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8031                      : SVt_PVHV));
8032                 return WORD;
8033             }
8034
8035             /* if it's a sort block and they're naming $a or $b */
8036             if (PL_last_lop_op == OP_SORT &&
8037                 PL_tokenbuf[0] == '$' &&
8038                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8039                 && !PL_tokenbuf[2])
8040             {
8041                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8042                      d < PL_bufend && *d != '\n';
8043                      d++)
8044                 {
8045                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8046                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8047                               PL_tokenbuf);
8048                     }
8049                 }
8050             }
8051
8052             pl_yylval.opval = newOP(OP_PADANY, 0);
8053             pl_yylval.opval->op_targ = tmp;
8054             return PRIVATEREF;
8055         }
8056     }
8057
8058     /*
8059        Whine if they've said @foo in a doublequoted string,
8060        and @foo isn't a variable we can find in the symbol
8061        table.
8062     */
8063     if (ckWARN(WARN_AMBIGUOUS) &&
8064         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8065         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8066                                          SVt_PVAV);
8067         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8068                 /* DO NOT warn for @- and @+ */
8069                 && !( PL_tokenbuf[2] == '\0' &&
8070                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8071            )
8072         {
8073             /* Downgraded from fatal to warning 20000522 mjd */
8074             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8075                         "Possible unintended interpolation of %s in string",
8076                         PL_tokenbuf);
8077         }
8078     }
8079
8080     /* build ops for a bareword */
8081     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8082                                                       tokenbuf_len - 1));
8083     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8084     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8085                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8086                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8087                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8088                       : SVt_PVHV));
8089     return WORD;
8090 }
8091
8092 /*
8093  *  The following code was generated by perl_keyword.pl.
8094  */
8095
8096 I32
8097 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8098 {
8099     dVAR;
8100
8101     PERL_ARGS_ASSERT_KEYWORD;
8102
8103   switch (len)
8104   {
8105     case 1: /* 5 tokens of length 1 */
8106       switch (name[0])
8107       {
8108         case 'm':
8109           {                                       /* m          */
8110             return KEY_m;
8111           }
8112
8113         case 'q':
8114           {                                       /* q          */
8115             return KEY_q;
8116           }
8117
8118         case 's':
8119           {                                       /* s          */
8120             return KEY_s;
8121           }
8122
8123         case 'x':
8124           {                                       /* x          */
8125             return -KEY_x;
8126           }
8127
8128         case 'y':
8129           {                                       /* y          */
8130             return KEY_y;
8131           }
8132
8133         default:
8134           goto unknown;
8135       }
8136
8137     case 2: /* 18 tokens of length 2 */
8138       switch (name[0])
8139       {
8140         case 'd':
8141           if (name[1] == 'o')
8142           {                                       /* do         */
8143             return KEY_do;
8144           }
8145
8146           goto unknown;
8147
8148         case 'e':
8149           if (name[1] == 'q')
8150           {                                       /* eq         */
8151             return -KEY_eq;
8152           }
8153
8154           goto unknown;
8155
8156         case 'g':
8157           switch (name[1])
8158           {
8159             case 'e':
8160               {                                   /* ge         */
8161                 return -KEY_ge;
8162               }
8163
8164             case 't':
8165               {                                   /* gt         */
8166                 return -KEY_gt;
8167               }
8168
8169             default:
8170               goto unknown;
8171           }
8172
8173         case 'i':
8174           if (name[1] == 'f')
8175           {                                       /* if         */
8176             return KEY_if;
8177           }
8178
8179           goto unknown;
8180
8181         case 'l':
8182           switch (name[1])
8183           {
8184             case 'c':
8185               {                                   /* lc         */
8186                 return -KEY_lc;
8187               }
8188
8189             case 'e':
8190               {                                   /* le         */
8191                 return -KEY_le;
8192               }
8193
8194             case 't':
8195               {                                   /* lt         */
8196                 return -KEY_lt;
8197               }
8198
8199             default:
8200               goto unknown;
8201           }
8202
8203         case 'm':
8204           if (name[1] == 'y')
8205           {                                       /* my         */
8206             return KEY_my;
8207           }
8208
8209           goto unknown;
8210
8211         case 'n':
8212           switch (name[1])
8213           {
8214             case 'e':
8215               {                                   /* ne         */
8216                 return -KEY_ne;
8217               }
8218
8219             case 'o':
8220               {                                   /* no         */
8221                 return KEY_no;
8222               }
8223
8224             default:
8225               goto unknown;
8226           }
8227
8228         case 'o':
8229           if (name[1] == 'r')
8230           {                                       /* or         */
8231             return -KEY_or;
8232           }
8233
8234           goto unknown;
8235
8236         case 'q':
8237           switch (name[1])
8238           {
8239             case 'q':
8240               {                                   /* qq         */
8241                 return KEY_qq;
8242               }
8243
8244             case 'r':
8245               {                                   /* qr         */
8246                 return KEY_qr;
8247               }
8248
8249             case 'w':
8250               {                                   /* qw         */
8251                 return KEY_qw;
8252               }
8253
8254             case 'x':
8255               {                                   /* qx         */
8256                 return KEY_qx;
8257               }
8258
8259             default:
8260               goto unknown;
8261           }
8262
8263         case 't':
8264           if (name[1] == 'r')
8265           {                                       /* tr         */
8266             return KEY_tr;
8267           }
8268
8269           goto unknown;
8270
8271         case 'u':
8272           if (name[1] == 'c')
8273           {                                       /* uc         */
8274             return -KEY_uc;
8275           }
8276
8277           goto unknown;
8278
8279         default:
8280           goto unknown;
8281       }
8282
8283     case 3: /* 29 tokens of length 3 */
8284       switch (name[0])
8285       {
8286         case 'E':
8287           if (name[1] == 'N' &&
8288               name[2] == 'D')
8289           {                                       /* END        */
8290             return KEY_END;
8291           }
8292
8293           goto unknown;
8294
8295         case 'a':
8296           switch (name[1])
8297           {
8298             case 'b':
8299               if (name[2] == 's')
8300               {                                   /* abs        */
8301                 return -KEY_abs;
8302               }
8303
8304               goto unknown;
8305
8306             case 'n':
8307               if (name[2] == 'd')
8308               {                                   /* and        */
8309                 return -KEY_and;
8310               }
8311
8312               goto unknown;
8313
8314             default:
8315               goto unknown;
8316           }
8317
8318         case 'c':
8319           switch (name[1])
8320           {
8321             case 'h':
8322               if (name[2] == 'r')
8323               {                                   /* chr        */
8324                 return -KEY_chr;
8325               }
8326
8327               goto unknown;
8328
8329             case 'm':
8330               if (name[2] == 'p')
8331               {                                   /* cmp        */
8332                 return -KEY_cmp;
8333               }
8334
8335               goto unknown;
8336
8337             case 'o':
8338               if (name[2] == 's')
8339               {                                   /* cos        */
8340                 return -KEY_cos;
8341               }
8342
8343               goto unknown;
8344
8345             default:
8346               goto unknown;
8347           }
8348
8349         case 'd':
8350           if (name[1] == 'i' &&
8351               name[2] == 'e')
8352           {                                       /* die        */
8353             return -KEY_die;
8354           }
8355
8356           goto unknown;
8357
8358         case 'e':
8359           switch (name[1])
8360           {
8361             case 'o':
8362               if (name[2] == 'f')
8363               {                                   /* eof        */
8364                 return -KEY_eof;
8365               }
8366
8367               goto unknown;
8368
8369             case 'x':
8370               if (name[2] == 'p')
8371               {                                   /* exp        */
8372                 return -KEY_exp;
8373               }
8374
8375               goto unknown;
8376
8377             default:
8378               goto unknown;
8379           }
8380
8381         case 'f':
8382           if (name[1] == 'o' &&
8383               name[2] == 'r')
8384           {                                       /* for        */
8385             return KEY_for;
8386           }
8387
8388           goto unknown;
8389
8390         case 'h':
8391           if (name[1] == 'e' &&
8392               name[2] == 'x')
8393           {                                       /* hex        */
8394             return -KEY_hex;
8395           }
8396
8397           goto unknown;
8398
8399         case 'i':
8400           if (name[1] == 'n' &&
8401               name[2] == 't')
8402           {                                       /* int        */
8403             return -KEY_int;
8404           }
8405
8406           goto unknown;
8407
8408         case 'l':
8409           if (name[1] == 'o' &&
8410               name[2] == 'g')
8411           {                                       /* log        */
8412             return -KEY_log;
8413           }
8414
8415           goto unknown;
8416
8417         case 'm':
8418           if (name[1] == 'a' &&
8419               name[2] == 'p')
8420           {                                       /* map        */
8421             return KEY_map;
8422           }
8423
8424           goto unknown;
8425
8426         case 'n':
8427           if (name[1] == 'o' &&
8428               name[2] == 't')
8429           {                                       /* not        */
8430             return -KEY_not;
8431           }
8432
8433           goto unknown;
8434
8435         case 'o':
8436           switch (name[1])
8437           {
8438             case 'c':
8439               if (name[2] == 't')
8440               {                                   /* oct        */
8441                 return -KEY_oct;
8442               }
8443
8444               goto unknown;
8445
8446             case 'r':
8447               if (name[2] == 'd')
8448               {                                   /* ord        */
8449                 return -KEY_ord;
8450               }
8451
8452               goto unknown;
8453
8454             case 'u':
8455               if (name[2] == 'r')
8456               {                                   /* our        */
8457                 return KEY_our;
8458               }
8459
8460               goto unknown;
8461
8462             default:
8463               goto unknown;
8464           }
8465
8466         case 'p':
8467           if (name[1] == 'o')
8468           {
8469             switch (name[2])
8470             {
8471               case 'p':
8472                 {                                 /* pop        */
8473                   return -KEY_pop;
8474                 }
8475
8476               case 's':
8477                 {                                 /* pos        */
8478                   return KEY_pos;
8479                 }
8480
8481               default:
8482                 goto unknown;
8483             }
8484           }
8485
8486           goto unknown;
8487
8488         case 'r':
8489           if (name[1] == 'e' &&
8490               name[2] == 'f')
8491           {                                       /* ref        */
8492             return -KEY_ref;
8493           }
8494
8495           goto unknown;
8496
8497         case 's':
8498           switch (name[1])
8499           {
8500             case 'a':
8501               if (name[2] == 'y')
8502               {                                   /* say        */
8503                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8504               }
8505
8506               goto unknown;
8507
8508             case 'i':
8509               if (name[2] == 'n')
8510               {                                   /* sin        */
8511                 return -KEY_sin;
8512               }
8513
8514               goto unknown;
8515
8516             case 'u':
8517               if (name[2] == 'b')
8518               {                                   /* sub        */
8519                 return KEY_sub;
8520               }
8521
8522               goto unknown;
8523
8524             default:
8525               goto unknown;
8526           }
8527
8528         case 't':
8529           if (name[1] == 'i' &&
8530               name[2] == 'e')
8531           {                                       /* tie        */
8532             return -KEY_tie;
8533           }
8534
8535           goto unknown;
8536
8537         case 'u':
8538           if (name[1] == 's' &&
8539               name[2] == 'e')
8540           {                                       /* use        */
8541             return KEY_use;
8542           }
8543
8544           goto unknown;
8545
8546         case 'v':
8547           if (name[1] == 'e' &&
8548               name[2] == 'c')
8549           {                                       /* vec        */
8550             return -KEY_vec;
8551           }
8552
8553           goto unknown;
8554
8555         case 'x':
8556           if (name[1] == 'o' &&
8557               name[2] == 'r')
8558           {                                       /* xor        */
8559             return -KEY_xor;
8560           }
8561
8562           goto unknown;
8563
8564         default:
8565           goto unknown;
8566       }
8567
8568     case 4: /* 41 tokens of length 4 */
8569       switch (name[0])
8570       {
8571         case 'C':
8572           if (name[1] == 'O' &&
8573               name[2] == 'R' &&
8574               name[3] == 'E')
8575           {                                       /* CORE       */
8576             return -KEY_CORE;
8577           }
8578
8579           goto unknown;
8580
8581         case 'I':
8582           if (name[1] == 'N' &&
8583               name[2] == 'I' &&
8584               name[3] == 'T')
8585           {                                       /* INIT       */
8586             return KEY_INIT;
8587           }
8588
8589           goto unknown;
8590
8591         case 'b':
8592           if (name[1] == 'i' &&
8593               name[2] == 'n' &&
8594               name[3] == 'd')
8595           {                                       /* bind       */
8596             return -KEY_bind;
8597           }
8598
8599           goto unknown;
8600
8601         case 'c':
8602           if (name[1] == 'h' &&
8603               name[2] == 'o' &&
8604               name[3] == 'p')
8605           {                                       /* chop       */
8606             return -KEY_chop;
8607           }
8608
8609           goto unknown;
8610
8611         case 'd':
8612           if (name[1] == 'u' &&
8613               name[2] == 'm' &&
8614               name[3] == 'p')
8615           {                                       /* dump       */
8616             return -KEY_dump;
8617           }
8618
8619           goto unknown;
8620
8621         case 'e':
8622           switch (name[1])
8623           {
8624             case 'a':
8625               if (name[2] == 'c' &&
8626                   name[3] == 'h')
8627               {                                   /* each       */
8628                 return -KEY_each;
8629               }
8630
8631               goto unknown;
8632
8633             case 'l':
8634               if (name[2] == 's' &&
8635                   name[3] == 'e')
8636               {                                   /* else       */
8637                 return KEY_else;
8638               }
8639
8640               goto unknown;
8641
8642             case 'v':
8643               if (name[2] == 'a' &&
8644                   name[3] == 'l')
8645               {                                   /* eval       */
8646                 return KEY_eval;
8647               }
8648
8649               goto unknown;
8650
8651             case 'x':
8652               switch (name[2])
8653               {
8654                 case 'e':
8655                   if (name[3] == 'c')
8656                   {                               /* exec       */
8657                     return -KEY_exec;
8658                   }
8659
8660                   goto unknown;
8661
8662                 case 'i':
8663                   if (name[3] == 't')
8664                   {                               /* exit       */
8665                     return -KEY_exit;
8666                   }
8667
8668                   goto unknown;
8669
8670                 default:
8671                   goto unknown;
8672               }
8673
8674             default:
8675               goto unknown;
8676           }
8677
8678         case 'f':
8679           if (name[1] == 'o' &&
8680               name[2] == 'r' &&
8681               name[3] == 'k')
8682           {                                       /* fork       */
8683             return -KEY_fork;
8684           }
8685
8686           goto unknown;
8687
8688         case 'g':
8689           switch (name[1])
8690           {
8691             case 'e':
8692               if (name[2] == 't' &&
8693                   name[3] == 'c')
8694               {                                   /* getc       */
8695                 return -KEY_getc;
8696               }
8697
8698               goto unknown;
8699
8700             case 'l':
8701               if (name[2] == 'o' &&
8702                   name[3] == 'b')
8703               {                                   /* glob       */
8704                 return KEY_glob;
8705               }
8706
8707               goto unknown;
8708
8709             case 'o':
8710               if (name[2] == 't' &&
8711                   name[3] == 'o')
8712               {                                   /* goto       */
8713                 return KEY_goto;
8714               }
8715
8716               goto unknown;
8717
8718             case 'r':
8719               if (name[2] == 'e' &&
8720                   name[3] == 'p')
8721               {                                   /* grep       */
8722                 return KEY_grep;
8723               }
8724
8725               goto unknown;
8726
8727             default:
8728               goto unknown;
8729           }
8730
8731         case 'j':
8732           if (name[1] == 'o' &&
8733               name[2] == 'i' &&
8734               name[3] == 'n')
8735           {                                       /* join       */
8736             return -KEY_join;
8737           }
8738
8739           goto unknown;
8740
8741         case 'k':
8742           switch (name[1])
8743           {
8744             case 'e':
8745               if (name[2] == 'y' &&
8746                   name[3] == 's')
8747               {                                   /* keys       */
8748                 return -KEY_keys;
8749               }
8750
8751               goto unknown;
8752
8753             case 'i':
8754               if (name[2] == 'l' &&
8755                   name[3] == 'l')
8756               {                                   /* kill       */
8757                 return -KEY_kill;
8758               }
8759
8760               goto unknown;
8761
8762             default:
8763               goto unknown;
8764           }
8765
8766         case 'l':
8767           switch (name[1])
8768           {
8769             case 'a':
8770               if (name[2] == 's' &&
8771                   name[3] == 't')
8772               {                                   /* last       */
8773                 return KEY_last;
8774               }
8775
8776               goto unknown;
8777
8778             case 'i':
8779               if (name[2] == 'n' &&
8780                   name[3] == 'k')
8781               {                                   /* link       */
8782                 return -KEY_link;
8783               }
8784
8785               goto unknown;
8786
8787             case 'o':
8788               if (name[2] == 'c' &&
8789                   name[3] == 'k')
8790               {                                   /* lock       */
8791                 return -KEY_lock;
8792               }
8793
8794               goto unknown;
8795
8796             default:
8797               goto unknown;
8798           }
8799
8800         case 'n':
8801           if (name[1] == 'e' &&
8802               name[2] == 'x' &&
8803               name[3] == 't')
8804           {                                       /* next       */
8805             return KEY_next;
8806           }
8807
8808           goto unknown;
8809
8810         case 'o':
8811           if (name[1] == 'p' &&
8812               name[2] == 'e' &&
8813               name[3] == 'n')
8814           {                                       /* open       */
8815             return -KEY_open;
8816           }
8817
8818           goto unknown;
8819
8820         case 'p':
8821           switch (name[1])
8822           {
8823             case 'a':
8824               if (name[2] == 'c' &&
8825                   name[3] == 'k')
8826               {                                   /* pack       */
8827                 return -KEY_pack;
8828               }
8829
8830               goto unknown;
8831
8832             case 'i':
8833               if (name[2] == 'p' &&
8834                   name[3] == 'e')
8835               {                                   /* pipe       */
8836                 return -KEY_pipe;
8837               }
8838
8839               goto unknown;
8840
8841             case 'u':
8842               if (name[2] == 's' &&
8843                   name[3] == 'h')
8844               {                                   /* push       */
8845                 return -KEY_push;
8846               }
8847
8848               goto unknown;
8849
8850             default:
8851               goto unknown;
8852           }
8853
8854         case 'r':
8855           switch (name[1])
8856           {
8857             case 'a':
8858               if (name[2] == 'n' &&
8859                   name[3] == 'd')
8860               {                                   /* rand       */
8861                 return -KEY_rand;
8862               }
8863
8864               goto unknown;
8865
8866             case 'e':
8867               switch (name[2])
8868               {
8869                 case 'a':
8870                   if (name[3] == 'd')
8871                   {                               /* read       */
8872                     return -KEY_read;
8873                   }
8874
8875                   goto unknown;
8876
8877                 case 'c':
8878                   if (name[3] == 'v')
8879                   {                               /* recv       */
8880                     return -KEY_recv;
8881                   }
8882
8883                   goto unknown;
8884
8885                 case 'd':
8886                   if (name[3] == 'o')
8887                   {                               /* redo       */
8888                     return KEY_redo;
8889                   }
8890
8891                   goto unknown;
8892
8893                 default:
8894                   goto unknown;
8895               }
8896
8897             default:
8898               goto unknown;
8899           }
8900
8901         case 's':
8902           switch (name[1])
8903           {
8904             case 'e':
8905               switch (name[2])
8906               {
8907                 case 'e':
8908                   if (name[3] == 'k')
8909                   {                               /* seek       */
8910                     return -KEY_seek;
8911                   }
8912
8913                   goto unknown;
8914
8915                 case 'n':
8916                   if (name[3] == 'd')
8917                   {                               /* send       */
8918                     return -KEY_send;
8919                   }
8920
8921                   goto unknown;
8922
8923                 default:
8924                   goto unknown;
8925               }
8926
8927             case 'o':
8928               if (name[2] == 'r' &&
8929                   name[3] == 't')
8930               {                                   /* sort       */
8931                 return KEY_sort;
8932               }
8933
8934               goto unknown;
8935
8936             case 'q':
8937               if (name[2] == 'r' &&
8938                   name[3] == 't')
8939               {                                   /* sqrt       */
8940                 return -KEY_sqrt;
8941               }
8942
8943               goto unknown;
8944
8945             case 't':
8946               if (name[2] == 'a' &&
8947                   name[3] == 't')
8948               {                                   /* stat       */
8949                 return -KEY_stat;
8950               }
8951
8952               goto unknown;
8953
8954             default:
8955               goto unknown;
8956           }
8957
8958         case 't':
8959           switch (name[1])
8960           {
8961             case 'e':
8962               if (name[2] == 'l' &&
8963                   name[3] == 'l')
8964               {                                   /* tell       */
8965                 return -KEY_tell;
8966               }
8967
8968               goto unknown;
8969
8970             case 'i':
8971               switch (name[2])
8972               {
8973                 case 'e':
8974                   if (name[3] == 'd')
8975                   {                               /* tied       */
8976                     return -KEY_tied;
8977                   }
8978
8979                   goto unknown;
8980
8981                 case 'm':
8982                   if (name[3] == 'e')
8983                   {                               /* time       */
8984                     return -KEY_time;
8985                   }
8986
8987                   goto unknown;
8988
8989                 default:
8990                   goto unknown;
8991               }
8992
8993             default:
8994               goto unknown;
8995           }
8996
8997         case 'w':
8998           switch (name[1])
8999           {
9000             case 'a':
9001               switch (name[2])
9002               {
9003                 case 'i':
9004                   if (name[3] == 't')
9005                   {                               /* wait       */
9006                     return -KEY_wait;
9007                   }
9008
9009                   goto unknown;
9010
9011                 case 'r':
9012                   if (name[3] == 'n')
9013                   {                               /* warn       */
9014                     return -KEY_warn;
9015                   }
9016
9017                   goto unknown;
9018
9019                 default:
9020                   goto unknown;
9021               }
9022
9023             case 'h':
9024               if (name[2] == 'e' &&
9025                   name[3] == 'n')
9026               {                                   /* when       */
9027                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9028               }
9029
9030               goto unknown;
9031
9032             default:
9033               goto unknown;
9034           }
9035
9036         default:
9037           goto unknown;
9038       }
9039
9040     case 5: /* 39 tokens of length 5 */
9041       switch (name[0])
9042       {
9043         case 'B':
9044           if (name[1] == 'E' &&
9045               name[2] == 'G' &&
9046               name[3] == 'I' &&
9047               name[4] == 'N')
9048           {                                       /* BEGIN      */
9049             return KEY_BEGIN;
9050           }
9051
9052           goto unknown;
9053
9054         case 'C':
9055           if (name[1] == 'H' &&
9056               name[2] == 'E' &&
9057               name[3] == 'C' &&
9058               name[4] == 'K')
9059           {                                       /* CHECK      */
9060             return KEY_CHECK;
9061           }
9062
9063           goto unknown;
9064
9065         case 'a':
9066           switch (name[1])
9067           {
9068             case 'l':
9069               if (name[2] == 'a' &&
9070                   name[3] == 'r' &&
9071                   name[4] == 'm')
9072               {                                   /* alarm      */
9073                 return -KEY_alarm;
9074               }
9075
9076               goto unknown;
9077
9078             case 't':
9079               if (name[2] == 'a' &&
9080                   name[3] == 'n' &&
9081                   name[4] == '2')
9082               {                                   /* atan2      */
9083                 return -KEY_atan2;
9084               }
9085
9086               goto unknown;
9087
9088             default:
9089               goto unknown;
9090           }
9091
9092         case 'b':
9093           switch (name[1])
9094           {
9095             case 'l':
9096               if (name[2] == 'e' &&
9097                   name[3] == 's' &&
9098                   name[4] == 's')
9099               {                                   /* bless      */
9100                 return -KEY_bless;
9101               }
9102
9103               goto unknown;
9104
9105             case 'r':
9106               if (name[2] == 'e' &&
9107                   name[3] == 'a' &&
9108                   name[4] == 'k')
9109               {                                   /* break      */
9110                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9111               }
9112
9113               goto unknown;
9114
9115             default:
9116               goto unknown;
9117           }
9118
9119         case 'c':
9120           switch (name[1])
9121           {
9122             case 'h':
9123               switch (name[2])
9124               {
9125                 case 'd':
9126                   if (name[3] == 'i' &&
9127                       name[4] == 'r')
9128                   {                               /* chdir      */
9129                     return -KEY_chdir;
9130                   }
9131
9132                   goto unknown;
9133
9134                 case 'm':
9135                   if (name[3] == 'o' &&
9136                       name[4] == 'd')
9137                   {                               /* chmod      */
9138                     return -KEY_chmod;
9139                   }
9140
9141                   goto unknown;
9142
9143                 case 'o':
9144                   switch (name[3])
9145                   {
9146                     case 'm':
9147                       if (name[4] == 'p')
9148                       {                           /* chomp      */
9149                         return -KEY_chomp;
9150                       }
9151
9152                       goto unknown;
9153
9154                     case 'w':
9155                       if (name[4] == 'n')
9156                       {                           /* chown      */
9157                         return -KEY_chown;
9158                       }
9159
9160                       goto unknown;
9161
9162                     default:
9163                       goto unknown;
9164                   }
9165
9166                 default:
9167                   goto unknown;
9168               }
9169
9170             case 'l':
9171               if (name[2] == 'o' &&
9172                   name[3] == 's' &&
9173                   name[4] == 'e')
9174               {                                   /* close      */
9175                 return -KEY_close;
9176               }
9177
9178               goto unknown;
9179
9180             case 'r':
9181               if (name[2] == 'y' &&
9182                   name[3] == 'p' &&
9183                   name[4] == 't')
9184               {                                   /* crypt      */
9185                 return -KEY_crypt;
9186               }
9187
9188               goto unknown;
9189
9190             default:
9191               goto unknown;
9192           }
9193
9194         case 'e':
9195           if (name[1] == 'l' &&
9196               name[2] == 's' &&
9197               name[3] == 'i' &&
9198               name[4] == 'f')
9199           {                                       /* elsif      */
9200             return KEY_elsif;
9201           }
9202
9203           goto unknown;
9204
9205         case 'f':
9206           switch (name[1])
9207           {
9208             case 'c':
9209               if (name[2] == 'n' &&
9210                   name[3] == 't' &&
9211                   name[4] == 'l')
9212               {                                   /* fcntl      */
9213                 return -KEY_fcntl;
9214               }
9215
9216               goto unknown;
9217
9218             case 'l':
9219               if (name[2] == 'o' &&
9220                   name[3] == 'c' &&
9221                   name[4] == 'k')
9222               {                                   /* flock      */
9223                 return -KEY_flock;
9224               }
9225
9226               goto unknown;
9227
9228             default:
9229               goto unknown;
9230           }
9231
9232         case 'g':
9233           if (name[1] == 'i' &&
9234               name[2] == 'v' &&
9235               name[3] == 'e' &&
9236               name[4] == 'n')
9237           {                                       /* given      */
9238             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9239           }
9240
9241           goto unknown;
9242
9243         case 'i':
9244           switch (name[1])
9245           {
9246             case 'n':
9247               if (name[2] == 'd' &&
9248                   name[3] == 'e' &&
9249                   name[4] == 'x')
9250               {                                   /* index      */
9251                 return -KEY_index;
9252               }
9253
9254               goto unknown;
9255
9256             case 'o':
9257               if (name[2] == 'c' &&
9258                   name[3] == 't' &&
9259                   name[4] == 'l')
9260               {                                   /* ioctl      */
9261                 return -KEY_ioctl;
9262               }
9263
9264               goto unknown;
9265
9266             default:
9267               goto unknown;
9268           }
9269
9270         case 'l':
9271           switch (name[1])
9272           {
9273             case 'o':
9274               if (name[2] == 'c' &&
9275                   name[3] == 'a' &&
9276                   name[4] == 'l')
9277               {                                   /* local      */
9278                 return KEY_local;
9279               }
9280
9281               goto unknown;
9282
9283             case 's':
9284               if (name[2] == 't' &&
9285                   name[3] == 'a' &&
9286                   name[4] == 't')
9287               {                                   /* lstat      */
9288                 return -KEY_lstat;
9289               }
9290
9291               goto unknown;
9292
9293             default:
9294               goto unknown;
9295           }
9296
9297         case 'm':
9298           if (name[1] == 'k' &&
9299               name[2] == 'd' &&
9300               name[3] == 'i' &&
9301               name[4] == 'r')
9302           {                                       /* mkdir      */
9303             return -KEY_mkdir;
9304           }
9305
9306           goto unknown;
9307
9308         case 'p':
9309           if (name[1] == 'r' &&
9310               name[2] == 'i' &&
9311               name[3] == 'n' &&
9312               name[4] == 't')
9313           {                                       /* print      */
9314             return KEY_print;
9315           }
9316
9317           goto unknown;
9318
9319         case 'r':
9320           switch (name[1])
9321           {
9322             case 'e':
9323               if (name[2] == 's' &&
9324                   name[3] == 'e' &&
9325                   name[4] == 't')
9326               {                                   /* reset      */
9327                 return -KEY_reset;
9328               }
9329
9330               goto unknown;
9331
9332             case 'm':
9333               if (name[2] == 'd' &&
9334                   name[3] == 'i' &&
9335                   name[4] == 'r')
9336               {                                   /* rmdir      */
9337                 return -KEY_rmdir;
9338               }
9339
9340               goto unknown;
9341
9342             default:
9343               goto unknown;
9344           }
9345
9346         case 's':
9347           switch (name[1])
9348           {
9349             case 'e':
9350               if (name[2] == 'm' &&
9351                   name[3] == 'o' &&
9352                   name[4] == 'p')
9353               {                                   /* semop      */
9354                 return -KEY_semop;
9355               }
9356
9357               goto unknown;
9358
9359             case 'h':
9360               if (name[2] == 'i' &&
9361                   name[3] == 'f' &&
9362                   name[4] == 't')
9363               {                                   /* shift      */
9364                 return -KEY_shift;
9365               }
9366
9367               goto unknown;
9368
9369             case 'l':
9370               if (name[2] == 'e' &&
9371                   name[3] == 'e' &&
9372                   name[4] == 'p')
9373               {                                   /* sleep      */
9374                 return -KEY_sleep;
9375               }
9376
9377               goto unknown;
9378
9379             case 'p':
9380               if (name[2] == 'l' &&
9381                   name[3] == 'i' &&
9382                   name[4] == 't')
9383               {                                   /* split      */
9384                 return KEY_split;
9385               }
9386
9387               goto unknown;
9388
9389             case 'r':
9390               if (name[2] == 'a' &&
9391                   name[3] == 'n' &&
9392                   name[4] == 'd')
9393               {                                   /* srand      */
9394                 return -KEY_srand;
9395               }
9396
9397               goto unknown;
9398
9399             case 't':
9400               switch (name[2])
9401               {
9402                 case 'a':
9403                   if (name[3] == 't' &&
9404                       name[4] == 'e')
9405                   {                               /* state      */
9406                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9407                   }
9408
9409                   goto unknown;
9410
9411                 case 'u':
9412                   if (name[3] == 'd' &&
9413                       name[4] == 'y')
9414                   {                               /* study      */
9415                     return KEY_study;
9416                   }
9417
9418                   goto unknown;
9419
9420                 default:
9421                   goto unknown;
9422               }
9423
9424             default:
9425               goto unknown;
9426           }
9427
9428         case 't':
9429           if (name[1] == 'i' &&
9430               name[2] == 'm' &&
9431               name[3] == 'e' &&
9432               name[4] == 's')
9433           {                                       /* times      */
9434             return -KEY_times;
9435           }
9436
9437           goto unknown;
9438
9439         case 'u':
9440           switch (name[1])
9441           {
9442             case 'm':
9443               if (name[2] == 'a' &&
9444                   name[3] == 's' &&
9445                   name[4] == 'k')
9446               {                                   /* umask      */
9447                 return -KEY_umask;
9448               }
9449
9450               goto unknown;
9451
9452             case 'n':
9453               switch (name[2])
9454               {
9455                 case 'd':
9456                   if (name[3] == 'e' &&
9457                       name[4] == 'f')
9458                   {                               /* undef      */
9459                     return KEY_undef;
9460                   }
9461
9462                   goto unknown;
9463
9464                 case 't':
9465                   if (name[3] == 'i')
9466                   {
9467                     switch (name[4])
9468                     {
9469                       case 'e':
9470                         {                         /* untie      */
9471                           return -KEY_untie;
9472                         }
9473
9474                       case 'l':
9475                         {                         /* until      */
9476                           return KEY_until;
9477                         }
9478
9479                       default:
9480                         goto unknown;
9481                     }
9482                   }
9483
9484                   goto unknown;
9485
9486                 default:
9487                   goto unknown;
9488               }
9489
9490             case 't':
9491               if (name[2] == 'i' &&
9492                   name[3] == 'm' &&
9493                   name[4] == 'e')
9494               {                                   /* utime      */
9495                 return -KEY_utime;
9496               }
9497
9498               goto unknown;
9499
9500             default:
9501               goto unknown;
9502           }
9503
9504         case 'w':
9505           switch (name[1])
9506           {
9507             case 'h':
9508               if (name[2] == 'i' &&
9509                   name[3] == 'l' &&
9510                   name[4] == 'e')
9511               {                                   /* while      */
9512                 return KEY_while;
9513               }
9514
9515               goto unknown;
9516
9517             case 'r':
9518               if (name[2] == 'i' &&
9519                   name[3] == 't' &&
9520                   name[4] == 'e')
9521               {                                   /* write      */
9522                 return -KEY_write;
9523               }
9524
9525               goto unknown;
9526
9527             default:
9528               goto unknown;
9529           }
9530
9531         default:
9532           goto unknown;
9533       }
9534
9535     case 6: /* 33 tokens of length 6 */
9536       switch (name[0])
9537       {
9538         case 'a':
9539           if (name[1] == 'c' &&
9540               name[2] == 'c' &&
9541               name[3] == 'e' &&
9542               name[4] == 'p' &&
9543               name[5] == 't')
9544           {                                       /* accept     */
9545             return -KEY_accept;
9546           }
9547
9548           goto unknown;
9549
9550         case 'c':
9551           switch (name[1])
9552           {
9553             case 'a':
9554               if (name[2] == 'l' &&
9555                   name[3] == 'l' &&
9556                   name[4] == 'e' &&
9557                   name[5] == 'r')
9558               {                                   /* caller     */
9559                 return -KEY_caller;
9560               }
9561
9562               goto unknown;
9563
9564             case 'h':
9565               if (name[2] == 'r' &&
9566                   name[3] == 'o' &&
9567                   name[4] == 'o' &&
9568                   name[5] == 't')
9569               {                                   /* chroot     */
9570                 return -KEY_chroot;
9571               }
9572
9573               goto unknown;
9574
9575             default:
9576               goto unknown;
9577           }
9578
9579         case 'd':
9580           if (name[1] == 'e' &&
9581               name[2] == 'l' &&
9582               name[3] == 'e' &&
9583               name[4] == 't' &&
9584               name[5] == 'e')
9585           {                                       /* delete     */
9586             return KEY_delete;
9587           }
9588
9589           goto unknown;
9590
9591         case 'e':
9592           switch (name[1])
9593           {
9594             case 'l':
9595               if (name[2] == 's' &&
9596                   name[3] == 'e' &&
9597                   name[4] == 'i' &&
9598                   name[5] == 'f')
9599               {                                   /* elseif     */
9600                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9601               }
9602
9603               goto unknown;
9604
9605             case 'x':
9606               if (name[2] == 'i' &&
9607                   name[3] == 's' &&
9608                   name[4] == 't' &&
9609                   name[5] == 's')
9610               {                                   /* exists     */
9611                 return KEY_exists;
9612               }
9613
9614               goto unknown;
9615
9616             default:
9617               goto unknown;
9618           }
9619
9620         case 'f':
9621           switch (name[1])
9622           {
9623             case 'i':
9624               if (name[2] == 'l' &&
9625                   name[3] == 'e' &&
9626                   name[4] == 'n' &&
9627                   name[5] == 'o')
9628               {                                   /* fileno     */
9629                 return -KEY_fileno;
9630               }
9631
9632               goto unknown;
9633
9634             case 'o':
9635               if (name[2] == 'r' &&
9636                   name[3] == 'm' &&
9637                   name[4] == 'a' &&
9638                   name[5] == 't')
9639               {                                   /* format     */
9640                 return KEY_format;
9641               }
9642
9643               goto unknown;
9644
9645             default:
9646               goto unknown;
9647           }
9648
9649         case 'g':
9650           if (name[1] == 'm' &&
9651               name[2] == 't' &&
9652               name[3] == 'i' &&
9653               name[4] == 'm' &&
9654               name[5] == 'e')
9655           {                                       /* gmtime     */
9656             return -KEY_gmtime;
9657           }
9658
9659           goto unknown;
9660
9661         case 'l':
9662           switch (name[1])
9663           {
9664             case 'e':
9665               if (name[2] == 'n' &&
9666                   name[3] == 'g' &&
9667                   name[4] == 't' &&
9668                   name[5] == 'h')
9669               {                                   /* length     */
9670                 return -KEY_length;
9671               }
9672
9673               goto unknown;
9674
9675             case 'i':
9676               if (name[2] == 's' &&
9677                   name[3] == 't' &&
9678                   name[4] == 'e' &&
9679                   name[5] == 'n')
9680               {                                   /* listen     */
9681                 return -KEY_listen;
9682               }
9683
9684               goto unknown;
9685
9686             default:
9687               goto unknown;
9688           }
9689
9690         case 'm':
9691           if (name[1] == 's' &&
9692               name[2] == 'g')
9693           {
9694             switch (name[3])
9695             {
9696               case 'c':
9697                 if (name[4] == 't' &&
9698                     name[5] == 'l')
9699                 {                                 /* msgctl     */
9700                   return -KEY_msgctl;
9701                 }
9702
9703                 goto unknown;
9704
9705               case 'g':
9706                 if (name[4] == 'e' &&
9707                     name[5] == 't')
9708                 {                                 /* msgget     */
9709                   return -KEY_msgget;
9710                 }
9711
9712                 goto unknown;
9713
9714               case 'r':
9715                 if (name[4] == 'c' &&
9716                     name[5] == 'v')
9717                 {                                 /* msgrcv     */
9718                   return -KEY_msgrcv;
9719                 }
9720
9721                 goto unknown;
9722
9723               case 's':
9724                 if (name[4] == 'n' &&
9725                     name[5] == 'd')
9726                 {                                 /* msgsnd     */
9727                   return -KEY_msgsnd;
9728                 }
9729
9730                 goto unknown;
9731
9732               default:
9733                 goto unknown;
9734             }
9735           }
9736
9737           goto unknown;
9738
9739         case 'p':
9740           if (name[1] == 'r' &&
9741               name[2] == 'i' &&
9742               name[3] == 'n' &&
9743               name[4] == 't' &&
9744               name[5] == 'f')
9745           {                                       /* printf     */
9746             return KEY_printf;
9747           }
9748
9749           goto unknown;
9750
9751         case 'r':
9752           switch (name[1])
9753           {
9754             case 'e':
9755               switch (name[2])
9756               {
9757                 case 'n':
9758                   if (name[3] == 'a' &&
9759                       name[4] == 'm' &&
9760                       name[5] == 'e')
9761                   {                               /* rename     */
9762                     return -KEY_rename;
9763                   }
9764
9765                   goto unknown;
9766
9767                 case 't':
9768                   if (name[3] == 'u' &&
9769                       name[4] == 'r' &&
9770                       name[5] == 'n')
9771                   {                               /* return     */
9772                     return KEY_return;
9773                   }
9774
9775                   goto unknown;
9776
9777                 default:
9778                   goto unknown;
9779               }
9780
9781             case 'i':
9782               if (name[2] == 'n' &&
9783                   name[3] == 'd' &&
9784                   name[4] == 'e' &&
9785                   name[5] == 'x')
9786               {                                   /* rindex     */
9787                 return -KEY_rindex;
9788               }
9789
9790               goto unknown;
9791
9792             default:
9793               goto unknown;
9794           }
9795
9796         case 's':
9797           switch (name[1])
9798           {
9799             case 'c':
9800               if (name[2] == 'a' &&
9801                   name[3] == 'l' &&
9802                   name[4] == 'a' &&
9803                   name[5] == 'r')
9804               {                                   /* scalar     */
9805                 return KEY_scalar;
9806               }
9807
9808               goto unknown;
9809
9810             case 'e':
9811               switch (name[2])
9812               {
9813                 case 'l':
9814                   if (name[3] == 'e' &&
9815                       name[4] == 'c' &&
9816                       name[5] == 't')
9817                   {                               /* select     */
9818                     return -KEY_select;
9819                   }
9820
9821                   goto unknown;
9822
9823                 case 'm':
9824                   switch (name[3])
9825                   {
9826                     case 'c':
9827                       if (name[4] == 't' &&
9828                           name[5] == 'l')
9829                       {                           /* semctl     */
9830                         return -KEY_semctl;
9831                       }
9832
9833                       goto unknown;
9834
9835                     case 'g':
9836                       if (name[4] == 'e' &&
9837                           name[5] == 't')
9838                       {                           /* semget     */
9839                         return -KEY_semget;
9840                       }
9841
9842                       goto unknown;
9843
9844                     default:
9845                       goto unknown;
9846                   }
9847
9848                 default:
9849                   goto unknown;
9850               }
9851
9852             case 'h':
9853               if (name[2] == 'm')
9854               {
9855                 switch (name[3])
9856                 {
9857                   case 'c':
9858                     if (name[4] == 't' &&
9859                         name[5] == 'l')
9860                     {                             /* shmctl     */
9861                       return -KEY_shmctl;
9862                     }
9863
9864                     goto unknown;
9865
9866                   case 'g':
9867                     if (name[4] == 'e' &&
9868                         name[5] == 't')
9869                     {                             /* shmget     */
9870                       return -KEY_shmget;
9871                     }
9872
9873                     goto unknown;
9874
9875                   default:
9876                     goto unknown;
9877                 }
9878               }
9879
9880               goto unknown;
9881
9882             case 'o':
9883               if (name[2] == 'c' &&
9884                   name[3] == 'k' &&
9885                   name[4] == 'e' &&
9886                   name[5] == 't')
9887               {                                   /* socket     */
9888                 return -KEY_socket;
9889               }
9890
9891               goto unknown;
9892
9893             case 'p':
9894               if (name[2] == 'l' &&
9895                   name[3] == 'i' &&
9896                   name[4] == 'c' &&
9897                   name[5] == 'e')
9898               {                                   /* splice     */
9899                 return -KEY_splice;
9900               }
9901
9902               goto unknown;
9903
9904             case 'u':
9905               if (name[2] == 'b' &&
9906                   name[3] == 's' &&
9907                   name[4] == 't' &&
9908                   name[5] == 'r')
9909               {                                   /* substr     */
9910                 return -KEY_substr;
9911               }
9912
9913               goto unknown;
9914
9915             case 'y':
9916               if (name[2] == 's' &&
9917                   name[3] == 't' &&
9918                   name[4] == 'e' &&
9919                   name[5] == 'm')
9920               {                                   /* system     */
9921                 return -KEY_system;
9922               }
9923
9924               goto unknown;
9925
9926             default:
9927               goto unknown;
9928           }
9929
9930         case 'u':
9931           if (name[1] == 'n')
9932           {
9933             switch (name[2])
9934             {
9935               case 'l':
9936                 switch (name[3])
9937                 {
9938                   case 'e':
9939                     if (name[4] == 's' &&
9940                         name[5] == 's')
9941                     {                             /* unless     */
9942                       return KEY_unless;
9943                     }
9944
9945                     goto unknown;
9946
9947                   case 'i':
9948                     if (name[4] == 'n' &&
9949                         name[5] == 'k')
9950                     {                             /* unlink     */
9951                       return -KEY_unlink;
9952                     }
9953
9954                     goto unknown;
9955
9956                   default:
9957                     goto unknown;
9958                 }
9959
9960               case 'p':
9961                 if (name[3] == 'a' &&
9962                     name[4] == 'c' &&
9963                     name[5] == 'k')
9964                 {                                 /* unpack     */
9965                   return -KEY_unpack;
9966                 }
9967
9968                 goto unknown;
9969
9970               default:
9971                 goto unknown;
9972             }
9973           }
9974
9975           goto unknown;
9976
9977         case 'v':
9978           if (name[1] == 'a' &&
9979               name[2] == 'l' &&
9980               name[3] == 'u' &&
9981               name[4] == 'e' &&
9982               name[5] == 's')
9983           {                                       /* values     */
9984             return -KEY_values;
9985           }
9986
9987           goto unknown;
9988
9989         default:
9990           goto unknown;
9991       }
9992
9993     case 7: /* 29 tokens of length 7 */
9994       switch (name[0])
9995       {
9996         case 'D':
9997           if (name[1] == 'E' &&
9998               name[2] == 'S' &&
9999               name[3] == 'T' &&
10000               name[4] == 'R' &&
10001               name[5] == 'O' &&
10002               name[6] == 'Y')
10003           {                                       /* DESTROY    */
10004             return KEY_DESTROY;
10005           }
10006
10007           goto unknown;
10008
10009         case '_':
10010           if (name[1] == '_' &&
10011               name[2] == 'E' &&
10012               name[3] == 'N' &&
10013               name[4] == 'D' &&
10014               name[5] == '_' &&
10015               name[6] == '_')
10016           {                                       /* __END__    */
10017             return KEY___END__;
10018           }
10019
10020           goto unknown;
10021
10022         case 'b':
10023           if (name[1] == 'i' &&
10024               name[2] == 'n' &&
10025               name[3] == 'm' &&
10026               name[4] == 'o' &&
10027               name[5] == 'd' &&
10028               name[6] == 'e')
10029           {                                       /* binmode    */
10030             return -KEY_binmode;
10031           }
10032
10033           goto unknown;
10034
10035         case 'c':
10036           if (name[1] == 'o' &&
10037               name[2] == 'n' &&
10038               name[3] == 'n' &&
10039               name[4] == 'e' &&
10040               name[5] == 'c' &&
10041               name[6] == 't')
10042           {                                       /* connect    */
10043             return -KEY_connect;
10044           }
10045
10046           goto unknown;
10047
10048         case 'd':
10049           switch (name[1])
10050           {
10051             case 'b':
10052               if (name[2] == 'm' &&
10053                   name[3] == 'o' &&
10054                   name[4] == 'p' &&
10055                   name[5] == 'e' &&
10056                   name[6] == 'n')
10057               {                                   /* dbmopen    */
10058                 return -KEY_dbmopen;
10059               }
10060
10061               goto unknown;
10062
10063             case 'e':
10064               if (name[2] == 'f')
10065               {
10066                 switch (name[3])
10067                 {
10068                   case 'a':
10069                     if (name[4] == 'u' &&
10070                         name[5] == 'l' &&
10071                         name[6] == 't')
10072                     {                             /* default    */
10073                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10074                     }
10075
10076                     goto unknown;
10077
10078                   case 'i':
10079                     if (name[4] == 'n' &&
10080                         name[5] == 'e' &&
10081                         name[6] == 'd')
10082                     {                             /* defined    */
10083                       return KEY_defined;
10084                     }
10085
10086                     goto unknown;
10087
10088                   default:
10089                     goto unknown;
10090                 }
10091               }
10092
10093               goto unknown;
10094
10095             default:
10096               goto unknown;
10097           }
10098
10099         case 'f':
10100           if (name[1] == 'o' &&
10101               name[2] == 'r' &&
10102               name[3] == 'e' &&
10103               name[4] == 'a' &&
10104               name[5] == 'c' &&
10105               name[6] == 'h')
10106           {                                       /* foreach    */
10107             return KEY_foreach;
10108           }
10109
10110           goto unknown;
10111
10112         case 'g':
10113           if (name[1] == 'e' &&
10114               name[2] == 't' &&
10115               name[3] == 'p')
10116           {
10117             switch (name[4])
10118             {
10119               case 'g':
10120                 if (name[5] == 'r' &&
10121                     name[6] == 'p')
10122                 {                                 /* getpgrp    */
10123                   return -KEY_getpgrp;
10124                 }
10125
10126                 goto unknown;
10127
10128               case 'p':
10129                 if (name[5] == 'i' &&
10130                     name[6] == 'd')
10131                 {                                 /* getppid    */
10132                   return -KEY_getppid;
10133                 }
10134
10135                 goto unknown;
10136
10137               default:
10138                 goto unknown;
10139             }
10140           }
10141
10142           goto unknown;
10143
10144         case 'l':
10145           if (name[1] == 'c' &&
10146               name[2] == 'f' &&
10147               name[3] == 'i' &&
10148               name[4] == 'r' &&
10149               name[5] == 's' &&
10150               name[6] == 't')
10151           {                                       /* lcfirst    */
10152             return -KEY_lcfirst;
10153           }
10154
10155           goto unknown;
10156
10157         case 'o':
10158           if (name[1] == 'p' &&
10159               name[2] == 'e' &&
10160               name[3] == 'n' &&
10161               name[4] == 'd' &&
10162               name[5] == 'i' &&
10163               name[6] == 'r')
10164           {                                       /* opendir    */
10165             return -KEY_opendir;
10166           }
10167
10168           goto unknown;
10169
10170         case 'p':
10171           if (name[1] == 'a' &&
10172               name[2] == 'c' &&
10173               name[3] == 'k' &&
10174               name[4] == 'a' &&
10175               name[5] == 'g' &&
10176               name[6] == 'e')
10177           {                                       /* package    */
10178             return KEY_package;
10179           }
10180
10181           goto unknown;
10182
10183         case 'r':
10184           if (name[1] == 'e')
10185           {
10186             switch (name[2])
10187             {
10188               case 'a':
10189                 if (name[3] == 'd' &&
10190                     name[4] == 'd' &&
10191                     name[5] == 'i' &&
10192                     name[6] == 'r')
10193                 {                                 /* readdir    */
10194                   return -KEY_readdir;
10195                 }
10196
10197                 goto unknown;
10198
10199               case 'q':
10200                 if (name[3] == 'u' &&
10201                     name[4] == 'i' &&
10202                     name[5] == 'r' &&
10203                     name[6] == 'e')
10204                 {                                 /* require    */
10205                   return KEY_require;
10206                 }
10207
10208                 goto unknown;
10209
10210               case 'v':
10211                 if (name[3] == 'e' &&
10212                     name[4] == 'r' &&
10213                     name[5] == 's' &&
10214                     name[6] == 'e')
10215                 {                                 /* reverse    */
10216                   return -KEY_reverse;
10217                 }
10218
10219                 goto unknown;
10220
10221               default:
10222                 goto unknown;
10223             }
10224           }
10225
10226           goto unknown;
10227
10228         case 's':
10229           switch (name[1])
10230           {
10231             case 'e':
10232               switch (name[2])
10233               {
10234                 case 'e':
10235                   if (name[3] == 'k' &&
10236                       name[4] == 'd' &&
10237                       name[5] == 'i' &&
10238                       name[6] == 'r')
10239                   {                               /* seekdir    */
10240                     return -KEY_seekdir;
10241                   }
10242
10243                   goto unknown;
10244
10245                 case 't':
10246                   if (name[3] == 'p' &&
10247                       name[4] == 'g' &&
10248                       name[5] == 'r' &&
10249                       name[6] == 'p')
10250                   {                               /* setpgrp    */
10251                     return -KEY_setpgrp;
10252                   }
10253
10254                   goto unknown;
10255
10256                 default:
10257                   goto unknown;
10258               }
10259
10260             case 'h':
10261               if (name[2] == 'm' &&
10262                   name[3] == 'r' &&
10263                   name[4] == 'e' &&
10264                   name[5] == 'a' &&
10265                   name[6] == 'd')
10266               {                                   /* shmread    */
10267                 return -KEY_shmread;
10268               }
10269
10270               goto unknown;
10271
10272             case 'p':
10273               if (name[2] == 'r' &&
10274                   name[3] == 'i' &&
10275                   name[4] == 'n' &&
10276                   name[5] == 't' &&
10277                   name[6] == 'f')
10278               {                                   /* sprintf    */
10279                 return -KEY_sprintf;
10280               }
10281
10282               goto unknown;
10283
10284             case 'y':
10285               switch (name[2])
10286               {
10287                 case 'm':
10288                   if (name[3] == 'l' &&
10289                       name[4] == 'i' &&
10290                       name[5] == 'n' &&
10291                       name[6] == 'k')
10292                   {                               /* symlink    */
10293                     return -KEY_symlink;
10294                   }
10295
10296                   goto unknown;
10297
10298                 case 's':
10299                   switch (name[3])
10300                   {
10301                     case 'c':
10302                       if (name[4] == 'a' &&
10303                           name[5] == 'l' &&
10304                           name[6] == 'l')
10305                       {                           /* syscall    */
10306                         return -KEY_syscall;
10307                       }
10308
10309                       goto unknown;
10310
10311                     case 'o':
10312                       if (name[4] == 'p' &&
10313                           name[5] == 'e' &&
10314                           name[6] == 'n')
10315                       {                           /* sysopen    */
10316                         return -KEY_sysopen;
10317                       }
10318
10319                       goto unknown;
10320
10321                     case 'r':
10322                       if (name[4] == 'e' &&
10323                           name[5] == 'a' &&
10324                           name[6] == 'd')
10325                       {                           /* sysread    */
10326                         return -KEY_sysread;
10327                       }
10328
10329                       goto unknown;
10330
10331                     case 's':
10332                       if (name[4] == 'e' &&
10333                           name[5] == 'e' &&
10334                           name[6] == 'k')
10335                       {                           /* sysseek    */
10336                         return -KEY_sysseek;
10337                       }
10338
10339                       goto unknown;
10340
10341                     default:
10342                       goto unknown;
10343                   }
10344
10345                 default:
10346                   goto unknown;
10347               }
10348
10349             default:
10350               goto unknown;
10351           }
10352
10353         case 't':
10354           if (name[1] == 'e' &&
10355               name[2] == 'l' &&
10356               name[3] == 'l' &&
10357               name[4] == 'd' &&
10358               name[5] == 'i' &&
10359               name[6] == 'r')
10360           {                                       /* telldir    */
10361             return -KEY_telldir;
10362           }
10363
10364           goto unknown;
10365
10366         case 'u':
10367           switch (name[1])
10368           {
10369             case 'c':
10370               if (name[2] == 'f' &&
10371                   name[3] == 'i' &&
10372                   name[4] == 'r' &&
10373                   name[5] == 's' &&
10374                   name[6] == 't')
10375               {                                   /* ucfirst    */
10376                 return -KEY_ucfirst;
10377               }
10378
10379               goto unknown;
10380
10381             case 'n':
10382               if (name[2] == 's' &&
10383                   name[3] == 'h' &&
10384                   name[4] == 'i' &&
10385                   name[5] == 'f' &&
10386                   name[6] == 't')
10387               {                                   /* unshift    */
10388                 return -KEY_unshift;
10389               }
10390
10391               goto unknown;
10392
10393             default:
10394               goto unknown;
10395           }
10396
10397         case 'w':
10398           if (name[1] == 'a' &&
10399               name[2] == 'i' &&
10400               name[3] == 't' &&
10401               name[4] == 'p' &&
10402               name[5] == 'i' &&
10403               name[6] == 'd')
10404           {                                       /* waitpid    */
10405             return -KEY_waitpid;
10406           }
10407
10408           goto unknown;
10409
10410         default:
10411           goto unknown;
10412       }
10413
10414     case 8: /* 26 tokens of length 8 */
10415       switch (name[0])
10416       {
10417         case 'A':
10418           if (name[1] == 'U' &&
10419               name[2] == 'T' &&
10420               name[3] == 'O' &&
10421               name[4] == 'L' &&
10422               name[5] == 'O' &&
10423               name[6] == 'A' &&
10424               name[7] == 'D')
10425           {                                       /* AUTOLOAD   */
10426             return KEY_AUTOLOAD;
10427           }
10428
10429           goto unknown;
10430
10431         case '_':
10432           if (name[1] == '_')
10433           {
10434             switch (name[2])
10435             {
10436               case 'D':
10437                 if (name[3] == 'A' &&
10438                     name[4] == 'T' &&
10439                     name[5] == 'A' &&
10440                     name[6] == '_' &&
10441                     name[7] == '_')
10442                 {                                 /* __DATA__   */
10443                   return KEY___DATA__;
10444                 }
10445
10446                 goto unknown;
10447
10448               case 'F':
10449                 if (name[3] == 'I' &&
10450                     name[4] == 'L' &&
10451                     name[5] == 'E' &&
10452                     name[6] == '_' &&
10453                     name[7] == '_')
10454                 {                                 /* __FILE__   */
10455                   return -KEY___FILE__;
10456                 }
10457
10458                 goto unknown;
10459
10460               case 'L':
10461                 if (name[3] == 'I' &&
10462                     name[4] == 'N' &&
10463                     name[5] == 'E' &&
10464                     name[6] == '_' &&
10465                     name[7] == '_')
10466                 {                                 /* __LINE__   */
10467                   return -KEY___LINE__;
10468                 }
10469
10470                 goto unknown;
10471
10472               default:
10473                 goto unknown;
10474             }
10475           }
10476
10477           goto unknown;
10478
10479         case 'c':
10480           switch (name[1])
10481           {
10482             case 'l':
10483               if (name[2] == 'o' &&
10484                   name[3] == 's' &&
10485                   name[4] == 'e' &&
10486                   name[5] == 'd' &&
10487                   name[6] == 'i' &&
10488                   name[7] == 'r')
10489               {                                   /* closedir   */
10490                 return -KEY_closedir;
10491               }
10492
10493               goto unknown;
10494
10495             case 'o':
10496               if (name[2] == 'n' &&
10497                   name[3] == 't' &&
10498                   name[4] == 'i' &&
10499                   name[5] == 'n' &&
10500                   name[6] == 'u' &&
10501                   name[7] == 'e')
10502               {                                   /* continue   */
10503                 return -KEY_continue;
10504               }
10505
10506               goto unknown;
10507
10508             default:
10509               goto unknown;
10510           }
10511
10512         case 'd':
10513           if (name[1] == 'b' &&
10514               name[2] == 'm' &&
10515               name[3] == 'c' &&
10516               name[4] == 'l' &&
10517               name[5] == 'o' &&
10518               name[6] == 's' &&
10519               name[7] == 'e')
10520           {                                       /* dbmclose   */
10521             return -KEY_dbmclose;
10522           }
10523
10524           goto unknown;
10525
10526         case 'e':
10527           if (name[1] == 'n' &&
10528               name[2] == 'd')
10529           {
10530             switch (name[3])
10531             {
10532               case 'g':
10533                 if (name[4] == 'r' &&
10534                     name[5] == 'e' &&
10535                     name[6] == 'n' &&
10536                     name[7] == 't')
10537                 {                                 /* endgrent   */
10538                   return -KEY_endgrent;
10539                 }
10540
10541                 goto unknown;
10542
10543               case 'p':
10544                 if (name[4] == 'w' &&
10545                     name[5] == 'e' &&
10546                     name[6] == 'n' &&
10547                     name[7] == 't')
10548                 {                                 /* endpwent   */
10549                   return -KEY_endpwent;
10550                 }
10551
10552                 goto unknown;
10553
10554               default:
10555                 goto unknown;
10556             }
10557           }
10558
10559           goto unknown;
10560
10561         case 'f':
10562           if (name[1] == 'o' &&
10563               name[2] == 'r' &&
10564               name[3] == 'm' &&
10565               name[4] == 'l' &&
10566               name[5] == 'i' &&
10567               name[6] == 'n' &&
10568               name[7] == 'e')
10569           {                                       /* formline   */
10570             return -KEY_formline;
10571           }
10572
10573           goto unknown;
10574
10575         case 'g':
10576           if (name[1] == 'e' &&
10577               name[2] == 't')
10578           {
10579             switch (name[3])
10580             {
10581               case 'g':
10582                 if (name[4] == 'r')
10583                 {
10584                   switch (name[5])
10585                   {
10586                     case 'e':
10587                       if (name[6] == 'n' &&
10588                           name[7] == 't')
10589                       {                           /* getgrent   */
10590                         return -KEY_getgrent;
10591                       }
10592
10593                       goto unknown;
10594
10595                     case 'g':
10596                       if (name[6] == 'i' &&
10597                           name[7] == 'd')
10598                       {                           /* getgrgid   */
10599                         return -KEY_getgrgid;
10600                       }
10601
10602                       goto unknown;
10603
10604                     case 'n':
10605                       if (name[6] == 'a' &&
10606                           name[7] == 'm')
10607                       {                           /* getgrnam   */
10608                         return -KEY_getgrnam;
10609                       }
10610
10611                       goto unknown;
10612
10613                     default:
10614                       goto unknown;
10615                   }
10616                 }
10617
10618                 goto unknown;
10619
10620               case 'l':
10621                 if (name[4] == 'o' &&
10622                     name[5] == 'g' &&
10623                     name[6] == 'i' &&
10624                     name[7] == 'n')
10625                 {                                 /* getlogin   */
10626                   return -KEY_getlogin;
10627                 }
10628
10629                 goto unknown;
10630
10631               case 'p':
10632                 if (name[4] == 'w')
10633                 {
10634                   switch (name[5])
10635                   {
10636                     case 'e':
10637                       if (name[6] == 'n' &&
10638                           name[7] == 't')
10639                       {                           /* getpwent   */
10640                         return -KEY_getpwent;
10641                       }
10642
10643                       goto unknown;
10644
10645                     case 'n':
10646                       if (name[6] == 'a' &&
10647                           name[7] == 'm')
10648                       {                           /* getpwnam   */
10649                         return -KEY_getpwnam;
10650                       }
10651
10652                       goto unknown;
10653
10654                     case 'u':
10655                       if (name[6] == 'i' &&
10656                           name[7] == 'd')
10657                       {                           /* getpwuid   */
10658                         return -KEY_getpwuid;
10659                       }
10660
10661                       goto unknown;
10662
10663                     default:
10664                       goto unknown;
10665                   }
10666                 }
10667
10668                 goto unknown;
10669
10670               default:
10671                 goto unknown;
10672             }
10673           }
10674
10675           goto unknown;
10676
10677         case 'r':
10678           if (name[1] == 'e' &&
10679               name[2] == 'a' &&
10680               name[3] == 'd')
10681           {
10682             switch (name[4])
10683             {
10684               case 'l':
10685                 if (name[5] == 'i' &&
10686                     name[6] == 'n')
10687                 {
10688                   switch (name[7])
10689                   {
10690                     case 'e':
10691                       {                           /* readline   */
10692                         return -KEY_readline;
10693                       }
10694
10695                     case 'k':
10696                       {                           /* readlink   */
10697                         return -KEY_readlink;
10698                       }
10699
10700                     default:
10701                       goto unknown;
10702                   }
10703                 }
10704
10705                 goto unknown;
10706
10707               case 'p':
10708                 if (name[5] == 'i' &&
10709                     name[6] == 'p' &&
10710                     name[7] == 'e')
10711                 {                                 /* readpipe   */
10712                   return -KEY_readpipe;
10713                 }
10714
10715                 goto unknown;
10716
10717               default:
10718                 goto unknown;
10719             }
10720           }
10721
10722           goto unknown;
10723
10724         case 's':
10725           switch (name[1])
10726           {
10727             case 'e':
10728               if (name[2] == 't')
10729               {
10730                 switch (name[3])
10731                 {
10732                   case 'g':
10733                     if (name[4] == 'r' &&
10734                         name[5] == 'e' &&
10735                         name[6] == 'n' &&
10736                         name[7] == 't')
10737                     {                             /* setgrent   */
10738                       return -KEY_setgrent;
10739                     }
10740
10741                     goto unknown;
10742
10743                   case 'p':
10744                     if (name[4] == 'w' &&
10745                         name[5] == 'e' &&
10746                         name[6] == 'n' &&
10747                         name[7] == 't')
10748                     {                             /* setpwent   */
10749                       return -KEY_setpwent;
10750                     }
10751
10752                     goto unknown;
10753
10754                   default:
10755                     goto unknown;
10756                 }
10757               }
10758
10759               goto unknown;
10760
10761             case 'h':
10762               switch (name[2])
10763               {
10764                 case 'm':
10765                   if (name[3] == 'w' &&
10766                       name[4] == 'r' &&
10767                       name[5] == 'i' &&
10768                       name[6] == 't' &&
10769                       name[7] == 'e')
10770                   {                               /* shmwrite   */
10771                     return -KEY_shmwrite;
10772                   }
10773
10774                   goto unknown;
10775
10776                 case 'u':
10777                   if (name[3] == 't' &&
10778                       name[4] == 'd' &&
10779                       name[5] == 'o' &&
10780                       name[6] == 'w' &&
10781                       name[7] == 'n')
10782                   {                               /* shutdown   */
10783                     return -KEY_shutdown;
10784                   }
10785
10786                   goto unknown;
10787
10788                 default:
10789                   goto unknown;
10790               }
10791
10792             case 'y':
10793               if (name[2] == 's' &&
10794                   name[3] == 'w' &&
10795                   name[4] == 'r' &&
10796                   name[5] == 'i' &&
10797                   name[6] == 't' &&
10798                   name[7] == 'e')
10799               {                                   /* syswrite   */
10800                 return -KEY_syswrite;
10801               }
10802
10803               goto unknown;
10804
10805             default:
10806               goto unknown;
10807           }
10808
10809         case 't':
10810           if (name[1] == 'r' &&
10811               name[2] == 'u' &&
10812               name[3] == 'n' &&
10813               name[4] == 'c' &&
10814               name[5] == 'a' &&
10815               name[6] == 't' &&
10816               name[7] == 'e')
10817           {                                       /* truncate   */
10818             return -KEY_truncate;
10819           }
10820
10821           goto unknown;
10822
10823         default:
10824           goto unknown;
10825       }
10826
10827     case 9: /* 9 tokens of length 9 */
10828       switch (name[0])
10829       {
10830         case 'U':
10831           if (name[1] == 'N' &&
10832               name[2] == 'I' &&
10833               name[3] == 'T' &&
10834               name[4] == 'C' &&
10835               name[5] == 'H' &&
10836               name[6] == 'E' &&
10837               name[7] == 'C' &&
10838               name[8] == 'K')
10839           {                                       /* UNITCHECK  */
10840             return KEY_UNITCHECK;
10841           }
10842
10843           goto unknown;
10844
10845         case 'e':
10846           if (name[1] == 'n' &&
10847               name[2] == 'd' &&
10848               name[3] == 'n' &&
10849               name[4] == 'e' &&
10850               name[5] == 't' &&
10851               name[6] == 'e' &&
10852               name[7] == 'n' &&
10853               name[8] == 't')
10854           {                                       /* endnetent  */
10855             return -KEY_endnetent;
10856           }
10857
10858           goto unknown;
10859
10860         case 'g':
10861           if (name[1] == 'e' &&
10862               name[2] == 't' &&
10863               name[3] == 'n' &&
10864               name[4] == 'e' &&
10865               name[5] == 't' &&
10866               name[6] == 'e' &&
10867               name[7] == 'n' &&
10868               name[8] == 't')
10869           {                                       /* getnetent  */
10870             return -KEY_getnetent;
10871           }
10872
10873           goto unknown;
10874
10875         case 'l':
10876           if (name[1] == 'o' &&
10877               name[2] == 'c' &&
10878               name[3] == 'a' &&
10879               name[4] == 'l' &&
10880               name[5] == 't' &&
10881               name[6] == 'i' &&
10882               name[7] == 'm' &&
10883               name[8] == 'e')
10884           {                                       /* localtime  */
10885             return -KEY_localtime;
10886           }
10887
10888           goto unknown;
10889
10890         case 'p':
10891           if (name[1] == 'r' &&
10892               name[2] == 'o' &&
10893               name[3] == 't' &&
10894               name[4] == 'o' &&
10895               name[5] == 't' &&
10896               name[6] == 'y' &&
10897               name[7] == 'p' &&
10898               name[8] == 'e')
10899           {                                       /* prototype  */
10900             return KEY_prototype;
10901           }
10902
10903           goto unknown;
10904
10905         case 'q':
10906           if (name[1] == 'u' &&
10907               name[2] == 'o' &&
10908               name[3] == 't' &&
10909               name[4] == 'e' &&
10910               name[5] == 'm' &&
10911               name[6] == 'e' &&
10912               name[7] == 't' &&
10913               name[8] == 'a')
10914           {                                       /* quotemeta  */
10915             return -KEY_quotemeta;
10916           }
10917
10918           goto unknown;
10919
10920         case 'r':
10921           if (name[1] == 'e' &&
10922               name[2] == 'w' &&
10923               name[3] == 'i' &&
10924               name[4] == 'n' &&
10925               name[5] == 'd' &&
10926               name[6] == 'd' &&
10927               name[7] == 'i' &&
10928               name[8] == 'r')
10929           {                                       /* rewinddir  */
10930             return -KEY_rewinddir;
10931           }
10932
10933           goto unknown;
10934
10935         case 's':
10936           if (name[1] == 'e' &&
10937               name[2] == 't' &&
10938               name[3] == 'n' &&
10939               name[4] == 'e' &&
10940               name[5] == 't' &&
10941               name[6] == 'e' &&
10942               name[7] == 'n' &&
10943               name[8] == 't')
10944           {                                       /* setnetent  */
10945             return -KEY_setnetent;
10946           }
10947
10948           goto unknown;
10949
10950         case 'w':
10951           if (name[1] == 'a' &&
10952               name[2] == 'n' &&
10953               name[3] == 't' &&
10954               name[4] == 'a' &&
10955               name[5] == 'r' &&
10956               name[6] == 'r' &&
10957               name[7] == 'a' &&
10958               name[8] == 'y')
10959           {                                       /* wantarray  */
10960             return -KEY_wantarray;
10961           }
10962
10963           goto unknown;
10964
10965         default:
10966           goto unknown;
10967       }
10968
10969     case 10: /* 9 tokens of length 10 */
10970       switch (name[0])
10971       {
10972         case 'e':
10973           if (name[1] == 'n' &&
10974               name[2] == 'd')
10975           {
10976             switch (name[3])
10977             {
10978               case 'h':
10979                 if (name[4] == 'o' &&
10980                     name[5] == 's' &&
10981                     name[6] == 't' &&
10982                     name[7] == 'e' &&
10983                     name[8] == 'n' &&
10984                     name[9] == 't')
10985                 {                                 /* endhostent */
10986                   return -KEY_endhostent;
10987                 }
10988
10989                 goto unknown;
10990
10991               case 's':
10992                 if (name[4] == 'e' &&
10993                     name[5] == 'r' &&
10994                     name[6] == 'v' &&
10995                     name[7] == 'e' &&
10996                     name[8] == 'n' &&
10997                     name[9] == 't')
10998                 {                                 /* endservent */
10999                   return -KEY_endservent;
11000                 }
11001
11002                 goto unknown;
11003
11004               default:
11005                 goto unknown;
11006             }
11007           }
11008
11009           goto unknown;
11010
11011         case 'g':
11012           if (name[1] == 'e' &&
11013               name[2] == 't')
11014           {
11015             switch (name[3])
11016             {
11017               case 'h':
11018                 if (name[4] == 'o' &&
11019                     name[5] == 's' &&
11020                     name[6] == 't' &&
11021                     name[7] == 'e' &&
11022                     name[8] == 'n' &&
11023                     name[9] == 't')
11024                 {                                 /* gethostent */
11025                   return -KEY_gethostent;
11026                 }
11027
11028                 goto unknown;
11029
11030               case 's':
11031                 switch (name[4])
11032                 {
11033                   case 'e':
11034                     if (name[5] == 'r' &&
11035                         name[6] == 'v' &&
11036                         name[7] == 'e' &&
11037                         name[8] == 'n' &&
11038                         name[9] == 't')
11039                     {                             /* getservent */
11040                       return -KEY_getservent;
11041                     }
11042
11043                     goto unknown;
11044
11045                   case 'o':
11046                     if (name[5] == 'c' &&
11047                         name[6] == 'k' &&
11048                         name[7] == 'o' &&
11049                         name[8] == 'p' &&
11050                         name[9] == 't')
11051                     {                             /* getsockopt */
11052                       return -KEY_getsockopt;
11053                     }
11054
11055                     goto unknown;
11056
11057                   default:
11058                     goto unknown;
11059                 }
11060
11061               default:
11062                 goto unknown;
11063             }
11064           }
11065
11066           goto unknown;
11067
11068         case 's':
11069           switch (name[1])
11070           {
11071             case 'e':
11072               if (name[2] == 't')
11073               {
11074                 switch (name[3])
11075                 {
11076                   case 'h':
11077                     if (name[4] == 'o' &&
11078                         name[5] == 's' &&
11079                         name[6] == 't' &&
11080                         name[7] == 'e' &&
11081                         name[8] == 'n' &&
11082                         name[9] == 't')
11083                     {                             /* sethostent */
11084                       return -KEY_sethostent;
11085                     }
11086
11087                     goto unknown;
11088
11089                   case 's':
11090                     switch (name[4])
11091                     {
11092                       case 'e':
11093                         if (name[5] == 'r' &&
11094                             name[6] == 'v' &&
11095                             name[7] == 'e' &&
11096                             name[8] == 'n' &&
11097                             name[9] == 't')
11098                         {                         /* setservent */
11099                           return -KEY_setservent;
11100                         }
11101
11102                         goto unknown;
11103
11104                       case 'o':
11105                         if (name[5] == 'c' &&
11106                             name[6] == 'k' &&
11107                             name[7] == 'o' &&
11108                             name[8] == 'p' &&
11109                             name[9] == 't')
11110                         {                         /* setsockopt */
11111                           return -KEY_setsockopt;
11112                         }
11113
11114                         goto unknown;
11115
11116                       default:
11117                         goto unknown;
11118                     }
11119
11120                   default:
11121                     goto unknown;
11122                 }
11123               }
11124
11125               goto unknown;
11126
11127             case 'o':
11128               if (name[2] == 'c' &&
11129                   name[3] == 'k' &&
11130                   name[4] == 'e' &&
11131                   name[5] == 't' &&
11132                   name[6] == 'p' &&
11133                   name[7] == 'a' &&
11134                   name[8] == 'i' &&
11135                   name[9] == 'r')
11136               {                                   /* socketpair */
11137                 return -KEY_socketpair;
11138               }
11139
11140               goto unknown;
11141
11142             default:
11143               goto unknown;
11144           }
11145
11146         default:
11147           goto unknown;
11148       }
11149
11150     case 11: /* 8 tokens of length 11 */
11151       switch (name[0])
11152       {
11153         case '_':
11154           if (name[1] == '_' &&
11155               name[2] == 'P' &&
11156               name[3] == 'A' &&
11157               name[4] == 'C' &&
11158               name[5] == 'K' &&
11159               name[6] == 'A' &&
11160               name[7] == 'G' &&
11161               name[8] == 'E' &&
11162               name[9] == '_' &&
11163               name[10] == '_')
11164           {                                       /* __PACKAGE__ */
11165             return -KEY___PACKAGE__;
11166           }
11167
11168           goto unknown;
11169
11170         case 'e':
11171           if (name[1] == 'n' &&
11172               name[2] == 'd' &&
11173               name[3] == 'p' &&
11174               name[4] == 'r' &&
11175               name[5] == 'o' &&
11176               name[6] == 't' &&
11177               name[7] == 'o' &&
11178               name[8] == 'e' &&
11179               name[9] == 'n' &&
11180               name[10] == 't')
11181           {                                       /* endprotoent */
11182             return -KEY_endprotoent;
11183           }
11184
11185           goto unknown;
11186
11187         case 'g':
11188           if (name[1] == 'e' &&
11189               name[2] == 't')
11190           {
11191             switch (name[3])
11192             {
11193               case 'p':
11194                 switch (name[4])
11195                 {
11196                   case 'e':
11197                     if (name[5] == 'e' &&
11198                         name[6] == 'r' &&
11199                         name[7] == 'n' &&
11200                         name[8] == 'a' &&
11201                         name[9] == 'm' &&
11202                         name[10] == 'e')
11203                     {                             /* getpeername */
11204                       return -KEY_getpeername;
11205                     }
11206
11207                     goto unknown;
11208
11209                   case 'r':
11210                     switch (name[5])
11211                     {
11212                       case 'i':
11213                         if (name[6] == 'o' &&
11214                             name[7] == 'r' &&
11215                             name[8] == 'i' &&
11216                             name[9] == 't' &&
11217                             name[10] == 'y')
11218                         {                         /* getpriority */
11219                           return -KEY_getpriority;
11220                         }
11221
11222                         goto unknown;
11223
11224                       case 'o':
11225                         if (name[6] == 't' &&
11226                             name[7] == 'o' &&
11227                             name[8] == 'e' &&
11228                             name[9] == 'n' &&
11229                             name[10] == 't')
11230                         {                         /* getprotoent */
11231                           return -KEY_getprotoent;
11232                         }
11233
11234                         goto unknown;
11235
11236                       default:
11237                         goto unknown;
11238                     }
11239
11240                   default:
11241                     goto unknown;
11242                 }
11243
11244               case 's':
11245                 if (name[4] == 'o' &&
11246                     name[5] == 'c' &&
11247                     name[6] == 'k' &&
11248                     name[7] == 'n' &&
11249                     name[8] == 'a' &&
11250                     name[9] == 'm' &&
11251                     name[10] == 'e')
11252                 {                                 /* getsockname */
11253                   return -KEY_getsockname;
11254                 }
11255
11256                 goto unknown;
11257
11258               default:
11259                 goto unknown;
11260             }
11261           }
11262
11263           goto unknown;
11264
11265         case 's':
11266           if (name[1] == 'e' &&
11267               name[2] == 't' &&
11268               name[3] == 'p' &&
11269               name[4] == 'r')
11270           {
11271             switch (name[5])
11272             {
11273               case 'i':
11274                 if (name[6] == 'o' &&
11275                     name[7] == 'r' &&
11276                     name[8] == 'i' &&
11277                     name[9] == 't' &&
11278                     name[10] == 'y')
11279                 {                                 /* setpriority */
11280                   return -KEY_setpriority;
11281                 }
11282
11283                 goto unknown;
11284
11285               case 'o':
11286                 if (name[6] == 't' &&
11287                     name[7] == 'o' &&
11288                     name[8] == 'e' &&
11289                     name[9] == 'n' &&
11290                     name[10] == 't')
11291                 {                                 /* setprotoent */
11292                   return -KEY_setprotoent;
11293                 }
11294
11295                 goto unknown;
11296
11297               default:
11298                 goto unknown;
11299             }
11300           }
11301
11302           goto unknown;
11303
11304         default:
11305           goto unknown;
11306       }
11307
11308     case 12: /* 2 tokens of length 12 */
11309       if (name[0] == 'g' &&
11310           name[1] == 'e' &&
11311           name[2] == 't' &&
11312           name[3] == 'n' &&
11313           name[4] == 'e' &&
11314           name[5] == 't' &&
11315           name[6] == 'b' &&
11316           name[7] == 'y')
11317       {
11318         switch (name[8])
11319         {
11320           case 'a':
11321             if (name[9] == 'd' &&
11322                 name[10] == 'd' &&
11323                 name[11] == 'r')
11324             {                                     /* getnetbyaddr */
11325               return -KEY_getnetbyaddr;
11326             }
11327
11328             goto unknown;
11329
11330           case 'n':
11331             if (name[9] == 'a' &&
11332                 name[10] == 'm' &&
11333                 name[11] == 'e')
11334             {                                     /* getnetbyname */
11335               return -KEY_getnetbyname;
11336             }
11337
11338             goto unknown;
11339
11340           default:
11341             goto unknown;
11342         }
11343       }
11344
11345       goto unknown;
11346
11347     case 13: /* 4 tokens of length 13 */
11348       if (name[0] == 'g' &&
11349           name[1] == 'e' &&
11350           name[2] == 't')
11351       {
11352         switch (name[3])
11353         {
11354           case 'h':
11355             if (name[4] == 'o' &&
11356                 name[5] == 's' &&
11357                 name[6] == 't' &&
11358                 name[7] == 'b' &&
11359                 name[8] == 'y')
11360             {
11361               switch (name[9])
11362               {
11363                 case 'a':
11364                   if (name[10] == 'd' &&
11365                       name[11] == 'd' &&
11366                       name[12] == 'r')
11367                   {                               /* gethostbyaddr */
11368                     return -KEY_gethostbyaddr;
11369                   }
11370
11371                   goto unknown;
11372
11373                 case 'n':
11374                   if (name[10] == 'a' &&
11375                       name[11] == 'm' &&
11376                       name[12] == 'e')
11377                   {                               /* gethostbyname */
11378                     return -KEY_gethostbyname;
11379                   }
11380
11381                   goto unknown;
11382
11383                 default:
11384                   goto unknown;
11385               }
11386             }
11387
11388             goto unknown;
11389
11390           case 's':
11391             if (name[4] == 'e' &&
11392                 name[5] == 'r' &&
11393                 name[6] == 'v' &&
11394                 name[7] == 'b' &&
11395                 name[8] == 'y')
11396             {
11397               switch (name[9])
11398               {
11399                 case 'n':
11400                   if (name[10] == 'a' &&
11401                       name[11] == 'm' &&
11402                       name[12] == 'e')
11403                   {                               /* getservbyname */
11404                     return -KEY_getservbyname;
11405                   }
11406
11407                   goto unknown;
11408
11409                 case 'p':
11410                   if (name[10] == 'o' &&
11411                       name[11] == 'r' &&
11412                       name[12] == 't')
11413                   {                               /* getservbyport */
11414                     return -KEY_getservbyport;
11415                   }
11416
11417                   goto unknown;
11418
11419                 default:
11420                   goto unknown;
11421               }
11422             }
11423
11424             goto unknown;
11425
11426           default:
11427             goto unknown;
11428         }
11429       }
11430
11431       goto unknown;
11432
11433     case 14: /* 1 tokens of length 14 */
11434       if (name[0] == 'g' &&
11435           name[1] == 'e' &&
11436           name[2] == 't' &&
11437           name[3] == 'p' &&
11438           name[4] == 'r' &&
11439           name[5] == 'o' &&
11440           name[6] == 't' &&
11441           name[7] == 'o' &&
11442           name[8] == 'b' &&
11443           name[9] == 'y' &&
11444           name[10] == 'n' &&
11445           name[11] == 'a' &&
11446           name[12] == 'm' &&
11447           name[13] == 'e')
11448       {                                           /* getprotobyname */
11449         return -KEY_getprotobyname;
11450       }
11451
11452       goto unknown;
11453
11454     case 16: /* 1 tokens of length 16 */
11455       if (name[0] == 'g' &&
11456           name[1] == 'e' &&
11457           name[2] == 't' &&
11458           name[3] == 'p' &&
11459           name[4] == 'r' &&
11460           name[5] == 'o' &&
11461           name[6] == 't' &&
11462           name[7] == 'o' &&
11463           name[8] == 'b' &&
11464           name[9] == 'y' &&
11465           name[10] == 'n' &&
11466           name[11] == 'u' &&
11467           name[12] == 'm' &&
11468           name[13] == 'b' &&
11469           name[14] == 'e' &&
11470           name[15] == 'r')
11471       {                                           /* getprotobynumber */
11472         return -KEY_getprotobynumber;
11473       }
11474
11475       goto unknown;
11476
11477     default:
11478       goto unknown;
11479   }
11480
11481 unknown:
11482   return 0;
11483 }
11484
11485 STATIC void
11486 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11487 {
11488     dVAR;
11489
11490     PERL_ARGS_ASSERT_CHECKCOMMA;
11491
11492     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11493         if (ckWARN(WARN_SYNTAX)) {
11494             int level = 1;
11495             const char *w;
11496             for (w = s+2; *w && level; w++) {
11497                 if (*w == '(')
11498                     ++level;
11499                 else if (*w == ')')
11500                     --level;
11501             }
11502             while (isSPACE(*w))
11503                 ++w;
11504             /* the list of chars below is for end of statements or
11505              * block / parens, boolean operators (&&, ||, //) and branch
11506              * constructs (or, and, if, until, unless, while, err, for).
11507              * Not a very solid hack... */
11508             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11509                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11510                             "%s (...) interpreted as function",name);
11511         }
11512     }
11513     while (s < PL_bufend && isSPACE(*s))
11514         s++;
11515     if (*s == '(')
11516         s++;
11517     while (s < PL_bufend && isSPACE(*s))
11518         s++;
11519     if (isIDFIRST_lazy_if(s,UTF)) {
11520         const char * const w = s++;
11521         while (isALNUM_lazy_if(s,UTF))
11522             s++;
11523         while (s < PL_bufend && isSPACE(*s))
11524             s++;
11525         if (*s == ',') {
11526             GV* gv;
11527             if (keyword(w, s - w, 0))
11528                 return;
11529
11530             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11531             if (gv && GvCVu(gv))
11532                 return;
11533             Perl_croak(aTHX_ "No comma allowed after %s", what);
11534         }
11535     }
11536 }
11537
11538 /* Either returns sv, or mortalizes sv and returns a new SV*.
11539    Best used as sv=new_constant(..., sv, ...).
11540    If s, pv are NULL, calls subroutine with one argument,
11541    and type is used with error messages only. */
11542
11543 STATIC SV *
11544 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11545                SV *sv, SV *pv, const char *type, STRLEN typelen)
11546 {
11547     dVAR; dSP;
11548     HV * const table = GvHV(PL_hintgv);          /* ^H */
11549     SV *res;
11550     SV **cvp;
11551     SV *cv, *typesv;
11552     const char *why1 = "", *why2 = "", *why3 = "";
11553
11554     PERL_ARGS_ASSERT_NEW_CONSTANT;
11555
11556     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11557         SV *msg;
11558
11559         why2 = (const char *)
11560             (strEQ(key,"charnames")
11561              ? "(possibly a missing \"use charnames ...\")"
11562              : "");
11563         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11564                             (type ? type: "undef"), why2);
11565
11566         /* This is convoluted and evil ("goto considered harmful")
11567          * but I do not understand the intricacies of all the different
11568          * failure modes of %^H in here.  The goal here is to make
11569          * the most probable error message user-friendly. --jhi */
11570
11571         goto msgdone;
11572
11573     report:
11574         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11575                             (type ? type: "undef"), why1, why2, why3);
11576     msgdone:
11577         yyerror(SvPVX_const(msg));
11578         SvREFCNT_dec(msg);
11579         return sv;
11580     }
11581
11582     /* charnames doesn't work well if there have been errors found */
11583     if (PL_error_count > 0 && strEQ(key,"charnames"))
11584         return &PL_sv_undef;
11585
11586     cvp = hv_fetch(table, key, keylen, FALSE);
11587     if (!cvp || !SvOK(*cvp)) {
11588         why1 = "$^H{";
11589         why2 = key;
11590         why3 = "} is not defined";
11591         goto report;
11592     }
11593     sv_2mortal(sv);                     /* Parent created it permanently */
11594     cv = *cvp;
11595     if (!pv && s)
11596         pv = newSVpvn_flags(s, len, SVs_TEMP);
11597     if (type && pv)
11598         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11599     else
11600         typesv = &PL_sv_undef;
11601
11602     PUSHSTACKi(PERLSI_OVERLOAD);
11603     ENTER ;
11604     SAVETMPS;
11605
11606     PUSHMARK(SP) ;
11607     EXTEND(sp, 3);
11608     if (pv)
11609         PUSHs(pv);
11610     PUSHs(sv);
11611     if (pv)
11612         PUSHs(typesv);
11613     PUTBACK;
11614     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11615
11616     SPAGAIN ;
11617
11618     /* Check the eval first */
11619     if (!PL_in_eval && SvTRUE(ERRSV)) {
11620         sv_catpvs(ERRSV, "Propagated");
11621         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11622         (void)POPs;
11623         res = SvREFCNT_inc_simple(sv);
11624     }
11625     else {
11626         res = POPs;
11627         SvREFCNT_inc_simple_void(res);
11628     }
11629
11630     PUTBACK ;
11631     FREETMPS ;
11632     LEAVE ;
11633     POPSTACK;
11634
11635     if (!SvOK(res)) {
11636         why1 = "Call to &{$^H{";
11637         why2 = key;
11638         why3 = "}} did not return a defined value";
11639         sv = res;
11640         goto report;
11641     }
11642
11643     return res;
11644 }
11645
11646 /* Returns a NUL terminated string, with the length of the string written to
11647    *slp
11648    */
11649 STATIC char *
11650 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11651 {
11652     dVAR;
11653     register char *d = dest;
11654     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11655
11656     PERL_ARGS_ASSERT_SCAN_WORD;
11657
11658     for (;;) {
11659         if (d >= e)
11660             Perl_croak(aTHX_ ident_too_long);
11661         if (isALNUM(*s))        /* UTF handled below */
11662             *d++ = *s++;
11663         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11664             *d++ = ':';
11665             *d++ = ':';
11666             s++;
11667         }
11668         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11669             *d++ = *s++;
11670             *d++ = *s++;
11671         }
11672         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11673             char *t = s + UTF8SKIP(s);
11674             size_t len;
11675             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11676                 t += UTF8SKIP(t);
11677             len = t - s;
11678             if (d + len > e)
11679                 Perl_croak(aTHX_ ident_too_long);
11680             Copy(s, d, len, char);
11681             d += len;
11682             s = t;
11683         }
11684         else {
11685             *d = '\0';
11686             *slp = d - dest;
11687             return s;
11688         }
11689     }
11690 }
11691
11692 STATIC char *
11693 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11694 {
11695     dVAR;
11696     char *bracket = NULL;
11697     char funny = *s++;
11698     register char *d = dest;
11699     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11700
11701     PERL_ARGS_ASSERT_SCAN_IDENT;
11702
11703     if (isSPACE(*s))
11704         s = PEEKSPACE(s);
11705     if (isDIGIT(*s)) {
11706         while (isDIGIT(*s)) {
11707             if (d >= e)
11708                 Perl_croak(aTHX_ ident_too_long);
11709             *d++ = *s++;
11710         }
11711     }
11712     else {
11713         for (;;) {
11714             if (d >= e)
11715                 Perl_croak(aTHX_ ident_too_long);
11716             if (isALNUM(*s))    /* UTF handled below */
11717                 *d++ = *s++;
11718             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11719                 *d++ = ':';
11720                 *d++ = ':';
11721                 s++;
11722             }
11723             else if (*s == ':' && s[1] == ':') {
11724                 *d++ = *s++;
11725                 *d++ = *s++;
11726             }
11727             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11728                 char *t = s + UTF8SKIP(s);
11729                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11730                     t += UTF8SKIP(t);
11731                 if (d + (t - s) > e)
11732                     Perl_croak(aTHX_ ident_too_long);
11733                 Copy(s, d, t - s, char);
11734                 d += t - s;
11735                 s = t;
11736             }
11737             else
11738                 break;
11739         }
11740     }
11741     *d = '\0';
11742     d = dest;
11743     if (*d) {
11744         if (PL_lex_state != LEX_NORMAL)
11745             PL_lex_state = LEX_INTERPENDMAYBE;
11746         return s;
11747     }
11748     if (*s == '$' && s[1] &&
11749         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11750     {
11751         return s;
11752     }
11753     if (*s == '{') {
11754         bracket = s;
11755         s++;
11756     }
11757     else if (ck_uni)
11758         check_uni();
11759     if (s < send)
11760         *d = *s++;
11761     d[1] = '\0';
11762     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11763         *d = toCTRL(*s);
11764         s++;
11765     }
11766     if (bracket) {
11767         if (isSPACE(s[-1])) {
11768             while (s < send) {
11769                 const char ch = *s++;
11770                 if (!SPACE_OR_TAB(ch)) {
11771                     *d = ch;
11772                     break;
11773                 }
11774             }
11775         }
11776         if (isIDFIRST_lazy_if(d,UTF)) {
11777             d++;
11778             if (UTF) {
11779                 char *end = s;
11780                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11781                     end += UTF8SKIP(end);
11782                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11783                         end += UTF8SKIP(end);
11784                 }
11785                 Copy(s, d, end - s, char);
11786                 d += end - s;
11787                 s = end;
11788             }
11789             else {
11790                 while ((isALNUM(*s) || *s == ':') && d < e)
11791                     *d++ = *s++;
11792                 if (d >= e)
11793                     Perl_croak(aTHX_ ident_too_long);
11794             }
11795             *d = '\0';
11796             while (s < send && SPACE_OR_TAB(*s))
11797                 s++;
11798             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11799                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11800                     const char * const brack =
11801                         (const char *)
11802                         ((*s == '[') ? "[...]" : "{...}");
11803                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11804                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11805                         funny, dest, brack, funny, dest, brack);
11806                 }
11807                 bracket++;
11808                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11809                 return s;
11810             }
11811         }
11812         /* Handle extended ${^Foo} variables
11813          * 1999-02-27 mjd-perl-patch@plover.com */
11814         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11815                  && isALNUM(*s))
11816         {
11817             d++;
11818             while (isALNUM(*s) && d < e) {
11819                 *d++ = *s++;
11820             }
11821             if (d >= e)
11822                 Perl_croak(aTHX_ ident_too_long);
11823             *d = '\0';
11824         }
11825         if (*s == '}') {
11826             s++;
11827             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11828                 PL_lex_state = LEX_INTERPEND;
11829                 PL_expect = XREF;
11830             }
11831             if (PL_lex_state == LEX_NORMAL) {
11832                 if (ckWARN(WARN_AMBIGUOUS) &&
11833                     (keyword(dest, d - dest, 0)
11834                      || get_cvn_flags(dest, d - dest, 0)))
11835                 {
11836                     if (funny == '#')
11837                         funny = '@';
11838                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11839                         "Ambiguous use of %c{%s} resolved to %c%s",
11840                         funny, dest, funny, dest);
11841                 }
11842             }
11843         }
11844         else {
11845             s = bracket;                /* let the parser handle it */
11846             *dest = '\0';
11847         }
11848     }
11849     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11850         PL_lex_state = LEX_INTERPEND;
11851     return s;
11852 }
11853
11854 static U32
11855 S_pmflag(U32 pmfl, const char ch) {
11856     switch (ch) {
11857         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11858     case GLOBAL_PAT_MOD:      pmfl |= PMf_GLOBAL; break;
11859     case CONTINUE_PAT_MOD:    pmfl |= PMf_CONTINUE; break;
11860     case ONCE_PAT_MOD:        pmfl |= PMf_KEEP; break;
11861     case KEEPCOPY_PAT_MOD:    pmfl |= PMf_KEEPCOPY; break;
11862     case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
11863     }
11864     return pmfl;
11865 }
11866
11867 STATIC char *
11868 S_scan_pat(pTHX_ char *start, I32 type)
11869 {
11870     dVAR;
11871     PMOP *pm;
11872     char *s = scan_str(start,!!PL_madskills,FALSE);
11873     const char * const valid_flags =
11874         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11875 #ifdef PERL_MAD
11876     char *modstart;
11877 #endif
11878
11879     PERL_ARGS_ASSERT_SCAN_PAT;
11880
11881     if (!s) {
11882         const char * const delimiter = skipspace(start);
11883         Perl_croak(aTHX_
11884                    (const char *)
11885                    (*delimiter == '?'
11886                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11887                     : "Search pattern not terminated" ));
11888     }
11889
11890     pm = (PMOP*)newPMOP(type, 0);
11891     if (PL_multi_open == '?') {
11892         /* This is the only point in the code that sets PMf_ONCE:  */
11893         pm->op_pmflags |= PMf_ONCE;
11894
11895         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11896            allows us to restrict the list needed by reset to just the ??
11897            matches.  */
11898         assert(type != OP_TRANS);
11899         if (PL_curstash) {
11900             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11901             U32 elements;
11902             if (!mg) {
11903                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11904                                  0);
11905             }
11906             elements = mg->mg_len / sizeof(PMOP**);
11907             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11908             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11909             mg->mg_len = elements * sizeof(PMOP**);
11910             PmopSTASH_set(pm,PL_curstash);
11911         }
11912     }
11913 #ifdef PERL_MAD
11914     modstart = s;
11915 #endif
11916     while (*s && strchr(valid_flags, *s))
11917         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11918
11919     if (isALNUM(*s)) {
11920         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11921             "Having no space between pattern and following word is deprecated");
11922
11923     }
11924 #ifdef PERL_MAD
11925     if (PL_madskills && modstart != s) {
11926         SV* tmptoken = newSVpvn(modstart, s - modstart);
11927         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11928     }
11929 #endif
11930     /* issue a warning if /c is specified,but /g is not */
11931     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11932     {
11933         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11934                        "Use of /c modifier is meaningless without /g" );
11935     }
11936
11937     PL_lex_op = (OP*)pm;
11938     pl_yylval.ival = OP_MATCH;
11939     return s;
11940 }
11941
11942 STATIC char *
11943 S_scan_subst(pTHX_ char *start)
11944 {
11945     dVAR;
11946     register char *s;
11947     register PMOP *pm;
11948     I32 first_start;
11949     I32 es = 0;
11950 #ifdef PERL_MAD
11951     char *modstart;
11952 #endif
11953
11954     PERL_ARGS_ASSERT_SCAN_SUBST;
11955
11956     pl_yylval.ival = OP_NULL;
11957
11958     s = scan_str(start,!!PL_madskills,FALSE);
11959
11960     if (!s)
11961         Perl_croak(aTHX_ "Substitution pattern not terminated");
11962
11963     if (s[-1] == PL_multi_open)
11964         s--;
11965 #ifdef PERL_MAD
11966     if (PL_madskills) {
11967         CURMAD('q', PL_thisopen);
11968         CURMAD('_', PL_thiswhite);
11969         CURMAD('E', PL_thisstuff);
11970         CURMAD('Q', PL_thisclose);
11971         PL_realtokenstart = s - SvPVX(PL_linestr);
11972     }
11973 #endif
11974
11975     first_start = PL_multi_start;
11976     s = scan_str(s,!!PL_madskills,FALSE);
11977     if (!s) {
11978         if (PL_lex_stuff) {
11979             SvREFCNT_dec(PL_lex_stuff);
11980             PL_lex_stuff = NULL;
11981         }
11982         Perl_croak(aTHX_ "Substitution replacement not terminated");
11983     }
11984     PL_multi_start = first_start;       /* so whole substitution is taken together */
11985
11986     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11987
11988 #ifdef PERL_MAD
11989     if (PL_madskills) {
11990         CURMAD('z', PL_thisopen);
11991         CURMAD('R', PL_thisstuff);
11992         CURMAD('Z', PL_thisclose);
11993     }
11994     modstart = s;
11995 #endif
11996
11997     while (*s) {
11998         if (*s == EXEC_PAT_MOD) {
11999             s++;
12000             es++;
12001         }
12002         else if (strchr(S_PAT_MODS, *s))
12003             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12004         else {
12005             if (isALNUM(*s)) {
12006                 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12007                     "Having no space between pattern and following word is deprecated");
12008
12009             }
12010             break;
12011         }
12012     }
12013
12014 #ifdef PERL_MAD
12015     if (PL_madskills) {
12016         if (modstart != s)
12017             curmad('m', newSVpvn(modstart, s - modstart));
12018         append_madprops(PL_thismad, (OP*)pm, 0);
12019         PL_thismad = 0;
12020     }
12021 #endif
12022     if ((pm->op_pmflags & PMf_CONTINUE)) {
12023         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
12024     }
12025
12026     if (es) {
12027         SV * const repl = newSVpvs("");
12028
12029         PL_sublex_info.super_bufptr = s;
12030         PL_sublex_info.super_bufend = PL_bufend;
12031         PL_multi_end = 0;
12032         pm->op_pmflags |= PMf_EVAL;
12033         while (es-- > 0) {
12034             if (es)
12035                 sv_catpvs(repl, "eval ");
12036             else
12037                 sv_catpvs(repl, "do ");
12038         }
12039         sv_catpvs(repl, "{");
12040         sv_catsv(repl, PL_lex_repl);
12041         if (strchr(SvPVX(PL_lex_repl), '#'))
12042             sv_catpvs(repl, "\n");
12043         sv_catpvs(repl, "}");
12044         SvEVALED_on(repl);
12045         SvREFCNT_dec(PL_lex_repl);
12046         PL_lex_repl = repl;
12047     }
12048
12049     PL_lex_op = (OP*)pm;
12050     pl_yylval.ival = OP_SUBST;
12051     return s;
12052 }
12053
12054 STATIC char *
12055 S_scan_trans(pTHX_ char *start)
12056 {
12057     dVAR;
12058     register char* s;
12059     OP *o;
12060     short *tbl;
12061     U8 squash;
12062     U8 del;
12063     U8 complement;
12064 #ifdef PERL_MAD
12065     char *modstart;
12066 #endif
12067
12068     PERL_ARGS_ASSERT_SCAN_TRANS;
12069
12070     pl_yylval.ival = OP_NULL;
12071
12072     s = scan_str(start,!!PL_madskills,FALSE);
12073     if (!s)
12074         Perl_croak(aTHX_ "Transliteration pattern not terminated");
12075
12076     if (s[-1] == PL_multi_open)
12077         s--;
12078 #ifdef PERL_MAD
12079     if (PL_madskills) {
12080         CURMAD('q', PL_thisopen);
12081         CURMAD('_', PL_thiswhite);
12082         CURMAD('E', PL_thisstuff);
12083         CURMAD('Q', PL_thisclose);
12084         PL_realtokenstart = s - SvPVX(PL_linestr);
12085     }
12086 #endif
12087
12088     s = scan_str(s,!!PL_madskills,FALSE);
12089     if (!s) {
12090         if (PL_lex_stuff) {
12091             SvREFCNT_dec(PL_lex_stuff);
12092             PL_lex_stuff = NULL;
12093         }
12094         Perl_croak(aTHX_ "Transliteration replacement not terminated");
12095     }
12096     if (PL_madskills) {
12097         CURMAD('z', PL_thisopen);
12098         CURMAD('R', PL_thisstuff);
12099         CURMAD('Z', PL_thisclose);
12100     }
12101
12102     complement = del = squash = 0;
12103 #ifdef PERL_MAD
12104     modstart = s;
12105 #endif
12106     while (1) {
12107         switch (*s) {
12108         case 'c':
12109             complement = OPpTRANS_COMPLEMENT;
12110             break;
12111         case 'd':
12112             del = OPpTRANS_DELETE;
12113             break;
12114         case 's':
12115             squash = OPpTRANS_SQUASH;
12116             break;
12117         default:
12118             goto no_more;
12119         }
12120         s++;
12121     }
12122   no_more:
12123
12124     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12125     o = newPVOP(OP_TRANS, 0, (char*)tbl);
12126     o->op_private &= ~OPpTRANS_ALL;
12127     o->op_private |= del|squash|complement|
12128       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12129       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
12130
12131     PL_lex_op = o;
12132     pl_yylval.ival = OP_TRANS;
12133
12134 #ifdef PERL_MAD
12135     if (PL_madskills) {
12136         if (modstart != s)
12137             curmad('m', newSVpvn(modstart, s - modstart));
12138         append_madprops(PL_thismad, o, 0);
12139         PL_thismad = 0;
12140     }
12141 #endif
12142
12143     return s;
12144 }
12145
12146 STATIC char *
12147 S_scan_heredoc(pTHX_ register char *s)
12148 {
12149     dVAR;
12150     SV *herewas;
12151     I32 op_type = OP_SCALAR;
12152     I32 len;
12153     SV *tmpstr;
12154     char term;
12155     const char *found_newline;
12156     register char *d;
12157     register char *e;
12158     char *peek;
12159     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12160 #ifdef PERL_MAD
12161     I32 stuffstart = s - SvPVX(PL_linestr);
12162     char *tstart;
12163
12164     PL_realtokenstart = -1;
12165 #endif
12166
12167     PERL_ARGS_ASSERT_SCAN_HEREDOC;
12168
12169     s += 2;
12170     d = PL_tokenbuf;
12171     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12172     if (!outer)
12173         *d++ = '\n';
12174     peek = s;
12175     while (SPACE_OR_TAB(*peek))
12176         peek++;
12177     if (*peek == '`' || *peek == '\'' || *peek =='"') {
12178         s = peek;
12179         term = *s++;
12180         s = delimcpy(d, e, s, PL_bufend, term, &len);
12181         d += len;
12182         if (s < PL_bufend)
12183             s++;
12184     }
12185     else {
12186         if (*s == '\\')
12187             s++, term = '\'';
12188         else
12189             term = '"';
12190         if (!isALNUM_lazy_if(s,UTF))
12191             deprecate("bare << to mean <<\"\"");
12192         for (; isALNUM_lazy_if(s,UTF); s++) {
12193             if (d < e)
12194                 *d++ = *s;
12195         }
12196     }
12197     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12198         Perl_croak(aTHX_ "Delimiter for here document is too long");
12199     *d++ = '\n';
12200     *d = '\0';
12201     len = d - PL_tokenbuf;
12202
12203 #ifdef PERL_MAD
12204     if (PL_madskills) {
12205         tstart = PL_tokenbuf + !outer;
12206         PL_thisclose = newSVpvn(tstart, len - !outer);
12207         tstart = SvPVX(PL_linestr) + stuffstart;
12208         PL_thisopen = newSVpvn(tstart, s - tstart);
12209         stuffstart = s - SvPVX(PL_linestr);
12210     }
12211 #endif
12212 #ifndef PERL_STRICT_CR
12213     d = strchr(s, '\r');
12214     if (d) {
12215         char * const olds = s;
12216         s = d;
12217         while (s < PL_bufend) {
12218             if (*s == '\r') {
12219                 *d++ = '\n';
12220                 if (*++s == '\n')
12221                     s++;
12222             }
12223             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
12224                 *d++ = *s++;
12225                 s++;
12226             }
12227             else
12228                 *d++ = *s++;
12229         }
12230         *d = '\0';
12231         PL_bufend = d;
12232         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12233         s = olds;
12234     }
12235 #endif
12236 #ifdef PERL_MAD
12237     found_newline = 0;
12238 #endif
12239     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12240         herewas = newSVpvn(s,PL_bufend-s);
12241     }
12242     else {
12243 #ifdef PERL_MAD
12244         herewas = newSVpvn(s-1,found_newline-s+1);
12245 #else
12246         s--;
12247         herewas = newSVpvn(s,found_newline-s);
12248 #endif
12249     }
12250 #ifdef PERL_MAD
12251     if (PL_madskills) {
12252         tstart = SvPVX(PL_linestr) + stuffstart;
12253         if (PL_thisstuff)
12254             sv_catpvn(PL_thisstuff, tstart, s - tstart);
12255         else
12256             PL_thisstuff = newSVpvn(tstart, s - tstart);
12257     }
12258 #endif
12259     s += SvCUR(herewas);
12260
12261 #ifdef PERL_MAD
12262     stuffstart = s - SvPVX(PL_linestr);
12263
12264     if (found_newline)
12265         s--;
12266 #endif
12267
12268     tmpstr = newSV_type(SVt_PVIV);
12269     SvGROW(tmpstr, 80);
12270     if (term == '\'') {
12271         op_type = OP_CONST;
12272         SvIV_set(tmpstr, -1);
12273     }
12274     else if (term == '`') {
12275         op_type = OP_BACKTICK;
12276         SvIV_set(tmpstr, '\\');
12277     }
12278
12279     CLINE;
12280     PL_multi_start = CopLINE(PL_curcop);
12281     PL_multi_open = PL_multi_close = '<';
12282     term = *PL_tokenbuf;
12283     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12284         char * const bufptr = PL_sublex_info.super_bufptr;
12285         char * const bufend = PL_sublex_info.super_bufend;
12286         char * const olds = s - SvCUR(herewas);
12287         s = strchr(bufptr, '\n');
12288         if (!s)
12289             s = bufend;
12290         d = s;
12291         while (s < bufend &&
12292           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12293             if (*s++ == '\n')
12294                 CopLINE_inc(PL_curcop);
12295         }
12296         if (s >= bufend) {
12297             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12298             missingterm(PL_tokenbuf);
12299         }
12300         sv_setpvn(herewas,bufptr,d-bufptr+1);
12301         sv_setpvn(tmpstr,d+1,s-d);
12302         s += len - 1;
12303         sv_catpvn(herewas,s,bufend-s);
12304         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12305
12306         s = olds;
12307         goto retval;
12308     }
12309     else if (!outer) {
12310         d = s;
12311         while (s < PL_bufend &&
12312           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12313             if (*s++ == '\n')
12314                 CopLINE_inc(PL_curcop);
12315         }
12316         if (s >= PL_bufend) {
12317             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12318             missingterm(PL_tokenbuf);
12319         }
12320         sv_setpvn(tmpstr,d+1,s-d);
12321 #ifdef PERL_MAD
12322         if (PL_madskills) {
12323             if (PL_thisstuff)
12324                 sv_catpvn(PL_thisstuff, d + 1, s - d);
12325             else
12326                 PL_thisstuff = newSVpvn(d + 1, s - d);
12327             stuffstart = s - SvPVX(PL_linestr);
12328         }
12329 #endif
12330         s += len - 1;
12331         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12332
12333         sv_catpvn(herewas,s,PL_bufend-s);
12334         sv_setsv(PL_linestr,herewas);
12335         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12336         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12337         PL_last_lop = PL_last_uni = NULL;
12338     }
12339     else
12340         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12341     while (s >= PL_bufend) {    /* multiple line string? */
12342 #ifdef PERL_MAD
12343         if (PL_madskills) {
12344             tstart = SvPVX(PL_linestr) + stuffstart;
12345             if (PL_thisstuff)
12346                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12347             else
12348                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12349         }
12350 #endif
12351         PL_bufptr = s;
12352         CopLINE_inc(PL_curcop);
12353         if (!outer || !lex_next_chunk(0)) {
12354             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12355             missingterm(PL_tokenbuf);
12356         }
12357         CopLINE_dec(PL_curcop);
12358         s = PL_bufptr;
12359 #ifdef PERL_MAD
12360         stuffstart = s - SvPVX(PL_linestr);
12361 #endif
12362         CopLINE_inc(PL_curcop);
12363         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12364         PL_last_lop = PL_last_uni = NULL;
12365 #ifndef PERL_STRICT_CR
12366         if (PL_bufend - PL_linestart >= 2) {
12367             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12368                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12369             {
12370                 PL_bufend[-2] = '\n';
12371                 PL_bufend--;
12372                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12373             }
12374             else if (PL_bufend[-1] == '\r')
12375                 PL_bufend[-1] = '\n';
12376         }
12377         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12378             PL_bufend[-1] = '\n';
12379 #endif
12380         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12381             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12382             *(SvPVX(PL_linestr) + off ) = ' ';
12383             sv_catsv(PL_linestr,herewas);
12384             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12385             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12386         }
12387         else {
12388             s = PL_bufend;
12389             sv_catsv(tmpstr,PL_linestr);
12390         }
12391     }
12392     s++;
12393 retval:
12394     PL_multi_end = CopLINE(PL_curcop);
12395     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12396         SvPV_shrink_to_cur(tmpstr);
12397     }
12398     SvREFCNT_dec(herewas);
12399     if (!IN_BYTES) {
12400         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12401             SvUTF8_on(tmpstr);
12402         else if (PL_encoding)
12403             sv_recode_to_utf8(tmpstr, PL_encoding);
12404     }
12405     PL_lex_stuff = tmpstr;
12406     pl_yylval.ival = op_type;
12407     return s;
12408 }
12409
12410 /* scan_inputsymbol
12411    takes: current position in input buffer
12412    returns: new position in input buffer
12413    side-effects: pl_yylval and lex_op are set.
12414
12415    This code handles:
12416
12417    <>           read from ARGV
12418    <FH>         read from filehandle
12419    <pkg::FH>    read from package qualified filehandle
12420    <pkg'FH>     read from package qualified filehandle
12421    <$fh>        read from filehandle in $fh
12422    <*.h>        filename glob
12423
12424 */
12425
12426 STATIC char *
12427 S_scan_inputsymbol(pTHX_ char *start)
12428 {
12429     dVAR;
12430     register char *s = start;           /* current position in buffer */
12431     char *end;
12432     I32 len;
12433     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12434     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12435
12436     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12437
12438     end = strchr(s, '\n');
12439     if (!end)
12440         end = PL_bufend;
12441     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12442
12443     /* die if we didn't have space for the contents of the <>,
12444        or if it didn't end, or if we see a newline
12445     */
12446
12447     if (len >= (I32)sizeof PL_tokenbuf)
12448         Perl_croak(aTHX_ "Excessively long <> operator");
12449     if (s >= end)
12450         Perl_croak(aTHX_ "Unterminated <> operator");
12451
12452     s++;
12453
12454     /* check for <$fh>
12455        Remember, only scalar variables are interpreted as filehandles by
12456        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12457        treated as a glob() call.
12458        This code makes use of the fact that except for the $ at the front,
12459        a scalar variable and a filehandle look the same.
12460     */
12461     if (*d == '$' && d[1]) d++;
12462
12463     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12464     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12465         d++;
12466
12467     /* If we've tried to read what we allow filehandles to look like, and
12468        there's still text left, then it must be a glob() and not a getline.
12469        Use scan_str to pull out the stuff between the <> and treat it
12470        as nothing more than a string.
12471     */
12472
12473     if (d - PL_tokenbuf != len) {
12474         pl_yylval.ival = OP_GLOB;
12475         s = scan_str(start,!!PL_madskills,FALSE);
12476         if (!s)
12477            Perl_croak(aTHX_ "Glob not terminated");
12478         return s;
12479     }
12480     else {
12481         bool readline_overriden = FALSE;
12482         GV *gv_readline;
12483         GV **gvp;
12484         /* we're in a filehandle read situation */
12485         d = PL_tokenbuf;
12486
12487         /* turn <> into <ARGV> */
12488         if (!len)
12489             Copy("ARGV",d,5,char);
12490
12491         /* Check whether readline() is overriden */
12492         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12493         if ((gv_readline
12494                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12495                 ||
12496                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12497                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12498                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12499             readline_overriden = TRUE;
12500
12501         /* if <$fh>, create the ops to turn the variable into a
12502            filehandle
12503         */
12504         if (*d == '$') {
12505             /* try to find it in the pad for this block, otherwise find
12506                add symbol table ops
12507             */
12508             const PADOFFSET tmp = pad_findmy(d, len, 0);
12509             if (tmp != NOT_IN_PAD) {
12510                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12511                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12512                     HEK * const stashname = HvNAME_HEK(stash);
12513                     SV * const sym = sv_2mortal(newSVhek(stashname));
12514                     sv_catpvs(sym, "::");
12515                     sv_catpv(sym, d+1);
12516                     d = SvPVX(sym);
12517                     goto intro_sym;
12518                 }
12519                 else {
12520                     OP * const o = newOP(OP_PADSV, 0);
12521                     o->op_targ = tmp;
12522                     PL_lex_op = readline_overriden
12523                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12524                                 append_elem(OP_LIST, o,
12525                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12526                         : (OP*)newUNOP(OP_READLINE, 0, o);
12527                 }
12528             }
12529             else {
12530                 GV *gv;
12531                 ++d;
12532 intro_sym:
12533                 gv = gv_fetchpv(d,
12534                                 (PL_in_eval
12535                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12536                                  : GV_ADDMULTI),
12537                                 SVt_PV);
12538                 PL_lex_op = readline_overriden
12539                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12540                             append_elem(OP_LIST,
12541                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12542                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12543                     : (OP*)newUNOP(OP_READLINE, 0,
12544                             newUNOP(OP_RV2SV, 0,
12545                                 newGVOP(OP_GV, 0, gv)));
12546             }
12547             if (!readline_overriden)
12548                 PL_lex_op->op_flags |= OPf_SPECIAL;
12549             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12550             pl_yylval.ival = OP_NULL;
12551         }
12552
12553         /* If it's none of the above, it must be a literal filehandle
12554            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12555         else {
12556             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12557             PL_lex_op = readline_overriden
12558                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12559                         append_elem(OP_LIST,
12560                             newGVOP(OP_GV, 0, gv),
12561                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12562                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12563             pl_yylval.ival = OP_NULL;
12564         }
12565     }
12566
12567     return s;
12568 }
12569
12570
12571 /* scan_str
12572    takes: start position in buffer
12573           keep_quoted preserve \ on the embedded delimiter(s)
12574           keep_delims preserve the delimiters around the string
12575    returns: position to continue reading from buffer
12576    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12577         updates the read buffer.
12578
12579    This subroutine pulls a string out of the input.  It is called for:
12580         q               single quotes           q(literal text)
12581         '               single quotes           'literal text'
12582         qq              double quotes           qq(interpolate $here please)
12583         "               double quotes           "interpolate $here please"
12584         qx              backticks               qx(/bin/ls -l)
12585         `               backticks               `/bin/ls -l`
12586         qw              quote words             @EXPORT_OK = qw( func() $spam )
12587         m//             regexp match            m/this/
12588         s///            regexp substitute       s/this/that/
12589         tr///           string transliterate    tr/this/that/
12590         y///            string transliterate    y/this/that/
12591         ($*@)           sub prototypes          sub foo ($)
12592         (stuff)         sub attr parameters     sub foo : attr(stuff)
12593         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12594
12595    In most of these cases (all but <>, patterns and transliterate)
12596    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12597    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12598    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12599    calls scan_str().
12600
12601    It skips whitespace before the string starts, and treats the first
12602    character as the delimiter.  If the delimiter is one of ([{< then
12603    the corresponding "close" character )]}> is used as the closing
12604    delimiter.  It allows quoting of delimiters, and if the string has
12605    balanced delimiters ([{<>}]) it allows nesting.
12606
12607    On success, the SV with the resulting string is put into lex_stuff or,
12608    if that is already non-NULL, into lex_repl. The second case occurs only
12609    when parsing the RHS of the special constructs s/// and tr/// (y///).
12610    For convenience, the terminating delimiter character is stuffed into
12611    SvIVX of the SV.
12612 */
12613
12614 STATIC char *
12615 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12616 {
12617     dVAR;
12618     SV *sv;                             /* scalar value: string */
12619     const char *tmps;                   /* temp string, used for delimiter matching */
12620     register char *s = start;           /* current position in the buffer */
12621     register char term;                 /* terminating character */
12622     register char *to;                  /* current position in the sv's data */
12623     I32 brackets = 1;                   /* bracket nesting level */
12624     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12625     I32 termcode;                       /* terminating char. code */
12626     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12627     STRLEN termlen;                     /* length of terminating string */
12628     int last_off = 0;                   /* last position for nesting bracket */
12629 #ifdef PERL_MAD
12630     int stuffstart;
12631     char *tstart;
12632 #endif
12633
12634     PERL_ARGS_ASSERT_SCAN_STR;
12635
12636     /* skip space before the delimiter */
12637     if (isSPACE(*s)) {
12638         s = PEEKSPACE(s);
12639     }
12640
12641 #ifdef PERL_MAD
12642     if (PL_realtokenstart >= 0) {
12643         stuffstart = PL_realtokenstart;
12644         PL_realtokenstart = -1;
12645     }
12646     else
12647         stuffstart = start - SvPVX(PL_linestr);
12648 #endif
12649     /* mark where we are, in case we need to report errors */
12650     CLINE;
12651
12652     /* after skipping whitespace, the next character is the terminator */
12653     term = *s;
12654     if (!UTF) {
12655         termcode = termstr[0] = term;
12656         termlen = 1;
12657     }
12658     else {
12659         termcode = utf8_to_uvchr((U8*)s, &termlen);
12660         Copy(s, termstr, termlen, U8);
12661         if (!UTF8_IS_INVARIANT(term))
12662             has_utf8 = TRUE;
12663     }
12664
12665     /* mark where we are */
12666     PL_multi_start = CopLINE(PL_curcop);
12667     PL_multi_open = term;
12668
12669     /* find corresponding closing delimiter */
12670     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12671         termcode = termstr[0] = term = tmps[5];
12672
12673     PL_multi_close = term;
12674
12675     /* create a new SV to hold the contents.  79 is the SV's initial length.
12676        What a random number. */
12677     sv = newSV_type(SVt_PVIV);
12678     SvGROW(sv, 80);
12679     SvIV_set(sv, termcode);
12680     (void)SvPOK_only(sv);               /* validate pointer */
12681
12682     /* move past delimiter and try to read a complete string */
12683     if (keep_delims)
12684         sv_catpvn(sv, s, termlen);
12685     s += termlen;
12686 #ifdef PERL_MAD
12687     tstart = SvPVX(PL_linestr) + stuffstart;
12688     if (!PL_thisopen && !keep_delims) {
12689         PL_thisopen = newSVpvn(tstart, s - tstart);
12690         stuffstart = s - SvPVX(PL_linestr);
12691     }
12692 #endif
12693     for (;;) {
12694         if (PL_encoding && !UTF) {
12695             bool cont = TRUE;
12696
12697             while (cont) {
12698                 int offset = s - SvPVX_const(PL_linestr);
12699                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12700                                            &offset, (char*)termstr, termlen);
12701                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12702                 char * const svlast = SvEND(sv) - 1;
12703
12704                 for (; s < ns; s++) {
12705                     if (*s == '\n' && !PL_rsfp)
12706                         CopLINE_inc(PL_curcop);
12707                 }
12708                 if (!found)
12709                     goto read_more_line;
12710                 else {
12711                     /* handle quoted delimiters */
12712                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12713                         const char *t;
12714                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12715                             t--;
12716                         if ((svlast-1 - t) % 2) {
12717                             if (!keep_quoted) {
12718                                 *(svlast-1) = term;
12719                                 *svlast = '\0';
12720                                 SvCUR_set(sv, SvCUR(sv) - 1);
12721                             }
12722                             continue;
12723                         }
12724                     }
12725                     if (PL_multi_open == PL_multi_close) {
12726                         cont = FALSE;
12727                     }
12728                     else {
12729                         const char *t;
12730                         char *w;
12731                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12732                             /* At here, all closes are "was quoted" one,
12733                                so we don't check PL_multi_close. */
12734                             if (*t == '\\') {
12735                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12736                                     t++;
12737                                 else
12738                                     *w++ = *t++;
12739                             }
12740                             else if (*t == PL_multi_open)
12741                                 brackets++;
12742
12743                             *w = *t;
12744                         }
12745                         if (w < t) {
12746                             *w++ = term;
12747                             *w = '\0';
12748                             SvCUR_set(sv, w - SvPVX_const(sv));
12749                         }
12750                         last_off = w - SvPVX(sv);
12751                         if (--brackets <= 0)
12752                             cont = FALSE;
12753                     }
12754                 }
12755             }
12756             if (!keep_delims) {
12757                 SvCUR_set(sv, SvCUR(sv) - 1);
12758                 *SvEND(sv) = '\0';
12759             }
12760             break;
12761         }
12762
12763         /* extend sv if need be */
12764         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12765         /* set 'to' to the next character in the sv's string */
12766         to = SvPVX(sv)+SvCUR(sv);
12767
12768         /* if open delimiter is the close delimiter read unbridle */
12769         if (PL_multi_open == PL_multi_close) {
12770             for (; s < PL_bufend; s++,to++) {
12771                 /* embedded newlines increment the current line number */
12772                 if (*s == '\n' && !PL_rsfp)
12773                     CopLINE_inc(PL_curcop);
12774                 /* handle quoted delimiters */
12775                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12776                     if (!keep_quoted && s[1] == term)
12777                         s++;
12778                 /* any other quotes are simply copied straight through */
12779                     else
12780                         *to++ = *s++;
12781                 }
12782                 /* terminate when run out of buffer (the for() condition), or
12783                    have found the terminator */
12784                 else if (*s == term) {
12785                     if (termlen == 1)
12786                         break;
12787                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12788                         break;
12789                 }
12790                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12791                     has_utf8 = TRUE;
12792                 *to = *s;
12793             }
12794         }
12795
12796         /* if the terminator isn't the same as the start character (e.g.,
12797            matched brackets), we have to allow more in the quoting, and
12798            be prepared for nested brackets.
12799         */
12800         else {
12801             /* read until we run out of string, or we find the terminator */
12802             for (; s < PL_bufend; s++,to++) {
12803                 /* embedded newlines increment the line count */
12804                 if (*s == '\n' && !PL_rsfp)
12805                     CopLINE_inc(PL_curcop);
12806                 /* backslashes can escape the open or closing characters */
12807                 if (*s == '\\' && s+1 < PL_bufend) {
12808                     if (!keep_quoted &&
12809                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12810                         s++;
12811                     else
12812                         *to++ = *s++;
12813                 }
12814                 /* allow nested opens and closes */
12815                 else if (*s == PL_multi_close && --brackets <= 0)
12816                     break;
12817                 else if (*s == PL_multi_open)
12818                     brackets++;
12819                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12820                     has_utf8 = TRUE;
12821                 *to = *s;
12822             }
12823         }
12824         /* terminate the copied string and update the sv's end-of-string */
12825         *to = '\0';
12826         SvCUR_set(sv, to - SvPVX_const(sv));
12827
12828         /*
12829          * this next chunk reads more into the buffer if we're not done yet
12830          */
12831
12832         if (s < PL_bufend)
12833             break;              /* handle case where we are done yet :-) */
12834
12835 #ifndef PERL_STRICT_CR
12836         if (to - SvPVX_const(sv) >= 2) {
12837             if ((to[-2] == '\r' && to[-1] == '\n') ||
12838                 (to[-2] == '\n' && to[-1] == '\r'))
12839             {
12840                 to[-2] = '\n';
12841                 to--;
12842                 SvCUR_set(sv, to - SvPVX_const(sv));
12843             }
12844             else if (to[-1] == '\r')
12845                 to[-1] = '\n';
12846         }
12847         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12848             to[-1] = '\n';
12849 #endif
12850
12851      read_more_line:
12852         /* if we're out of file, or a read fails, bail and reset the current
12853            line marker so we can report where the unterminated string began
12854         */
12855 #ifdef PERL_MAD
12856         if (PL_madskills) {
12857             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12858             if (PL_thisstuff)
12859                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12860             else
12861                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12862         }
12863 #endif
12864         CopLINE_inc(PL_curcop);
12865         PL_bufptr = PL_bufend;
12866         if (!lex_next_chunk(0)) {
12867             sv_free(sv);
12868             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12869             return NULL;
12870         }
12871         s = PL_bufptr;
12872 #ifdef PERL_MAD
12873         stuffstart = 0;
12874 #endif
12875     }
12876
12877     /* at this point, we have successfully read the delimited string */
12878
12879     if (!PL_encoding || UTF) {
12880 #ifdef PERL_MAD
12881         if (PL_madskills) {
12882             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12883             const int len = s - tstart;
12884             if (PL_thisstuff)
12885                 sv_catpvn(PL_thisstuff, tstart, len);
12886             else
12887                 PL_thisstuff = newSVpvn(tstart, len);
12888             if (!PL_thisclose && !keep_delims)
12889                 PL_thisclose = newSVpvn(s,termlen);
12890         }
12891 #endif
12892
12893         if (keep_delims)
12894             sv_catpvn(sv, s, termlen);
12895         s += termlen;
12896     }
12897 #ifdef PERL_MAD
12898     else {
12899         if (PL_madskills) {
12900             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12901             const int len = s - tstart - termlen;
12902             if (PL_thisstuff)
12903                 sv_catpvn(PL_thisstuff, tstart, len);
12904             else
12905                 PL_thisstuff = newSVpvn(tstart, len);
12906             if (!PL_thisclose && !keep_delims)
12907                 PL_thisclose = newSVpvn(s - termlen,termlen);
12908         }
12909     }
12910 #endif
12911     if (has_utf8 || PL_encoding)
12912         SvUTF8_on(sv);
12913
12914     PL_multi_end = CopLINE(PL_curcop);
12915
12916     /* if we allocated too much space, give some back */
12917     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12918         SvLEN_set(sv, SvCUR(sv) + 1);
12919         SvPV_renew(sv, SvLEN(sv));
12920     }
12921
12922     /* decide whether this is the first or second quoted string we've read
12923        for this op
12924     */
12925
12926     if (PL_lex_stuff)
12927         PL_lex_repl = sv;
12928     else
12929         PL_lex_stuff = sv;
12930     return s;
12931 }
12932
12933 /*
12934   scan_num
12935   takes: pointer to position in buffer
12936   returns: pointer to new position in buffer
12937   side-effects: builds ops for the constant in pl_yylval.op
12938
12939   Read a number in any of the formats that Perl accepts:
12940
12941   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12942   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12943   0b[01](_?[01])*
12944   0[0-7](_?[0-7])*
12945   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12946
12947   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12948   thing it reads.
12949
12950   If it reads a number without a decimal point or an exponent, it will
12951   try converting the number to an integer and see if it can do so
12952   without loss of precision.
12953 */
12954
12955 char *
12956 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12957 {
12958     dVAR;
12959     register const char *s = start;     /* current position in buffer */
12960     register char *d;                   /* destination in temp buffer */
12961     register char *e;                   /* end of temp buffer */
12962     NV nv;                              /* number read, as a double */
12963     SV *sv = NULL;                      /* place to put the converted number */
12964     bool floatit;                       /* boolean: int or float? */
12965     const char *lastub = NULL;          /* position of last underbar */
12966     static char const number_too_long[] = "Number too long";
12967
12968     PERL_ARGS_ASSERT_SCAN_NUM;
12969
12970     /* We use the first character to decide what type of number this is */
12971
12972     switch (*s) {
12973     default:
12974       Perl_croak(aTHX_ "panic: scan_num");
12975
12976     /* if it starts with a 0, it could be an octal number, a decimal in
12977        0.13 disguise, or a hexadecimal number, or a binary number. */
12978     case '0':
12979         {
12980           /* variables:
12981              u          holds the "number so far"
12982              shift      the power of 2 of the base
12983                         (hex == 4, octal == 3, binary == 1)
12984              overflowed was the number more than we can hold?
12985
12986              Shift is used when we add a digit.  It also serves as an "are
12987              we in octal/hex/binary?" indicator to disallow hex characters
12988              when in octal mode.
12989            */
12990             NV n = 0.0;
12991             UV u = 0;
12992             I32 shift;
12993             bool overflowed = FALSE;
12994             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12995             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12996             static const char* const bases[5] =
12997               { "", "binary", "", "octal", "hexadecimal" };
12998             static const char* const Bases[5] =
12999               { "", "Binary", "", "Octal", "Hexadecimal" };
13000             static const char* const maxima[5] =
13001               { "",
13002                 "0b11111111111111111111111111111111",
13003                 "",
13004                 "037777777777",
13005                 "0xffffffff" };
13006             const char *base, *Base, *max;
13007
13008             /* check for hex */
13009             if (s[1] == 'x' || s[1] == 'X') {
13010                 shift = 4;
13011                 s += 2;
13012                 just_zero = FALSE;
13013             } else if (s[1] == 'b' || s[1] == 'B') {
13014                 shift = 1;
13015                 s += 2;
13016                 just_zero = FALSE;
13017             }
13018             /* check for a decimal in disguise */
13019             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
13020                 goto decimal;
13021             /* so it must be octal */
13022             else {
13023                 shift = 3;
13024                 s++;
13025             }
13026
13027             if (*s == '_') {
13028                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13029                                "Misplaced _ in number");
13030                lastub = s++;
13031             }
13032
13033             base = bases[shift];
13034             Base = Bases[shift];
13035             max  = maxima[shift];
13036
13037             /* read the rest of the number */
13038             for (;;) {
13039                 /* x is used in the overflow test,
13040                    b is the digit we're adding on. */
13041                 UV x, b;
13042
13043                 switch (*s) {
13044
13045                 /* if we don't mention it, we're done */
13046                 default:
13047                     goto out;
13048
13049                 /* _ are ignored -- but warned about if consecutive */
13050                 case '_':
13051                     if (lastub && s == lastub + 1)
13052                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13053                                        "Misplaced _ in number");
13054                     lastub = s++;
13055                     break;
13056
13057                 /* 8 and 9 are not octal */
13058                 case '8': case '9':
13059                     if (shift == 3)
13060                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13061                     /* FALL THROUGH */
13062
13063                 /* octal digits */
13064                 case '2': case '3': case '4':
13065                 case '5': case '6': case '7':
13066                     if (shift == 1)
13067                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13068                     /* FALL THROUGH */
13069
13070                 case '0': case '1':
13071                     b = *s++ & 15;              /* ASCII digit -> value of digit */
13072                     goto digit;
13073
13074                 /* hex digits */
13075                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13076                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13077                     /* make sure they said 0x */
13078                     if (shift != 4)
13079                         goto out;
13080                     b = (*s++ & 7) + 9;
13081
13082                     /* Prepare to put the digit we have onto the end
13083                        of the number so far.  We check for overflows.
13084                     */
13085
13086                   digit:
13087                     just_zero = FALSE;
13088                     if (!overflowed) {
13089                         x = u << shift; /* make room for the digit */
13090
13091                         if ((x >> shift) != u
13092                             && !(PL_hints & HINT_NEW_BINARY)) {
13093                             overflowed = TRUE;
13094                             n = (NV) u;
13095                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13096                                              "Integer overflow in %s number",
13097                                              base);
13098                         } else
13099                             u = x | b;          /* add the digit to the end */
13100                     }
13101                     if (overflowed) {
13102                         n *= nvshift[shift];
13103                         /* If an NV has not enough bits in its
13104                          * mantissa to represent an UV this summing of
13105                          * small low-order numbers is a waste of time
13106                          * (because the NV cannot preserve the
13107                          * low-order bits anyway): we could just
13108                          * remember when did we overflow and in the
13109                          * end just multiply n by the right
13110                          * amount. */
13111                         n += (NV) b;
13112                     }
13113                     break;
13114                 }
13115             }
13116
13117           /* if we get here, we had success: make a scalar value from
13118              the number.
13119           */
13120           out:
13121
13122             /* final misplaced underbar check */
13123             if (s[-1] == '_') {
13124                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13125             }
13126
13127             if (overflowed) {
13128                 if (n > 4294967295.0)
13129                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13130                                    "%s number > %s non-portable",
13131                                    Base, max);
13132                 sv = newSVnv(n);
13133             }
13134             else {
13135 #if UVSIZE > 4
13136                 if (u > 0xffffffff)
13137                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13138                                    "%s number > %s non-portable",
13139                                    Base, max);
13140 #endif
13141                 sv = newSVuv(u);
13142             }
13143             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13144                 sv = new_constant(start, s - start, "integer",
13145                                   sv, NULL, NULL, 0);
13146             else if (PL_hints & HINT_NEW_BINARY)
13147                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13148         }
13149         break;
13150
13151     /*
13152       handle decimal numbers.
13153       we're also sent here when we read a 0 as the first digit
13154     */
13155     case '1': case '2': case '3': case '4': case '5':
13156     case '6': case '7': case '8': case '9': case '.':
13157       decimal:
13158         d = PL_tokenbuf;
13159         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13160         floatit = FALSE;
13161
13162         /* read next group of digits and _ and copy into d */
13163         while (isDIGIT(*s) || *s == '_') {
13164             /* skip underscores, checking for misplaced ones
13165                if -w is on
13166             */
13167             if (*s == '_') {
13168                 if (lastub && s == lastub + 1)
13169                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13170                                    "Misplaced _ in number");
13171                 lastub = s++;
13172             }
13173             else {
13174                 /* check for end of fixed-length buffer */
13175                 if (d >= e)
13176                     Perl_croak(aTHX_ number_too_long);
13177                 /* if we're ok, copy the character */
13178                 *d++ = *s++;
13179             }
13180         }
13181
13182         /* final misplaced underbar check */
13183         if (lastub && s == lastub + 1) {
13184             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13185         }
13186
13187         /* read a decimal portion if there is one.  avoid
13188            3..5 being interpreted as the number 3. followed
13189            by .5
13190         */
13191         if (*s == '.' && s[1] != '.') {
13192             floatit = TRUE;
13193             *d++ = *s++;
13194
13195             if (*s == '_') {
13196                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13197                                "Misplaced _ in number");
13198                 lastub = s;
13199             }
13200
13201             /* copy, ignoring underbars, until we run out of digits.
13202             */
13203             for (; isDIGIT(*s) || *s == '_'; s++) {
13204                 /* fixed length buffer check */
13205                 if (d >= e)
13206                     Perl_croak(aTHX_ number_too_long);
13207                 if (*s == '_') {
13208                    if (lastub && s == lastub + 1)
13209                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13210                                       "Misplaced _ in number");
13211                    lastub = s;
13212                 }
13213                 else
13214                     *d++ = *s;
13215             }
13216             /* fractional part ending in underbar? */
13217             if (s[-1] == '_') {
13218                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13219                                "Misplaced _ in number");
13220             }
13221             if (*s == '.' && isDIGIT(s[1])) {
13222                 /* oops, it's really a v-string, but without the "v" */
13223                 s = start;
13224                 goto vstring;
13225             }
13226         }
13227
13228         /* read exponent part, if present */
13229         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13230             floatit = TRUE;
13231             s++;
13232
13233             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13234             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
13235
13236             /* stray preinitial _ */
13237             if (*s == '_') {
13238                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13239                                "Misplaced _ in number");
13240                 lastub = s++;
13241             }
13242
13243             /* allow positive or negative exponent */
13244             if (*s == '+' || *s == '-')
13245                 *d++ = *s++;
13246
13247             /* stray initial _ */
13248             if (*s == '_') {
13249                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13250                                "Misplaced _ in number");
13251                 lastub = s++;
13252             }
13253
13254             /* read digits of exponent */
13255             while (isDIGIT(*s) || *s == '_') {
13256                 if (isDIGIT(*s)) {
13257                     if (d >= e)
13258                         Perl_croak(aTHX_ number_too_long);
13259                     *d++ = *s++;
13260                 }
13261                 else {
13262                    if (((lastub && s == lastub + 1) ||
13263                         (!isDIGIT(s[1]) && s[1] != '_')))
13264                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13265                                       "Misplaced _ in number");
13266                    lastub = s++;
13267                 }
13268             }
13269         }
13270
13271
13272         /*
13273            We try to do an integer conversion first if no characters
13274            indicating "float" have been found.
13275          */
13276
13277         if (!floatit) {
13278             UV uv;
13279             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13280
13281             if (flags == IS_NUMBER_IN_UV) {
13282               if (uv <= IV_MAX)
13283                 sv = newSViv(uv); /* Prefer IVs over UVs. */
13284               else
13285                 sv = newSVuv(uv);
13286             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13287               if (uv <= (UV) IV_MIN)
13288                 sv = newSViv(-(IV)uv);
13289               else
13290                 floatit = TRUE;
13291             } else
13292               floatit = TRUE;
13293         }
13294         if (floatit) {
13295             /* terminate the string */
13296             *d = '\0';
13297             nv = Atof(PL_tokenbuf);
13298             sv = newSVnv(nv);
13299         }
13300
13301         if ( floatit
13302              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13303             const char *const key = floatit ? "float" : "integer";
13304             const STRLEN keylen = floatit ? 5 : 7;
13305             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13306                                 key, keylen, sv, NULL, NULL, 0);
13307         }
13308         break;
13309
13310     /* if it starts with a v, it could be a v-string */
13311     case 'v':
13312 vstring:
13313                 sv = newSV(5); /* preallocate storage space */
13314                 s = scan_vstring(s, PL_bufend, sv);
13315         break;
13316     }
13317
13318     /* make the op for the constant and return */
13319
13320     if (sv)
13321         lvalp->opval = newSVOP(OP_CONST, 0, sv);
13322     else
13323         lvalp->opval = NULL;
13324
13325     return (char *)s;
13326 }
13327
13328 STATIC char *
13329 S_scan_formline(pTHX_ register char *s)
13330 {
13331     dVAR;
13332     register char *eol;
13333     register char *t;
13334     SV * const stuff = newSVpvs("");
13335     bool needargs = FALSE;
13336     bool eofmt = FALSE;
13337 #ifdef PERL_MAD
13338     char *tokenstart = s;
13339     SV* savewhite = NULL;
13340
13341     if (PL_madskills) {
13342         savewhite = PL_thiswhite;
13343         PL_thiswhite = 0;
13344     }
13345 #endif
13346
13347     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13348
13349     while (!needargs) {
13350         if (*s == '.') {
13351             t = s+1;
13352 #ifdef PERL_STRICT_CR
13353             while (SPACE_OR_TAB(*t))
13354                 t++;
13355 #else
13356             while (SPACE_OR_TAB(*t) || *t == '\r')
13357                 t++;
13358 #endif
13359             if (*t == '\n' || t == PL_bufend) {
13360                 eofmt = TRUE;
13361                 break;
13362             }
13363         }
13364         if (PL_in_eval && !PL_rsfp) {
13365             eol = (char *) memchr(s,'\n',PL_bufend-s);
13366             if (!eol++)
13367                 eol = PL_bufend;
13368         }
13369         else
13370             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13371         if (*s != '#') {
13372             for (t = s; t < eol; t++) {
13373                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13374                     needargs = FALSE;
13375                     goto enough;        /* ~~ must be first line in formline */
13376                 }
13377                 if (*t == '@' || *t == '^')
13378                     needargs = TRUE;
13379             }
13380             if (eol > s) {
13381                 sv_catpvn(stuff, s, eol-s);
13382 #ifndef PERL_STRICT_CR
13383                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13384                     char *end = SvPVX(stuff) + SvCUR(stuff);
13385                     end[-2] = '\n';
13386                     end[-1] = '\0';
13387                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13388                 }
13389 #endif
13390             }
13391             else
13392               break;
13393         }
13394         s = (char*)eol;
13395         if (PL_rsfp) {
13396             bool got_some;
13397 #ifdef PERL_MAD
13398             if (PL_madskills) {
13399                 if (PL_thistoken)
13400                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13401                 else
13402                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13403             }
13404 #endif
13405             PL_bufptr = PL_bufend;
13406             CopLINE_inc(PL_curcop);
13407             got_some = lex_next_chunk(0);
13408             CopLINE_dec(PL_curcop);
13409             s = PL_bufptr;
13410 #ifdef PERL_MAD
13411             tokenstart = PL_bufptr;
13412 #endif
13413             if (!got_some)
13414                 break;
13415         }
13416         incline(s);
13417     }
13418   enough:
13419     if (SvCUR(stuff)) {
13420         PL_expect = XTERM;
13421         if (needargs) {
13422             PL_lex_state = LEX_NORMAL;
13423             start_force(PL_curforce);
13424             NEXTVAL_NEXTTOKE.ival = 0;
13425             force_next(',');
13426         }
13427         else
13428             PL_lex_state = LEX_FORMLINE;
13429         if (!IN_BYTES) {
13430             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13431                 SvUTF8_on(stuff);
13432             else if (PL_encoding)
13433                 sv_recode_to_utf8(stuff, PL_encoding);
13434         }
13435         start_force(PL_curforce);
13436         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13437         force_next(THING);
13438         start_force(PL_curforce);
13439         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13440         force_next(LSTOP);
13441     }
13442     else {
13443         SvREFCNT_dec(stuff);
13444         if (eofmt)
13445             PL_lex_formbrack = 0;
13446         PL_bufptr = s;
13447     }
13448 #ifdef PERL_MAD
13449     if (PL_madskills) {
13450         if (PL_thistoken)
13451             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13452         else
13453             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13454         PL_thiswhite = savewhite;
13455     }
13456 #endif
13457     return s;
13458 }
13459
13460 I32
13461 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13462 {
13463     dVAR;
13464     const I32 oldsavestack_ix = PL_savestack_ix;
13465     CV* const outsidecv = PL_compcv;
13466
13467     if (PL_compcv) {
13468         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13469     }
13470     SAVEI32(PL_subline);
13471     save_item(PL_subname);
13472     SAVESPTR(PL_compcv);
13473
13474     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13475     CvFLAGS(PL_compcv) |= flags;
13476
13477     PL_subline = CopLINE(PL_curcop);
13478     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13479     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13480     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13481
13482     return oldsavestack_ix;
13483 }
13484
13485 #ifdef __SC__
13486 #pragma segment Perl_yylex
13487 #endif
13488 static int
13489 S_yywarn(pTHX_ const char *const s)
13490 {
13491     dVAR;
13492
13493     PERL_ARGS_ASSERT_YYWARN;
13494
13495     PL_in_eval |= EVAL_WARNONLY;
13496     yyerror(s);
13497     PL_in_eval &= ~EVAL_WARNONLY;
13498     return 0;
13499 }
13500
13501 int
13502 Perl_yyerror(pTHX_ const char *const s)
13503 {
13504     dVAR;
13505     const char *where = NULL;
13506     const char *context = NULL;
13507     int contlen = -1;
13508     SV *msg;
13509     int yychar  = PL_parser->yychar;
13510
13511     PERL_ARGS_ASSERT_YYERROR;
13512
13513     if (!yychar || (yychar == ';' && !PL_rsfp))
13514         where = "at EOF";
13515     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13516       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13517       PL_oldbufptr != PL_bufptr) {
13518         /*
13519                 Only for NetWare:
13520                 The code below is removed for NetWare because it abends/crashes on NetWare
13521                 when the script has error such as not having the closing quotes like:
13522                     if ($var eq "value)
13523                 Checking of white spaces is anyway done in NetWare code.
13524         */
13525 #ifndef NETWARE
13526         while (isSPACE(*PL_oldoldbufptr))
13527             PL_oldoldbufptr++;
13528 #endif
13529         context = PL_oldoldbufptr;
13530         contlen = PL_bufptr - PL_oldoldbufptr;
13531     }
13532     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13533       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13534         /*
13535                 Only for NetWare:
13536                 The code below is removed for NetWare because it abends/crashes on NetWare
13537                 when the script has error such as not having the closing quotes like:
13538                     if ($var eq "value)
13539                 Checking of white spaces is anyway done in NetWare code.
13540         */
13541 #ifndef NETWARE
13542         while (isSPACE(*PL_oldbufptr))
13543             PL_oldbufptr++;
13544 #endif
13545         context = PL_oldbufptr;
13546         contlen = PL_bufptr - PL_oldbufptr;
13547     }
13548     else if (yychar > 255)
13549         where = "next token ???";
13550     else if (yychar == -2) { /* YYEMPTY */
13551         if (PL_lex_state == LEX_NORMAL ||
13552            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13553             where = "at end of line";
13554         else if (PL_lex_inpat)
13555             where = "within pattern";
13556         else
13557             where = "within string";
13558     }
13559     else {
13560         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13561         if (yychar < 32)
13562             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13563         else if (isPRINT_LC(yychar)) {
13564             const char string = yychar;
13565             sv_catpvn(where_sv, &string, 1);
13566         }
13567         else
13568             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13569         where = SvPVX_const(where_sv);
13570     }
13571     msg = sv_2mortal(newSVpv(s, 0));
13572     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13573         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13574     if (context)
13575         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13576     else
13577         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13578     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13579         Perl_sv_catpvf(aTHX_ msg,
13580         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13581                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13582         PL_multi_end = 0;
13583     }
13584     if (PL_in_eval & EVAL_WARNONLY) {
13585         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13586     }
13587     else
13588         qerror(msg);
13589     if (PL_error_count >= 10) {
13590         if (PL_in_eval && SvCUR(ERRSV))
13591             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13592                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13593         else
13594             Perl_croak(aTHX_ "%s has too many errors.\n",
13595             OutCopFILE(PL_curcop));
13596     }
13597     PL_in_my = 0;
13598     PL_in_my_stash = NULL;
13599     return 0;
13600 }
13601 #ifdef __SC__
13602 #pragma segment Main
13603 #endif
13604
13605 STATIC char*
13606 S_swallow_bom(pTHX_ U8 *s)
13607 {
13608     dVAR;
13609     const STRLEN slen = SvCUR(PL_linestr);
13610
13611     PERL_ARGS_ASSERT_SWALLOW_BOM;
13612
13613     switch (s[0]) {
13614     case 0xFF:
13615         if (s[1] == 0xFE) {
13616             /* UTF-16 little-endian? (or UTF-32LE?) */
13617             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13618                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13619 #ifndef PERL_NO_UTF16_FILTER
13620             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13621             s += 2;
13622             if (PL_bufend > (char*)s) {
13623                 s = add_utf16_textfilter(s, TRUE);
13624             }
13625 #else
13626             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13627 #endif
13628         }
13629         break;
13630     case 0xFE:
13631         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13632 #ifndef PERL_NO_UTF16_FILTER
13633             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13634             s += 2;
13635             if (PL_bufend > (char *)s) {
13636                 s = add_utf16_textfilter(s, FALSE);
13637             }
13638 #else
13639             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13640 #endif
13641         }
13642         break;
13643     case 0xEF:
13644         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13645             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13646             s += 3;                      /* UTF-8 */
13647         }
13648         break;
13649     case 0:
13650         if (slen > 3) {
13651              if (s[1] == 0) {
13652                   if (s[2] == 0xFE && s[3] == 0xFF) {
13653                        /* UTF-32 big-endian */
13654                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13655                   }
13656              }
13657              else if (s[2] == 0 && s[3] != 0) {
13658                   /* Leading bytes
13659                    * 00 xx 00 xx
13660                    * are a good indicator of UTF-16BE. */
13661 #ifndef PERL_NO_UTF16_FILTER
13662                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13663                   s = add_utf16_textfilter(s, FALSE);
13664 #else
13665                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13666 #endif
13667              }
13668         }
13669 #ifdef EBCDIC
13670     case 0xDD:
13671         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13672             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13673             s += 4;                      /* UTF-8 */
13674         }
13675         break;
13676 #endif
13677
13678     default:
13679          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13680                   /* Leading bytes
13681                    * xx 00 xx 00
13682                    * are a good indicator of UTF-16LE. */
13683 #ifndef PERL_NO_UTF16_FILTER
13684               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13685               s = add_utf16_textfilter(s, TRUE);
13686 #else
13687               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13688 #endif
13689          }
13690     }
13691     return (char*)s;
13692 }
13693
13694
13695 #ifndef PERL_NO_UTF16_FILTER
13696 static I32
13697 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13698 {
13699     dVAR;
13700     SV *const filter = FILTER_DATA(idx);
13701     /* We re-use this each time round, throwing the contents away before we
13702        return.  */
13703     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13704     SV *const utf8_buffer = filter;
13705     IV status = IoPAGE(filter);
13706     const bool reverse = cBOOL(IoLINES(filter));
13707     I32 retval;
13708
13709     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13710
13711     /* As we're automatically added, at the lowest level, and hence only called
13712        from this file, we can be sure that we're not called in block mode. Hence
13713        don't bother writing code to deal with block mode.  */
13714     if (maxlen) {
13715         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13716     }
13717     if (status < 0) {
13718         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13719     }
13720     DEBUG_P(PerlIO_printf(Perl_debug_log,
13721                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13722                           FPTR2DPTR(void *, S_utf16_textfilter),
13723                           reverse ? 'l' : 'b', idx, maxlen, status,
13724                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13725
13726     while (1) {
13727         STRLEN chars;
13728         STRLEN have;
13729         I32 newlen;
13730         U8 *end;
13731         /* First, look in our buffer of existing UTF-8 data:  */
13732         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13733
13734         if (nl) {
13735             ++nl;
13736         } else if (status == 0) {
13737             /* EOF */
13738             IoPAGE(filter) = 0;
13739             nl = SvEND(utf8_buffer);
13740         }
13741         if (nl) {
13742             STRLEN got = nl - SvPVX(utf8_buffer);
13743             /* Did we have anything to append?  */
13744             retval = got != 0;
13745             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13746             /* Everything else in this code works just fine if SVp_POK isn't
13747                set.  This, however, needs it, and we need it to work, else
13748                we loop infinitely because the buffer is never consumed.  */
13749             sv_chop(utf8_buffer, nl);
13750             break;
13751         }
13752
13753         /* OK, not a complete line there, so need to read some more UTF-16.
13754            Read an extra octect if the buffer currently has an odd number. */
13755         while (1) {
13756             if (status <= 0)
13757                 break;
13758             if (SvCUR(utf16_buffer) >= 2) {
13759                 /* Location of the high octet of the last complete code point.
13760                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13761                    *coupled* with all the benefits of partial reads and
13762                    endianness.  */
13763                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13764                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13765
13766                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13767                     break;
13768                 }
13769
13770                 /* We have the first half of a surrogate. Read more.  */
13771                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13772             }
13773
13774             status = FILTER_READ(idx + 1, utf16_buffer,
13775                                  160 + (SvCUR(utf16_buffer) & 1));
13776             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13777             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13778             if (status < 0) {
13779                 /* Error */
13780                 IoPAGE(filter) = status;
13781                 return status;
13782             }
13783         }
13784
13785         chars = SvCUR(utf16_buffer) >> 1;
13786         have = SvCUR(utf8_buffer);
13787         SvGROW(utf8_buffer, have + chars * 3 + 1);
13788
13789         if (reverse) {
13790             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13791                                          (U8*)SvPVX_const(utf8_buffer) + have,
13792                                          chars * 2, &newlen);
13793         } else {
13794             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13795                                 (U8*)SvPVX_const(utf8_buffer) + have,
13796                                 chars * 2, &newlen);
13797         }
13798         SvCUR_set(utf8_buffer, have + newlen);
13799         *end = '\0';
13800
13801         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13802            it's private to us, and utf16_to_utf8{,reversed} take a
13803            (pointer,length) pair, rather than a NUL-terminated string.  */
13804         if(SvCUR(utf16_buffer) & 1) {
13805             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13806             SvCUR_set(utf16_buffer, 1);
13807         } else {
13808             SvCUR_set(utf16_buffer, 0);
13809         }
13810     }
13811     DEBUG_P(PerlIO_printf(Perl_debug_log,
13812                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13813                           status,
13814                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13815     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13816     return retval;
13817 }
13818
13819 static U8 *
13820 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13821 {
13822     SV *filter = filter_add(S_utf16_textfilter, NULL);
13823
13824     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13825
13826     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13827     sv_setpvs(filter, "");
13828     IoLINES(filter) = reversed;
13829     IoPAGE(filter) = 1; /* Not EOF */
13830
13831     /* Sadly, we have to return a valid pointer, come what may, so we have to
13832        ignore any error return from this.  */
13833     SvCUR_set(PL_linestr, 0);
13834     if (FILTER_READ(0, PL_linestr, 0)) {
13835         SvUTF8_on(PL_linestr);
13836     } else {
13837         SvUTF8_on(PL_linestr);
13838     }
13839     PL_bufend = SvEND(PL_linestr);
13840     return (U8*)SvPVX(PL_linestr);
13841 }
13842 #endif
13843
13844 /*
13845 Returns a pointer to the next character after the parsed
13846 vstring, as well as updating the passed in sv.
13847
13848 Function must be called like
13849
13850         sv = newSV(5);
13851         s = scan_vstring(s,e,sv);
13852
13853 where s and e are the start and end of the string.
13854 The sv should already be large enough to store the vstring
13855 passed in, for performance reasons.
13856
13857 */
13858
13859 char *
13860 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13861 {
13862     dVAR;
13863     const char *pos = s;
13864     const char *start = s;
13865
13866     PERL_ARGS_ASSERT_SCAN_VSTRING;
13867
13868     if (*pos == 'v') pos++;  /* get past 'v' */
13869     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13870         pos++;
13871     if ( *pos != '.') {
13872         /* this may not be a v-string if followed by => */
13873         const char *next = pos;
13874         while (next < e && isSPACE(*next))
13875             ++next;
13876         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13877             /* return string not v-string */
13878             sv_setpvn(sv,(char *)s,pos-s);
13879             return (char *)pos;
13880         }
13881     }
13882
13883     if (!isALPHA(*pos)) {
13884         U8 tmpbuf[UTF8_MAXBYTES+1];
13885
13886         if (*s == 'v')
13887             s++;  /* get past 'v' */
13888
13889         sv_setpvs(sv, "");
13890
13891         for (;;) {
13892             /* this is atoi() that tolerates underscores */
13893             U8 *tmpend;
13894             UV rev = 0;
13895             const char *end = pos;
13896             UV mult = 1;
13897             while (--end >= s) {
13898                 if (*end != '_') {
13899                     const UV orev = rev;
13900                     rev += (*end - '0') * mult;
13901                     mult *= 10;
13902                     if (orev > rev)
13903                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13904                                          "Integer overflow in decimal number");
13905                 }
13906             }
13907 #ifdef EBCDIC
13908             if (rev > 0x7FFFFFFF)
13909                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13910 #endif
13911             /* Append native character for the rev point */
13912             tmpend = uvchr_to_utf8(tmpbuf, rev);
13913             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13914             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13915                  SvUTF8_on(sv);
13916             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13917                  s = ++pos;
13918             else {
13919                  s = pos;
13920                  break;
13921             }
13922             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13923                  pos++;
13924         }
13925         SvPOK_on(sv);
13926         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13927         SvRMAGICAL_on(sv);
13928     }
13929     return (char *)s;
13930 }
13931
13932 int
13933 Perl_keyword_plugin_standard(pTHX_
13934         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13935 {
13936     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13937     PERL_UNUSED_CONTEXT;
13938     PERL_UNUSED_ARG(keyword_ptr);
13939     PERL_UNUSED_ARG(keyword_len);
13940     PERL_UNUSED_ARG(op_ptr);
13941     return KEYWORD_PLUGIN_DECLINE;
13942 }
13943
13944 /*
13945 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
13946
13947 Parse a single complete Perl statement.  This may be a normal imperative
13948 statement, including optional label, or a declaration that has
13949 compile-time effect.  It is up to the caller to ensure that the dynamic
13950 parser state (L</PL_parser> et al) is correctly set to reflect the source
13951 of the code to be parsed and the lexical context for the statement.
13952
13953 The op tree representing the statement is returned.  This may be a
13954 null pointer if the statement is null, for example if it was actually
13955 a subroutine definition (which has compile-time side effects).  If not
13956 null, it will be the result of a L</newSTATEOP> call, normally including
13957 a C<nextstate> or equivalent op.
13958
13959 If an error occurs in parsing or compilation, in most cases a valid op
13960 tree (most likely null) is returned anyway.  The error is reflected in
13961 the parser state, normally resulting in a single exception at the top
13962 level of parsing which covers all the compilation errors that occurred.
13963 Some compilation errors, however, will throw an exception immediately.
13964
13965 The I<flags> parameter is reserved for future use, and must always
13966 be zero.
13967
13968 =cut
13969 */
13970
13971 OP *
13972 Perl_parse_fullstmt(pTHX_ U32 flags)
13973 {
13974     OP *fullstmtop;
13975     if (flags)
13976         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13977     ENTER;
13978     SAVEVPTR(PL_eval_root);
13979     PL_eval_root = NULL;
13980     if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
13981         qerror(Perl_mess(aTHX_ "Parse error"));
13982     fullstmtop = PL_eval_root;
13983     LEAVE;
13984     return fullstmtop;
13985 }
13986
13987 void
13988 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
13989 {
13990     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
13991     deprecate("qw(...) as parentheses");
13992     force_next(')');
13993     if (qwlist->op_type == OP_STUB) {
13994         op_free(qwlist);
13995     }
13996     else {
13997         start_force(PL_curforce);
13998         NEXTVAL_NEXTTOKE.opval = qwlist;
13999         force_next(THING);
14000     }
14001     force_next('(');
14002 }
14003
14004 /*
14005  * Local variables:
14006  * c-indentation-style: bsd
14007  * c-basic-offset: 4
14008  * indent-tabs-mode: t
14009  * End:
14010  *
14011  * ex: set ts=8 sts=4 sw=4 noet:
14012  */