toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42 #include "dquote_static.c"
  43
  44 #define new_constant(a,b,c,d,e,f,g)     \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XENUMMASK  0x3f
 130 #define XFAKEEOF   0x40
 131 #define XFAKEBRACK 0x80
 132
 133 #ifdef USE_UTF8_SCRIPTS
 134 #   define UTF (!IN_BYTES)
 135 #else
 136 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 137 #endif
 138
 139 /* The maximum number of characters preceding the unrecognized one to display */
 140 #define UNRECOGNIZED_PRECEDE_COUNT 10
 141
 142 /* In variables named $^X, these are the legal values for X.
 143  * 1999-02-27 mjd-perl-patch@plover.com */
 144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 145
 146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 147
 148 /* LEX_* are values for PL_lex_state, the state of the lexer.
 149  * They are arranged oddly so that the guard on the switch statement
 150  * can get by with a single comparison (if the compiler is smart enough).
 151  */
 152
 153 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 154
 155 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 156 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 157 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 158 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 159 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 160
 161                                    /* at end of code, eg "$x" followed by:  */
 162 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 163 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 164
 165 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 166                                         string or after \E, $foo, etc       */
 167 #define LEX_INTERPCONST          2 /* NOT USED */
 168 #define LEX_FORMLINE             1 /* expecting a format line               */
 169 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 170
 171
 172 #ifdef DEBUGGING
 173 static const char* const lex_state_names[] = {
 174     "KNOWNEXT",
 175     "FORMLINE",
 176     "INTERPCONST",
 177     "INTERPCONCAT",
 178     "INTERPENDMAYBE",
 179     "INTERPEND",
 180     "INTERPSTART",
 181     "INTERPPUSH",
 182     "INTERPCASEMOD",
 183     "INTERPNORMAL",
 184     "NORMAL"
 185 };
 186 #endif
 187
 188 #ifdef ff_next
 189 #undef ff_next
 190 #endif
 191
 192 #include "keywords.h"
 193
 194 /* CLINE is a macro that ensures PL_copline has a sane value */
 195
 196 #ifdef CLINE
 197 #undef CLINE
 198 #endif
 199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 200
 201 #ifdef PERL_MAD
 202 #  define SKIPSPACE0(s) skipspace0(s)
 203 #  define SKIPSPACE1(s) skipspace1(s)
 204 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 205 #  define PEEKSPACE(s) skipspace2(s,0)
 206 #else
 207 #  define SKIPSPACE0(s) skipspace(s)
 208 #  define SKIPSPACE1(s) skipspace(s)
 209 #  define SKIPSPACE2(s,tsv) skipspace(s)
 210 #  define PEEKSPACE(s) skipspace(s)
 211 #endif
 212
 213 /*
 214  * Convenience functions to return different tokens and prime the
 215  * lexer for the next token.  They all take an argument.
 216  *
 217  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 218  * OPERATOR     : generic operator
 219  * AOPERATOR    : assignment operator
 220  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 221  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 222  * PREREF       : *EXPR where EXPR is not a simple identifier
 223  * TERM         : expression term
 224  * LOOPX        : loop exiting command (goto, last, dump, etc)
 225  * FTST         : file test operator
 226  * FUN0         : zero-argument function
 227  * FUN1         : not used, except for not, which isn't a UNIOP
 228  * BOop         : bitwise or or xor
 229  * BAop         : bitwise and
 230  * SHop         : shift operator
 231  * PWop         : power operator
 232  * PMop         : pattern-matching operator
 233  * Aop          : addition-level operator
 234  * Mop          : multiplication-level operator
 235  * Eop          : equality-testing operator
 236  * Rop          : relational operator <= != gt
 237  *
 238  * Also see LOP and lop() below.
 239  */
 240
 241 #ifdef DEBUGGING /* Serve -DT. */
 242 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 243 #else
 244 #   define REPORT(retval) (retval)
 245 #endif
 246
 247 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 248 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 249 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 250 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 252 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 253 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 254 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 255 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 256 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 257 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 258 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 259 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 260 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 261 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 262 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 263 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 264 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 265 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 266 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 267
 268 /* This bit of chicanery makes a unary function followed by
 269  * a parenthesis into a function with one argument, highest precedence.
 270  * The UNIDOR macro is for unary functions that can be followed by the //
 271  * operator (such as C<shift // 0>).
 272  */
 273 #define UNI2(f,x) { \
 274         pl_yylval.ival = f; \
 275         PL_expect = x; \
 276         PL_bufptr = s; \
 277         PL_last_uni = PL_oldbufptr; \
 278         PL_last_lop_op = f; \
 279         if (*s == '(') \
 280             return REPORT( (int)FUNC1 ); \
 281         s = PEEKSPACE(s); \
 282         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 283         }
 284 #define UNI(f)    UNI2(f,XTERM)
 285 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 286
 287 #define UNIBRACK(f) { \
 288         pl_yylval.ival = f; \
 289         PL_bufptr = s; \
 290         PL_last_uni = PL_oldbufptr; \
 291         if (*s == '(') \
 292             return REPORT( (int)FUNC1 ); \
 293         s = PEEKSPACE(s); \
 294         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 295         }
 296
 297 /* grandfather return to old style */
 298 #define OLDLOP(f) \
 299         do { \
 300             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 301                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 302             pl_yylval.ival = (f); \
 303             PL_expect = XTERM; \
 304             PL_bufptr = s; \
 305             return (int)LSTOP; \
 306         } while(0)
 307
 308 #ifdef DEBUGGING
 309
 310 /* how to interpret the pl_yylval associated with the token */
 311 enum token_type {
 312     TOKENTYPE_NONE,
 313     TOKENTYPE_IVAL,
 314     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 315     TOKENTYPE_PVAL,
 316     TOKENTYPE_OPVAL,
 317     TOKENTYPE_GVVAL
 318 };
 319
 320 static struct debug_tokens {
 321     const int token;
 322     enum token_type type;
 323     const char *name;
 324 } const debug_tokens[] =
 325 {
 326     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 327     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 328     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 329     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 330     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 331     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 332     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 333     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 334     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 335     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 336     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 337     { DO,               TOKENTYPE_NONE,         "DO" },
 338     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 339     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 340     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 341     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 342     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 343     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 344     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 345     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 346     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 347     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 348     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 349     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 350     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 351     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 352     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 353     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 354     { IF,               TOKENTYPE_IVAL,         "IF" },
 355     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 356     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 357     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 358     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 359     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 360     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 361     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 362     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 363     { MY,               TOKENTYPE_IVAL,         "MY" },
 364     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 365     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 366     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 367     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 368     { OROR,             TOKENTYPE_NONE,         "OROR" },
 369     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 370     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 371     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 372     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 373     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 374     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 375     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 376     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 377     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 378     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 379     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 380     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 381     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 382     { SUB,              TOKENTYPE_NONE,         "SUB" },
 383     { THING,            TOKENTYPE_OPVAL,        "THING" },
 384     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 385     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 386     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 387     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 388     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 389     { USE,              TOKENTYPE_IVAL,         "USE" },
 390     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 391     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 392     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 393     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 394     { 0,                TOKENTYPE_NONE,         NULL }
 395 };
 396
 397 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 398
 399 STATIC int
 400 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 401 {
 402     dVAR;
 403
 404     PERL_ARGS_ASSERT_TOKEREPORT;
 405
 406     if (DEBUG_T_TEST) {
 407         const char *name = NULL;
 408         enum token_type type = TOKENTYPE_NONE;
 409         const struct debug_tokens *p;
 410         SV* const report = newSVpvs("<== ");
 411
 412         for (p = debug_tokens; p->token; p++) {
 413             if (p->token == (int)rv) {
 414                 name = p->name;
 415                 type = p->type;
 416                 break;
 417             }
 418         }
 419         if (name)
 420             Perl_sv_catpv(aTHX_ report, name);
 421         else if ((char)rv > ' ' && (char)rv < '~')
 422             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 423         else if (!rv)
 424             sv_catpvs(report, "EOF");
 425         else
 426             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 427         switch (type) {
 428         case TOKENTYPE_NONE:
 429         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 430             break;
 431         case TOKENTYPE_IVAL:
 432             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 433             break;
 434         case TOKENTYPE_OPNUM:
 435             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 436                                     PL_op_name[lvalp->ival]);
 437             break;
 438         case TOKENTYPE_PVAL:
 439             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 440             break;
 441         case TOKENTYPE_OPVAL:
 442             if (lvalp->opval) {
 443                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 444                                     PL_op_name[lvalp->opval->op_type]);
 445                 if (lvalp->opval->op_type == OP_CONST) {
 446                     Perl_sv_catpvf(aTHX_ report, " %s",
 447                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 448                 }
 449
 450             }
 451             else
 452                 sv_catpvs(report, "(opval=null)");
 453             break;
 454         }
 455         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 456     };
 457     return (int)rv;
 458 }
 459
 460
 461 /* print the buffer with suitable escapes */
 462
 463 STATIC void
 464 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 465 {
 466     SV* const tmp = newSVpvs("");
 467
 468     PERL_ARGS_ASSERT_PRINTBUF;
 469
 470     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 471     SvREFCNT_dec(tmp);
 472 }
 473
 474 #endif
 475
 476 static int
 477 S_deprecate_commaless_var_list(pTHX) {
 478     PL_expect = XTERM;
 479     deprecate("comma-less variable list");
 480     return REPORT(','); /* grandfather non-comma-format format */
 481 }
 482
 483 /*
 484  * S_ao
 485  *
 486  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 487  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 488  */
 489
 490 STATIC int
 491 S_ao(pTHX_ int toketype)
 492 {
 493     dVAR;
 494     if (*PL_bufptr == '=') {
 495         PL_bufptr++;
 496         if (toketype == ANDAND)
 497             pl_yylval.ival = OP_ANDASSIGN;
 498         else if (toketype == OROR)
 499             pl_yylval.ival = OP_ORASSIGN;
 500         else if (toketype == DORDOR)
 501             pl_yylval.ival = OP_DORASSIGN;
 502         toketype = ASSIGNOP;
 503     }
 504     return toketype;
 505 }
 506
 507 /*
 508  * S_no_op
 509  * When Perl expects an operator and finds something else, no_op
 510  * prints the warning.  It always prints "<something> found where
 511  * operator expected.  It prints "Missing semicolon on previous line?"
 512  * if the surprise occurs at the start of the line.  "do you need to
 513  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 514  * where the compiler doesn't know if foo is a method call or a function.
 515  * It prints "Missing operator before end of line" if there's nothing
 516  * after the missing operator, or "... before <...>" if there is something
 517  * after the missing operator.
 518  */
 519
 520 STATIC void
 521 S_no_op(pTHX_ const char *const what, char *s)
 522 {
 523     dVAR;
 524     char * const oldbp = PL_bufptr;
 525     const bool is_first = (PL_oldbufptr == PL_linestart);
 526
 527     PERL_ARGS_ASSERT_NO_OP;
 528
 529     if (!s)
 530         s = oldbp;
 531     else
 532         PL_bufptr = s;
 533     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 534     if (ckWARN_d(WARN_SYNTAX)) {
 535         if (is_first)
 536             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 537                     "\t(Missing semicolon on previous line?)\n");
 538         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 539             const char *t;
 540             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 541                 NOOP;
 542             if (t < PL_bufptr && isSPACE(*t))
 543                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 544                         "\t(Do you need to predeclare %.*s?)\n",
 545                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 546         }
 547         else {
 548             assert(s >= oldbp);
 549             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 550                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 551         }
 552     }
 553     PL_bufptr = oldbp;
 554 }
 555
 556 /*
 557  * S_missingterm
 558  * Complain about missing quote/regexp/heredoc terminator.
 559  * If it's called with NULL then it cauterizes the line buffer.
 560  * If we're in a delimited string and the delimiter is a control
 561  * character, it's reformatted into a two-char sequence like ^C.
 562  * This is fatal.
 563  */
 564
 565 STATIC void
 566 S_missingterm(pTHX_ char *s)
 567 {
 568     dVAR;
 569     char tmpbuf[3];
 570     char q;
 571     if (s) {
 572         char * const nl = strrchr(s,'\n');
 573         if (nl)
 574             *nl = '\0';
 575     }
 576     else if (isCNTRL(PL_multi_close)) {
 577         *tmpbuf = '^';
 578         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 579         tmpbuf[2] = '\0';
 580         s = tmpbuf;
 581     }
 582     else {
 583         *tmpbuf = (char)PL_multi_close;
 584         tmpbuf[1] = '\0';
 585         s = tmpbuf;
 586     }
 587     q = strchr(s,'"') ? '\'' : '"';
 588     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 589 }
 590
 591 #define FEATURE_IS_ENABLED(name)                                        \
 592         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 593             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 594 /* The longest string we pass in.  */
 595 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 596
 597 /*
 598  * S_feature_is_enabled
 599  * Check whether the named feature is enabled.
 600  */
 601 STATIC bool
 602 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 603 {
 604     dVAR;
 605     HV * const hinthv = GvHV(PL_hintgv);
 606     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 607
 608     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 609
 610     assert(namelen <= MAX_FEATURE_LEN);
 611     memcpy(&he_name[8], name, namelen);
 612
 613     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 614 }
 615
 616 /*
 617  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 618  * utf16-to-utf8-reversed.
 619  */
 620
 621 #ifdef PERL_CR_FILTER
 622 static void
 623 strip_return(SV *sv)
 624 {
 625     register const char *s = SvPVX_const(sv);
 626     register const char * const e = s + SvCUR(sv);
 627
 628     PERL_ARGS_ASSERT_STRIP_RETURN;
 629
 630     /* outer loop optimized to do nothing if there are no CR-LFs */
 631     while (s < e) {
 632         if (*s++ == '\r' && *s == '\n') {
 633             /* hit a CR-LF, need to copy the rest */
 634             register char *d = s - 1;
 635             *d++ = *s++;
 636             while (s < e) {
 637                 if (*s == '\r' && s[1] == '\n')
 638                     s++;
 639                 *d++ = *s++;
 640             }
 641             SvCUR(sv) -= s - d;
 642             return;
 643         }
 644     }
 645 }
 646
 647 STATIC I32
 648 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 649 {
 650     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 651     if (count > 0 && !maxlen)
 652         strip_return(sv);
 653     return count;
 654 }
 655 #endif
 656
 657 /*
 658 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 659
 660 Creates and initialises a new lexer/parser state object, supplying
 661 a context in which to lex and parse from a new source of Perl code.
 662 A pointer to the new state object is placed in L</PL_parser>.  An entry
 663 is made on the save stack so that upon unwinding the new state object
 664 will be destroyed and the former value of L</PL_parser> will be restored.
 665 Nothing else need be done to clean up the parsing context.
 666
 667 The code to be parsed comes from I<line> and I<rsfp>.  I<line>, if
 668 non-null, provides a string (in SV form) containing code to be parsed.
 669 A copy of the string is made, so subsequent modification of I<line>
 670 does not affect parsing.  I<rsfp>, if non-null, provides an input stream
 671 from which code will be read to be parsed.  If both are non-null, the
 672 code in I<line> comes first and must consist of complete lines of input,
 673 and I<rsfp> supplies the remainder of the source.
 674
 675 The I<flags> parameter is reserved for future use, and must always
 676 be zero.
 677
 678 =cut
 679 */
 680
 681 void
 682 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 683 {
 684     dVAR;
 685     const char *s = NULL;
 686     STRLEN len;
 687     yy_parser *parser, *oparser;
 688     if (flags)
 689         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 690
 691     /* create and initialise a parser */
 692
 693     Newxz(parser, 1, yy_parser);
 694     parser->old_parser = oparser = PL_parser;
 695     PL_parser = parser;
 696
 697     parser->stack = NULL;
 698     parser->ps = NULL;
 699     parser->stack_size = 0;
 700
 701     /* on scope exit, free this parser and restore any outer one */
 702     SAVEPARSER(parser);
 703     parser->saved_curcop = PL_curcop;
 704
 705     /* initialise lexer state */
 706
 707 #ifdef PERL_MAD
 708     parser->curforce = -1;
 709 #else
 710     parser->nexttoke = 0;
 711 #endif
 712     parser->error_count = oparser ? oparser->error_count : 0;
 713     parser->copline = NOLINE;
 714     parser->lex_state = LEX_NORMAL;
 715     parser->expect = XSTATE;
 716     parser->rsfp = rsfp;
 717     parser->rsfp_filters = newAV();
 718
 719     Newx(parser->lex_brackstack, 120, char);
 720     Newx(parser->lex_casestack, 12, char);
 721     *parser->lex_casestack = '\0';
 722
 723     if (line) {
 724         s = SvPV_const(line, len);
 725     } else {
 726         len = 0;
 727     }
 728
 729     if (!len) {
 730         parser->linestr = newSVpvs("\n;");
 731     } else {
 732         parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
 733         if (s[len-1] != ';')
 734             sv_catpvs(parser->linestr, "\n;");
 735     }
 736     parser->oldoldbufptr =
 737         parser->oldbufptr =
 738         parser->bufptr =
 739         parser->linestart = SvPVX(parser->linestr);
 740     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 741     parser->last_lop = parser->last_uni = NULL;
 742
 743     parser->in_pod = 0;
 744 }
 745
 746
 747 /* delete a parser object */
 748
 749 void
 750 Perl_parser_free(pTHX_  const yy_parser *parser)
 751 {
 752     PERL_ARGS_ASSERT_PARSER_FREE;
 753
 754     PL_curcop = parser->saved_curcop;
 755     SvREFCNT_dec(parser->linestr);
 756
 757     if (parser->rsfp == PerlIO_stdin())
 758         PerlIO_clearerr(parser->rsfp);
 759     else if (parser->rsfp && (!parser->old_parser ||
 760                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 761         PerlIO_close(parser->rsfp);
 762     SvREFCNT_dec(parser->rsfp_filters);
 763
 764     Safefree(parser->lex_brackstack);
 765     Safefree(parser->lex_casestack);
 766     PL_parser = parser->old_parser;
 767     Safefree(parser);
 768 }
 769
 770
 771 /*
 772 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 773
 774 Buffer scalar containing the chunk currently under consideration of the
 775 text currently being lexed.  This is always a plain string scalar (for
 776 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 777 normal scalar means; instead refer to the buffer directly by the pointer
 778 variables described below.
 779
 780 The lexer maintains various C<char*> pointers to things in the
 781 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 782 reallocated, all of these pointers must be updated.  Don't attempt to
 783 do this manually, but rather use L</lex_grow_linestr> if you need to
 784 reallocate the buffer.
 785
 786 The content of the text chunk in the buffer is commonly exactly one
 787 complete line of input, up to and including a newline terminator,
 788 but there are situations where it is otherwise.  The octets of the
 789 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 790 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 791 flag on this scalar, which may disagree with it.
 792
 793 For direct examination of the buffer, the variable
 794 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 795 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 796 of these pointers is usually preferable to examination of the scalar
 797 through normal scalar means.
 798
 799 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 800
 801 Direct pointer to the end of the chunk of text currently being lexed, the
 802 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 803 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 804 always located at the end of the buffer, and does not count as part of
 805 the buffer's contents.
 806
 807 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 808
 809 Points to the current position of lexing inside the lexer buffer.
 810 Characters around this point may be freely examined, within
 811 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 812 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 813 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 814
 815 Lexing code (whether in the Perl core or not) moves this pointer past
 816 the characters that it consumes.  It is also expected to perform some
 817 bookkeeping whenever a newline character is consumed.  This movement
 818 can be more conveniently performed by the function L</lex_read_to>,
 819 which handles newlines appropriately.
 820
 821 Interpretation of the buffer's octets can be abstracted out by
 822 using the slightly higher-level functions L</lex_peek_unichar> and
 823 L</lex_read_unichar>.
 824
 825 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 826
 827 Points to the start of the current line inside the lexer buffer.
 828 This is useful for indicating at which column an error occurred, and
 829 not much else.  This must be updated by any lexing code that consumes
 830 a newline; the function L</lex_read_to> handles this detail.
 831
 832 =cut
 833 */
 834
 835 /*
 836 =for apidoc Amx|bool|lex_bufutf8
 837
 838 Indicates whether the octets in the lexer buffer
 839 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 840 of Unicode characters.  If not, they should be interpreted as Latin-1
 841 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 842
 843 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 844 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 845 encoding.
 846
 847 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 848 is significant, but not the whole story regarding the input character
 849 encoding.  Normally, when a file is being read, the scalar contains octets
 850 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 851 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 852 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 853 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 854 is in effect.  This logic may change in the future; use this function
 855 instead of implementing the logic yourself.
 856
 857 =cut
 858 */
 859
 860 bool
 861 Perl_lex_bufutf8(pTHX)
 862 {
 863     return UTF;
 864 }
 865
 866 /*
 867 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 868
 869 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 870 at least I<len> octets (including terminating NUL).  Returns a
 871 pointer to the reallocated buffer.  This is necessary before making
 872 any direct modification of the buffer that would increase its length.
 873 L</lex_stuff_pvn> provides a more convenient way to insert text into
 874 the buffer.
 875
 876 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 877 this function updates all of the lexer's variables that point directly
 878 into the buffer.
 879
 880 =cut
 881 */
 882
 883 char *
 884 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 885 {
 886     SV *linestr;
 887     char *buf;
 888     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 889     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 890     linestr = PL_parser->linestr;
 891     buf = SvPVX(linestr);
 892     if (len <= SvLEN(linestr))
 893         return buf;
 894     bufend_pos = PL_parser->bufend - buf;
 895     bufptr_pos = PL_parser->bufptr - buf;
 896     oldbufptr_pos = PL_parser->oldbufptr - buf;
 897     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 898     linestart_pos = PL_parser->linestart - buf;
 899     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 900     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 901     buf = sv_grow(linestr, len);
 902     PL_parser->bufend = buf + bufend_pos;
 903     PL_parser->bufptr = buf + bufptr_pos;
 904     PL_parser->oldbufptr = buf + oldbufptr_pos;
 905     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 906     PL_parser->linestart = buf + linestart_pos;
 907     if (PL_parser->last_uni)
 908         PL_parser->last_uni = buf + last_uni_pos;
 909     if (PL_parser->last_lop)
 910         PL_parser->last_lop = buf + last_lop_pos;
 911     return buf;
 912 }
 913
 914 /*
 915 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 916
 917 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 918 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 919 reallocating the buffer if necessary.  This means that lexing code that
 920 runs later will see the characters as if they had appeared in the input.
 921 It is not recommended to do this as part of normal parsing, and most
 922 uses of this facility run the risk of the inserted characters being
 923 interpreted in an unintended manner.
 924
 925 The string to be inserted is represented by I<len> octets starting
 926 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 927 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 928 The characters are recoded for the lexer buffer, according to how the
 929 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 930 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 931 function is more convenient.
 932
 933 =cut
 934 */
 935
 936 void
 937 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 938 {
 939     dVAR;
 940     char *bufptr;
 941     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 942     if (flags & ~(LEX_STUFF_UTF8))
 943         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 944     if (UTF) {
 945         if (flags & LEX_STUFF_UTF8) {
 946             goto plain_copy;
 947         } else {
 948             STRLEN highhalf = 0;
 949             const char *p, *e = pv+len;
 950             for (p = pv; p != e; p++)
 951                 highhalf += !!(((U8)*p) & 0x80);
 952             if (!highhalf)
 953                 goto plain_copy;
 954             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 955             bufptr = PL_parser->bufptr;
 956             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 957             SvCUR_set(PL_parser->linestr,
 958                 SvCUR(PL_parser->linestr) + len+highhalf);
 959             PL_parser->bufend += len+highhalf;
 960             for (p = pv; p != e; p++) {
 961                 U8 c = (U8)*p;
 962                 if (c & 0x80) {
 963                     *bufptr++ = (char)(0xc0 | (c >> 6));
 964                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 965                 } else {
 966                     *bufptr++ = (char)c;
 967                 }
 968             }
 969         }
 970     } else {
 971         if (flags & LEX_STUFF_UTF8) {
 972             STRLEN highhalf = 0;
 973             const char *p, *e = pv+len;
 974             for (p = pv; p != e; p++) {
 975                 U8 c = (U8)*p;
 976                 if (c >= 0xc4) {
 977                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 978                                 "non-Latin-1 character into Latin-1 input");
 979                 } else if (c >= 0xc2 && p+1 != e &&
 980                             (((U8)p[1]) & 0xc0) == 0x80) {
 981                     p++;
 982                     highhalf++;
 983                 } else if (c >= 0x80) {
 984                     /* malformed UTF-8 */
 985                     ENTER;
 986                     SAVESPTR(PL_warnhook);
 987                     PL_warnhook = PERL_WARNHOOK_FATAL;
 988                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 989                     LEAVE;
 990                 }
 991             }
 992             if (!highhalf)
 993                 goto plain_copy;
 994             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 995             bufptr = PL_parser->bufptr;
 996             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 997             SvCUR_set(PL_parser->linestr,
 998                 SvCUR(PL_parser->linestr) + len-highhalf);
 999             PL_parser->bufend += len-highhalf;
1000             for (p = pv; p != e; p++) {
1001                 U8 c = (U8)*p;
1002                 if (c & 0x80) {
1003                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1004                     p++;
1005                 } else {
1006                     *bufptr++ = (char)c;
1007                 }
1008             }
1009         } else {
1010             plain_copy:
1011             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1012             bufptr = PL_parser->bufptr;
1013             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1014             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1015             PL_parser->bufend += len;
1016             Copy(pv, bufptr, len, char);
1017         }
1018     }
1019 }
1020
1021 /*
1022 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1023
1024 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1025 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1026 reallocating the buffer if necessary.  This means that lexing code that
1027 runs later will see the characters as if they had appeared in the input.
1028 It is not recommended to do this as part of normal parsing, and most
1029 uses of this facility run the risk of the inserted characters being
1030 interpreted in an unintended manner.
1031
1032 The string to be inserted is represented by octets starting at I<pv>
1033 and continuing to the first nul.  These octets are interpreted as either
1034 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1035 in I<flags>.  The characters are recoded for the lexer buffer, according
1036 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1037 If it is not convenient to nul-terminate a string to be inserted, the
1038 L</lex_stuff_pvn> function is more appropriate.
1039
1040 =cut
1041 */
1042
1043 void
1044 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1045 {
1046     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1047     lex_stuff_pvn(pv, strlen(pv), flags);
1048 }
1049
1050 /*
1051 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1052
1053 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1054 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1055 reallocating the buffer if necessary.  This means that lexing code that
1056 runs later will see the characters as if they had appeared in the input.
1057 It is not recommended to do this as part of normal parsing, and most
1058 uses of this facility run the risk of the inserted characters being
1059 interpreted in an unintended manner.
1060
1061 The string to be inserted is the string value of I<sv>.  The characters
1062 are recoded for the lexer buffer, according to how the buffer is currently
1063 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1064 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1065 need to construct a scalar.
1066
1067 =cut
1068 */
1069
1070 void
1071 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1072 {
1073     char *pv;
1074     STRLEN len;
1075     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1076     if (flags)
1077         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1078     pv = SvPV(sv, len);
1079     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1080 }
1081
1082 /*
1083 =for apidoc Amx|void|lex_unstuff|char *ptr
1084
1085 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1086 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1087 This hides the discarded text from any lexing code that runs later,
1088 as if the text had never appeared.
1089
1090 This is not the normal way to consume lexed text.  For that, use
1091 L</lex_read_to>.
1092
1093 =cut
1094 */
1095
1096 void
1097 Perl_lex_unstuff(pTHX_ char *ptr)
1098 {
1099     char *buf, *bufend;
1100     STRLEN unstuff_len;
1101     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1102     buf = PL_parser->bufptr;
1103     if (ptr < buf)
1104         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1105     if (ptr == buf)
1106         return;
1107     bufend = PL_parser->bufend;
1108     if (ptr > bufend)
1109         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1110     unstuff_len = ptr - buf;
1111     Move(ptr, buf, bufend+1-ptr, char);
1112     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1113     PL_parser->bufend = bufend - unstuff_len;
1114 }
1115
1116 /*
1117 =for apidoc Amx|void|lex_read_to|char *ptr
1118
1119 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1120 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1121 performing the correct bookkeeping whenever a newline character is passed.
1122 This is the normal way to consume lexed text.
1123
1124 Interpretation of the buffer's octets can be abstracted out by
1125 using the slightly higher-level functions L</lex_peek_unichar> and
1126 L</lex_read_unichar>.
1127
1128 =cut
1129 */
1130
1131 void
1132 Perl_lex_read_to(pTHX_ char *ptr)
1133 {
1134     char *s;
1135     PERL_ARGS_ASSERT_LEX_READ_TO;
1136     s = PL_parser->bufptr;
1137     if (ptr < s || ptr > PL_parser->bufend)
1138         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1139     for (; s != ptr; s++)
1140         if (*s == '\n') {
1141             CopLINE_inc(PL_curcop);
1142             PL_parser->linestart = s+1;
1143         }
1144     PL_parser->bufptr = ptr;
1145 }
1146
1147 /*
1148 =for apidoc Amx|void|lex_discard_to|char *ptr
1149
1150 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1151 up to I<ptr>.  The remaining content of the buffer will be moved, and
1152 all pointers into the buffer updated appropriately.  I<ptr> must not
1153 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1154 it is not permitted to discard text that has yet to be lexed.
1155
1156 Normally it is not necessarily to do this directly, because it suffices to
1157 use the implicit discarding behaviour of L</lex_next_chunk> and things
1158 based on it.  However, if a token stretches across multiple lines,
1159 and the lexing code has kept multiple lines of text in the buffer for
1160 that purpose, then after completion of the token it would be wise to
1161 explicitly discard the now-unneeded earlier lines, to avoid future
1162 multi-line tokens growing the buffer without bound.
1163
1164 =cut
1165 */
1166
1167 void
1168 Perl_lex_discard_to(pTHX_ char *ptr)
1169 {
1170     char *buf;
1171     STRLEN discard_len;
1172     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1173     buf = SvPVX(PL_parser->linestr);
1174     if (ptr < buf)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1176     if (ptr == buf)
1177         return;
1178     if (ptr > PL_parser->bufptr)
1179         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180     discard_len = ptr - buf;
1181     if (PL_parser->oldbufptr < ptr)
1182         PL_parser->oldbufptr = ptr;
1183     if (PL_parser->oldoldbufptr < ptr)
1184         PL_parser->oldoldbufptr = ptr;
1185     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1186         PL_parser->last_uni = NULL;
1187     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1188         PL_parser->last_lop = NULL;
1189     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1190     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1191     PL_parser->bufend -= discard_len;
1192     PL_parser->bufptr -= discard_len;
1193     PL_parser->oldbufptr -= discard_len;
1194     PL_parser->oldoldbufptr -= discard_len;
1195     if (PL_parser->last_uni)
1196         PL_parser->last_uni -= discard_len;
1197     if (PL_parser->last_lop)
1198         PL_parser->last_lop -= discard_len;
1199 }
1200
1201 /*
1202 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1203
1204 Reads in the next chunk of text to be lexed, appending it to
1205 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1206 looked to the end of the current chunk and wants to know more.  It is
1207 usual, but not necessary, for lexing to have consumed the entirety of
1208 the current chunk at this time.
1209
1210 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1211 chunk (i.e., the current chunk has been entirely consumed), normally the
1212 current chunk will be discarded at the same time that the new chunk is
1213 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1214 will not be discarded.  If the current chunk has not been entirely
1215 consumed, then it will not be discarded regardless of the flag.
1216
1217 Returns true if some new text was added to the buffer, or false if the
1218 buffer has reached the end of the input text.
1219
1220 =cut
1221 */
1222
1223 #define LEX_FAKE_EOF 0x80000000
1224
1225 bool
1226 Perl_lex_next_chunk(pTHX_ U32 flags)
1227 {
1228     SV *linestr;
1229     char *buf;
1230     STRLEN old_bufend_pos, new_bufend_pos;
1231     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1232     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1233     bool got_some_for_debugger = 0;
1234     bool got_some;
1235     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1236         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1237     linestr = PL_parser->linestr;
1238     buf = SvPVX(linestr);
1239     if (!(flags & LEX_KEEP_PREVIOUS) &&
1240             PL_parser->bufptr == PL_parser->bufend) {
1241         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1242         linestart_pos = 0;
1243         if (PL_parser->last_uni != PL_parser->bufend)
1244             PL_parser->last_uni = NULL;
1245         if (PL_parser->last_lop != PL_parser->bufend)
1246             PL_parser->last_lop = NULL;
1247         last_uni_pos = last_lop_pos = 0;
1248         *buf = 0;
1249         SvCUR(linestr) = 0;
1250     } else {
1251         old_bufend_pos = PL_parser->bufend - buf;
1252         bufptr_pos = PL_parser->bufptr - buf;
1253         oldbufptr_pos = PL_parser->oldbufptr - buf;
1254         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1255         linestart_pos = PL_parser->linestart - buf;
1256         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1257         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1258     }
1259     if (flags & LEX_FAKE_EOF) {
1260         goto eof;
1261     } else if (!PL_parser->rsfp) {
1262         got_some = 0;
1263     } else if (filter_gets(linestr, old_bufend_pos)) {
1264         got_some = 1;
1265         got_some_for_debugger = 1;
1266     } else {
1267         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1268             sv_setpvs(linestr, "");
1269         eof:
1270         /* End of real input.  Close filehandle (unless it was STDIN),
1271          * then add implicit termination.
1272          */
1273         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1274             PerlIO_clearerr(PL_parser->rsfp);
1275         else if (PL_parser->rsfp)
1276             (void)PerlIO_close(PL_parser->rsfp);
1277         PL_parser->rsfp = NULL;
1278         PL_parser->in_pod = 0;
1279 #ifdef PERL_MAD
1280         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1281             PL_faketokens = 1;
1282 #endif
1283         if (!PL_in_eval && PL_minus_p) {
1284             sv_catpvs(linestr,
1285                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1286             PL_minus_n = PL_minus_p = 0;
1287         } else if (!PL_in_eval && PL_minus_n) {
1288             sv_catpvs(linestr, /*{*/";}");
1289             PL_minus_n = 0;
1290         } else
1291             sv_catpvs(linestr, ";");
1292         got_some = 1;
1293     }
1294     buf = SvPVX(linestr);
1295     new_bufend_pos = SvCUR(linestr);
1296     PL_parser->bufend = buf + new_bufend_pos;
1297     PL_parser->bufptr = buf + bufptr_pos;
1298     PL_parser->oldbufptr = buf + oldbufptr_pos;
1299     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1300     PL_parser->linestart = buf + linestart_pos;
1301     if (PL_parser->last_uni)
1302         PL_parser->last_uni = buf + last_uni_pos;
1303     if (PL_parser->last_lop)
1304         PL_parser->last_lop = buf + last_lop_pos;
1305     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1306             PL_curstash != PL_debstash) {
1307         /* debugger active and we're not compiling the debugger code,
1308          * so store the line into the debugger's array of lines
1309          */
1310         update_debugger_info(NULL, buf+old_bufend_pos,
1311             new_bufend_pos-old_bufend_pos);
1312     }
1313     return got_some;
1314 }
1315
1316 /*
1317 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1318
1319 Looks ahead one (Unicode) character in the text currently being lexed.
1320 Returns the codepoint (unsigned integer value) of the next character,
1321 or -1 if lexing has reached the end of the input text.  To consume the
1322 peeked character, use L</lex_read_unichar>.
1323
1324 If the next character is in (or extends into) the next chunk of input
1325 text, the next chunk will be read in.  Normally the current chunk will be
1326 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1327 then the current chunk will not be discarded.
1328
1329 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1330 is encountered, an exception is generated.
1331
1332 =cut
1333 */
1334
1335 I32
1336 Perl_lex_peek_unichar(pTHX_ U32 flags)
1337 {
1338     dVAR;
1339     char *s, *bufend;
1340     if (flags & ~(LEX_KEEP_PREVIOUS))
1341         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1342     s = PL_parser->bufptr;
1343     bufend = PL_parser->bufend;
1344     if (UTF) {
1345         U8 head;
1346         I32 unichar;
1347         STRLEN len, retlen;
1348         if (s == bufend) {
1349             if (!lex_next_chunk(flags))
1350                 return -1;
1351             s = PL_parser->bufptr;
1352             bufend = PL_parser->bufend;
1353         }
1354         head = (U8)*s;
1355         if (!(head & 0x80))
1356             return head;
1357         if (head & 0x40) {
1358             len = PL_utf8skip[head];
1359             while ((STRLEN)(bufend-s) < len) {
1360                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1361                     break;
1362                 s = PL_parser->bufptr;
1363                 bufend = PL_parser->bufend;
1364             }
1365         }
1366         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1367         if (retlen == (STRLEN)-1) {
1368             /* malformed UTF-8 */
1369             ENTER;
1370             SAVESPTR(PL_warnhook);
1371             PL_warnhook = PERL_WARNHOOK_FATAL;
1372             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1373             LEAVE;
1374         }
1375         return unichar;
1376     } else {
1377         if (s == bufend) {
1378             if (!lex_next_chunk(flags))
1379                 return -1;
1380             s = PL_parser->bufptr;
1381         }
1382         return (U8)*s;
1383     }
1384 }
1385
1386 /*
1387 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1388
1389 Reads the next (Unicode) character in the text currently being lexed.
1390 Returns the codepoint (unsigned integer value) of the character read,
1391 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1392 if lexing has reached the end of the input text.  To non-destructively
1393 examine the next character, use L</lex_peek_unichar> instead.
1394
1395 If the next character is in (or extends into) the next chunk of input
1396 text, the next chunk will be read in.  Normally the current chunk will be
1397 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1398 then the current chunk will not be discarded.
1399
1400 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1401 is encountered, an exception is generated.
1402
1403 =cut
1404 */
1405
1406 I32
1407 Perl_lex_read_unichar(pTHX_ U32 flags)
1408 {
1409     I32 c;
1410     if (flags & ~(LEX_KEEP_PREVIOUS))
1411         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1412     c = lex_peek_unichar(flags);
1413     if (c != -1) {
1414         if (c == '\n')
1415             CopLINE_inc(PL_curcop);
1416         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1417     }
1418     return c;
1419 }
1420
1421 /*
1422 =for apidoc Amx|void|lex_read_space|U32 flags
1423
1424 Reads optional spaces, in Perl style, in the text currently being
1425 lexed.  The spaces may include ordinary whitespace characters and
1426 Perl-style comments.  C<#line> directives are processed if encountered.
1427 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1428 at a non-space character (or the end of the input text).
1429
1430 If spaces extend into the next chunk of input text, the next chunk will
1431 be read in.  Normally the current chunk will be discarded at the same
1432 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1433 chunk will not be discarded.
1434
1435 =cut
1436 */
1437
1438 #define LEX_NO_NEXT_CHUNK 0x80000000
1439
1440 void
1441 Perl_lex_read_space(pTHX_ U32 flags)
1442 {
1443     char *s, *bufend;
1444     bool need_incline = 0;
1445     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1446         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1447 #ifdef PERL_MAD
1448     if (PL_skipwhite) {
1449         sv_free(PL_skipwhite);
1450         PL_skipwhite = NULL;
1451     }
1452     if (PL_madskills)
1453         PL_skipwhite = newSVpvs("");
1454 #endif /* PERL_MAD */
1455     s = PL_parser->bufptr;
1456     bufend = PL_parser->bufend;
1457     while (1) {
1458         char c = *s;
1459         if (c == '#') {
1460             do {
1461                 c = *++s;
1462             } while (!(c == '\n' || (c == 0 && s == bufend)));
1463         } else if (c == '\n') {
1464             s++;
1465             PL_parser->linestart = s;
1466             if (s == bufend)
1467                 need_incline = 1;
1468             else
1469                 incline(s);
1470         } else if (isSPACE(c)) {
1471             s++;
1472         } else if (c == 0 && s == bufend) {
1473             bool got_more;
1474 #ifdef PERL_MAD
1475             if (PL_madskills)
1476                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1477 #endif /* PERL_MAD */
1478             if (flags & LEX_NO_NEXT_CHUNK)
1479                 break;
1480             PL_parser->bufptr = s;
1481             CopLINE_inc(PL_curcop);
1482             got_more = lex_next_chunk(flags);
1483             CopLINE_dec(PL_curcop);
1484             s = PL_parser->bufptr;
1485             bufend = PL_parser->bufend;
1486             if (!got_more)
1487                 break;
1488             if (need_incline && PL_parser->rsfp) {
1489                 incline(s);
1490                 need_incline = 0;
1491             }
1492         } else {
1493             break;
1494         }
1495     }
1496 #ifdef PERL_MAD
1497     if (PL_madskills)
1498         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1499 #endif /* PERL_MAD */
1500     PL_parser->bufptr = s;
1501 }
1502
1503 /*
1504  * S_incline
1505  * This subroutine has nothing to do with tilting, whether at windmills
1506  * or pinball tables.  Its name is short for "increment line".  It
1507  * increments the current line number in CopLINE(PL_curcop) and checks
1508  * to see whether the line starts with a comment of the form
1509  *    # line 500 "foo.pm"
1510  * If so, it sets the current line number and file to the values in the comment.
1511  */
1512
1513 STATIC void
1514 S_incline(pTHX_ const char *s)
1515 {
1516     dVAR;
1517     const char *t;
1518     const char *n;
1519     const char *e;
1520
1521     PERL_ARGS_ASSERT_INCLINE;
1522
1523     CopLINE_inc(PL_curcop);
1524     if (*s++ != '#')
1525         return;
1526     while (SPACE_OR_TAB(*s))
1527         s++;
1528     if (strnEQ(s, "line", 4))
1529         s += 4;
1530     else
1531         return;
1532     if (SPACE_OR_TAB(*s))
1533         s++;
1534     else
1535         return;
1536     while (SPACE_OR_TAB(*s))
1537         s++;
1538     if (!isDIGIT(*s))
1539         return;
1540
1541     n = s;
1542     while (isDIGIT(*s))
1543         s++;
1544     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1545         return;
1546     while (SPACE_OR_TAB(*s))
1547         s++;
1548     if (*s == '"' && (t = strchr(s+1, '"'))) {
1549         s++;
1550         e = t + 1;
1551     }
1552     else {
1553         t = s;
1554         while (!isSPACE(*t))
1555             t++;
1556         e = t;
1557     }
1558     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1559         e++;
1560     if (*e != '\n' && *e != '\0')
1561         return;         /* false alarm */
1562
1563     if (t - s > 0) {
1564         const STRLEN len = t - s;
1565 #ifndef USE_ITHREADS
1566         SV *const temp_sv = CopFILESV(PL_curcop);
1567         const char *cf;
1568         STRLEN tmplen;
1569
1570         if (temp_sv) {
1571             cf = SvPVX(temp_sv);
1572             tmplen = SvCUR(temp_sv);
1573         } else {
1574             cf = NULL;
1575             tmplen = 0;
1576         }
1577
1578         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1579             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1580              * to *{"::_<newfilename"} */
1581             /* However, the long form of evals is only turned on by the
1582                debugger - usually they're "(eval %lu)" */
1583             char smallbuf[128];
1584             char *tmpbuf;
1585             GV **gvp;
1586             STRLEN tmplen2 = len;
1587             if (tmplen + 2 <= sizeof smallbuf)
1588                 tmpbuf = smallbuf;
1589             else
1590                 Newx(tmpbuf, tmplen + 2, char);
1591             tmpbuf[0] = '_';
1592             tmpbuf[1] = '<';
1593             memcpy(tmpbuf + 2, cf, tmplen);
1594             tmplen += 2;
1595             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1596             if (gvp) {
1597                 char *tmpbuf2;
1598                 GV *gv2;
1599
1600                 if (tmplen2 + 2 <= sizeof smallbuf)
1601                     tmpbuf2 = smallbuf;
1602                 else
1603                     Newx(tmpbuf2, tmplen2 + 2, char);
1604
1605                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1606                     /* Either they malloc'd it, or we malloc'd it,
1607                        so no prefix is present in ours.  */
1608                     tmpbuf2[0] = '_';
1609                     tmpbuf2[1] = '<';
1610                 }
1611
1612                 memcpy(tmpbuf2 + 2, s, tmplen2);
1613                 tmplen2 += 2;
1614
1615                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1616                 if (!isGV(gv2)) {
1617                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1618                     /* adjust ${"::_<newfilename"} to store the new file name */
1619                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1620                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1621                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1622                 }
1623
1624                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1625             }
1626             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1627         }
1628 #endif
1629         CopFILE_free(PL_curcop);
1630         CopFILE_setn(PL_curcop, s, len);
1631     }
1632     CopLINE_set(PL_curcop, atoi(n)-1);
1633 }
1634
1635 #ifdef PERL_MAD
1636 /* skip space before PL_thistoken */
1637
1638 STATIC char *
1639 S_skipspace0(pTHX_ register char *s)
1640 {
1641     PERL_ARGS_ASSERT_SKIPSPACE0;
1642
1643     s = skipspace(s);
1644     if (!PL_madskills)
1645         return s;
1646     if (PL_skipwhite) {
1647         if (!PL_thiswhite)
1648             PL_thiswhite = newSVpvs("");
1649         sv_catsv(PL_thiswhite, PL_skipwhite);
1650         sv_free(PL_skipwhite);
1651         PL_skipwhite = 0;
1652     }
1653     PL_realtokenstart = s - SvPVX(PL_linestr);
1654     return s;
1655 }
1656
1657 /* skip space after PL_thistoken */
1658
1659 STATIC char *
1660 S_skipspace1(pTHX_ register char *s)
1661 {
1662     const char *start = s;
1663     I32 startoff = start - SvPVX(PL_linestr);
1664
1665     PERL_ARGS_ASSERT_SKIPSPACE1;
1666
1667     s = skipspace(s);
1668     if (!PL_madskills)
1669         return s;
1670     start = SvPVX(PL_linestr) + startoff;
1671     if (!PL_thistoken && PL_realtokenstart >= 0) {
1672         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1673         PL_thistoken = newSVpvn(tstart, start - tstart);
1674     }
1675     PL_realtokenstart = -1;
1676     if (PL_skipwhite) {
1677         if (!PL_nextwhite)
1678             PL_nextwhite = newSVpvs("");
1679         sv_catsv(PL_nextwhite, PL_skipwhite);
1680         sv_free(PL_skipwhite);
1681         PL_skipwhite = 0;
1682     }
1683     return s;
1684 }
1685
1686 STATIC char *
1687 S_skipspace2(pTHX_ register char *s, SV **svp)
1688 {
1689     char *start;
1690     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1691     const I32 startoff = s - SvPVX(PL_linestr);
1692
1693     PERL_ARGS_ASSERT_SKIPSPACE2;
1694
1695     s = skipspace(s);
1696     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1697     if (!PL_madskills || !svp)
1698         return s;
1699     start = SvPVX(PL_linestr) + startoff;
1700     if (!PL_thistoken && PL_realtokenstart >= 0) {
1701         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1702         PL_thistoken = newSVpvn(tstart, start - tstart);
1703         PL_realtokenstart = -1;
1704     }
1705     if (PL_skipwhite) {
1706         if (!*svp)
1707             *svp = newSVpvs("");
1708         sv_setsv(*svp, PL_skipwhite);
1709         sv_free(PL_skipwhite);
1710         PL_skipwhite = 0;
1711     }
1712
1713     return s;
1714 }
1715 #endif
1716
1717 STATIC void
1718 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1719 {
1720     AV *av = CopFILEAVx(PL_curcop);
1721     if (av) {
1722         SV * const sv = newSV_type(SVt_PVMG);
1723         if (orig_sv)
1724             sv_setsv(sv, orig_sv);
1725         else
1726             sv_setpvn(sv, buf, len);
1727         (void)SvIOK_on(sv);
1728         SvIV_set(sv, 0);
1729         av_store(av, (I32)CopLINE(PL_curcop), sv);
1730     }
1731 }
1732
1733 /*
1734  * S_skipspace
1735  * Called to gobble the appropriate amount and type of whitespace.
1736  * Skips comments as well.
1737  */
1738
1739 STATIC char *
1740 S_skipspace(pTHX_ register char *s)
1741 {
1742 #ifdef PERL_MAD
1743     char *start = s;
1744 #endif /* PERL_MAD */
1745     PERL_ARGS_ASSERT_SKIPSPACE;
1746 #ifdef PERL_MAD
1747     if (PL_skipwhite) {
1748         sv_free(PL_skipwhite);
1749         PL_skipwhite = NULL;
1750     }
1751 #endif /* PERL_MAD */
1752     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1753         while (s < PL_bufend && SPACE_OR_TAB(*s))
1754             s++;
1755     } else {
1756         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1757         PL_bufptr = s;
1758         lex_read_space(LEX_KEEP_PREVIOUS |
1759                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1760                     LEX_NO_NEXT_CHUNK : 0));
1761         s = PL_bufptr;
1762         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1763         if (PL_linestart > PL_bufptr)
1764             PL_bufptr = PL_linestart;
1765         return s;
1766     }
1767 #ifdef PERL_MAD
1768     if (PL_madskills)
1769         PL_skipwhite = newSVpvn(start, s-start);
1770 #endif /* PERL_MAD */
1771     return s;
1772 }
1773
1774 /*
1775  * S_check_uni
1776  * Check the unary operators to ensure there's no ambiguity in how they're
1777  * used.  An ambiguous piece of code would be:
1778  *     rand + 5
1779  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1780  * the +5 is its argument.
1781  */
1782
1783 STATIC void
1784 S_check_uni(pTHX)
1785 {
1786     dVAR;
1787     const char *s;
1788     const char *t;
1789
1790     if (PL_oldoldbufptr != PL_last_uni)
1791         return;
1792     while (isSPACE(*PL_last_uni))
1793         PL_last_uni++;
1794     s = PL_last_uni;
1795     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1796         s++;
1797     if ((t = strchr(s, '(')) && t < PL_bufptr)
1798         return;
1799
1800     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1801                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1802                      (int)(s - PL_last_uni), PL_last_uni);
1803 }
1804
1805 /*
1806  * LOP : macro to build a list operator.  Its behaviour has been replaced
1807  * with a subroutine, S_lop() for which LOP is just another name.
1808  */
1809
1810 #define LOP(f,x) return lop(f,x,s)
1811
1812 /*
1813  * S_lop
1814  * Build a list operator (or something that might be one).  The rules:
1815  *  - if we have a next token, then it's a list operator [why?]
1816  *  - if the next thing is an opening paren, then it's a function
1817  *  - else it's a list operator
1818  */
1819
1820 STATIC I32
1821 S_lop(pTHX_ I32 f, int x, char *s)
1822 {
1823     dVAR;
1824
1825     PERL_ARGS_ASSERT_LOP;
1826
1827     pl_yylval.ival = f;
1828     CLINE;
1829     PL_expect = x;
1830     PL_bufptr = s;
1831     PL_last_lop = PL_oldbufptr;
1832     PL_last_lop_op = (OPCODE)f;
1833 #ifdef PERL_MAD
1834     if (PL_lasttoke)
1835         goto lstop;
1836 #else
1837     if (PL_nexttoke)
1838         goto lstop;
1839 #endif
1840     if (*s == '(')
1841         return REPORT(FUNC);
1842     s = PEEKSPACE(s);
1843     if (*s == '(')
1844         return REPORT(FUNC);
1845     else {
1846         lstop:
1847         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1848             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1849         return REPORT(LSTOP);
1850     }
1851 }
1852
1853 #ifdef PERL_MAD
1854  /*
1855  * S_start_force
1856  * Sets up for an eventual force_next().  start_force(0) basically does
1857  * an unshift, while start_force(-1) does a push.  yylex removes items
1858  * on the "pop" end.
1859  */
1860
1861 STATIC void
1862 S_start_force(pTHX_ int where)
1863 {
1864     int i;
1865
1866     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1867         where = PL_lasttoke;
1868     assert(PL_curforce < 0 || PL_curforce == where);
1869     if (PL_curforce != where) {
1870         for (i = PL_lasttoke; i > where; --i) {
1871             PL_nexttoke[i] = PL_nexttoke[i-1];
1872         }
1873         PL_lasttoke++;
1874     }
1875     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1876         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1877     PL_curforce = where;
1878     if (PL_nextwhite) {
1879         if (PL_madskills)
1880             curmad('^', newSVpvs(""));
1881         CURMAD('_', PL_nextwhite);
1882     }
1883 }
1884
1885 STATIC void
1886 S_curmad(pTHX_ char slot, SV *sv)
1887 {
1888     MADPROP **where;
1889
1890     if (!sv)
1891         return;
1892     if (PL_curforce < 0)
1893         where = &PL_thismad;
1894     else
1895         where = &PL_nexttoke[PL_curforce].next_mad;
1896
1897     if (PL_faketokens)
1898         sv_setpvs(sv, "");
1899     else {
1900         if (!IN_BYTES) {
1901             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1902                 SvUTF8_on(sv);
1903             else if (PL_encoding) {
1904                 sv_recode_to_utf8(sv, PL_encoding);
1905             }
1906         }
1907     }
1908
1909     /* keep a slot open for the head of the list? */
1910     if (slot != '_' && *where && (*where)->mad_key == '^') {
1911         (*where)->mad_key = slot;
1912         sv_free(MUTABLE_SV(((*where)->mad_val)));
1913         (*where)->mad_val = (void*)sv;
1914     }
1915     else
1916         addmad(newMADsv(slot, sv), where, 0);
1917 }
1918 #else
1919 #  define start_force(where)    NOOP
1920 #  define curmad(slot, sv)      NOOP
1921 #endif
1922
1923 /*
1924  * S_force_next
1925  * When the lexer realizes it knows the next token (for instance,
1926  * it is reordering tokens for the parser) then it can call S_force_next
1927  * to know what token to return the next time the lexer is called.  Caller
1928  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1929  * and possibly PL_expect to ensure the lexer handles the token correctly.
1930  */
1931
1932 STATIC void
1933 S_force_next(pTHX_ I32 type)
1934 {
1935     dVAR;
1936 #ifdef DEBUGGING
1937     if (DEBUG_T_TEST) {
1938         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1939         tokereport(type, &NEXTVAL_NEXTTOKE);
1940     }
1941 #endif
1942 #ifdef PERL_MAD
1943     if (PL_curforce < 0)
1944         start_force(PL_lasttoke);
1945     PL_nexttoke[PL_curforce].next_type = type;
1946     if (PL_lex_state != LEX_KNOWNEXT)
1947         PL_lex_defer = PL_lex_state;
1948     PL_lex_state = LEX_KNOWNEXT;
1949     PL_lex_expect = PL_expect;
1950     PL_curforce = -1;
1951 #else
1952     PL_nexttype[PL_nexttoke] = type;
1953     PL_nexttoke++;
1954     if (PL_lex_state != LEX_KNOWNEXT) {
1955         PL_lex_defer = PL_lex_state;
1956         PL_lex_expect = PL_expect;
1957         PL_lex_state = LEX_KNOWNEXT;
1958     }
1959 #endif
1960 }
1961
1962 void
1963 Perl_yyunlex(pTHX)
1964 {
1965     int yyc = PL_parser->yychar;
1966     if (yyc != YYEMPTY) {
1967         if (yyc) {
1968             start_force(-1);
1969             NEXTVAL_NEXTTOKE = PL_parser->yylval;
1970             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1971                 PL_lex_allbrackets--;
1972                 PL_lex_brackets--;
1973                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1974             } else if (yyc == '('/*)*/) {
1975                 PL_lex_allbrackets--;
1976                 yyc |= (2<<24);
1977             }
1978             force_next(yyc);
1979         }
1980         PL_parser->yychar = YYEMPTY;
1981     }
1982 }
1983
1984 STATIC SV *
1985 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1986 {
1987     dVAR;
1988     SV * const sv = newSVpvn_utf8(start, len,
1989                                   !IN_BYTES
1990                                   && UTF
1991                                   && !is_ascii_string((const U8*)start, len)
1992                                   && is_utf8_string((const U8*)start, len));
1993     return sv;
1994 }
1995
1996 /*
1997  * S_force_word
1998  * When the lexer knows the next thing is a word (for instance, it has
1999  * just seen -> and it knows that the next char is a word char, then
2000  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2001  * lookahead.
2002  *
2003  * Arguments:
2004  *   char *start : buffer position (must be within PL_linestr)
2005  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2006  *   int check_keyword : if true, Perl checks to make sure the word isn't
2007  *       a keyword (do this if the word is a label, e.g. goto FOO)
2008  *   int allow_pack : if true, : characters will also be allowed (require,
2009  *       use, etc. do this)
2010  *   int allow_initial_tick : used by the "sub" lexer only.
2011  */
2012
2013 STATIC char *
2014 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2015 {
2016     dVAR;
2017     register char *s;
2018     STRLEN len;
2019
2020     PERL_ARGS_ASSERT_FORCE_WORD;
2021
2022     start = SKIPSPACE1(start);
2023     s = start;
2024     if (isIDFIRST_lazy_if(s,UTF) ||
2025         (allow_pack && *s == ':') ||
2026         (allow_initial_tick && *s == '\'') )
2027     {
2028         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2029         if (check_keyword && keyword(PL_tokenbuf, len, 0))
2030             return start;
2031         start_force(PL_curforce);
2032         if (PL_madskills)
2033             curmad('X', newSVpvn(start,s-start));
2034         if (token == METHOD) {
2035             s = SKIPSPACE1(s);
2036             if (*s == '(')
2037                 PL_expect = XTERM;
2038             else {
2039                 PL_expect = XOPERATOR;
2040             }
2041         }
2042         if (PL_madskills)
2043             curmad('g', newSVpvs( "forced" ));
2044         NEXTVAL_NEXTTOKE.opval
2045             = (OP*)newSVOP(OP_CONST,0,
2046                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2047         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2048         force_next(token);
2049     }
2050     return s;
2051 }
2052
2053 /*
2054  * S_force_ident
2055  * Called when the lexer wants $foo *foo &foo etc, but the program
2056  * text only contains the "foo" portion.  The first argument is a pointer
2057  * to the "foo", and the second argument is the type symbol to prefix.
2058  * Forces the next token to be a "WORD".
2059  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2060  */
2061
2062 STATIC void
2063 S_force_ident(pTHX_ register const char *s, int kind)
2064 {
2065     dVAR;
2066
2067     PERL_ARGS_ASSERT_FORCE_IDENT;
2068
2069     if (*s) {
2070         const STRLEN len = strlen(s);
2071         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2072         start_force(PL_curforce);
2073         NEXTVAL_NEXTTOKE.opval = o;
2074         force_next(WORD);
2075         if (kind) {
2076             o->op_private = OPpCONST_ENTERED;
2077             /* XXX see note in pp_entereval() for why we forgo typo
2078                warnings if the symbol must be introduced in an eval.
2079                GSAR 96-10-12 */
2080             gv_fetchpvn_flags(s, len,
2081                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2082                               : GV_ADD,
2083                               kind == '$' ? SVt_PV :
2084                               kind == '@' ? SVt_PVAV :
2085                               kind == '%' ? SVt_PVHV :
2086                               SVt_PVGV
2087                               );
2088         }
2089     }
2090 }
2091
2092 NV
2093 Perl_str_to_version(pTHX_ SV *sv)
2094 {
2095     NV retval = 0.0;
2096     NV nshift = 1.0;
2097     STRLEN len;
2098     const char *start = SvPV_const(sv,len);
2099     const char * const end = start + len;
2100     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2101
2102     PERL_ARGS_ASSERT_STR_TO_VERSION;
2103
2104     while (start < end) {
2105         STRLEN skip;
2106         UV n;
2107         if (utf)
2108             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2109         else {
2110             n = *(U8*)start;
2111             skip = 1;
2112         }
2113         retval += ((NV)n)/nshift;
2114         start += skip;
2115         nshift *= 1000;
2116     }
2117     return retval;
2118 }
2119
2120 /*
2121  * S_force_version
2122  * Forces the next token to be a version number.
2123  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2124  * and if "guessing" is TRUE, then no new token is created (and the caller
2125  * must use an alternative parsing method).
2126  */
2127
2128 STATIC char *
2129 S_force_version(pTHX_ char *s, int guessing)
2130 {
2131     dVAR;
2132     OP *version = NULL;
2133     char *d;
2134 #ifdef PERL_MAD
2135     I32 startoff = s - SvPVX(PL_linestr);
2136 #endif
2137
2138     PERL_ARGS_ASSERT_FORCE_VERSION;
2139
2140     s = SKIPSPACE1(s);
2141
2142     d = s;
2143     if (*d == 'v')
2144         d++;
2145     if (isDIGIT(*d)) {
2146         while (isDIGIT(*d) || *d == '_' || *d == '.')
2147             d++;
2148 #ifdef PERL_MAD
2149         if (PL_madskills) {
2150             start_force(PL_curforce);
2151             curmad('X', newSVpvn(s,d-s));
2152         }
2153 #endif
2154         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2155             SV *ver;
2156 #ifdef USE_LOCALE_NUMERIC
2157             char *loc = setlocale(LC_NUMERIC, "C");
2158 #endif
2159             s = scan_num(s, &pl_yylval);
2160 #ifdef USE_LOCALE_NUMERIC
2161             setlocale(LC_NUMERIC, loc);
2162 #endif
2163             version = pl_yylval.opval;
2164             ver = cSVOPx(version)->op_sv;
2165             if (SvPOK(ver) && !SvNIOK(ver)) {
2166                 SvUPGRADE(ver, SVt_PVNV);
2167                 SvNV_set(ver, str_to_version(ver));
2168                 SvNOK_on(ver);          /* hint that it is a version */
2169             }
2170         }
2171         else if (guessing) {
2172 #ifdef PERL_MAD
2173             if (PL_madskills) {
2174                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2175                 PL_nextwhite = 0;
2176                 s = SvPVX(PL_linestr) + startoff;
2177             }
2178 #endif
2179             return s;
2180         }
2181     }
2182
2183 #ifdef PERL_MAD
2184     if (PL_madskills && !version) {
2185         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2186         PL_nextwhite = 0;
2187         s = SvPVX(PL_linestr) + startoff;
2188     }
2189 #endif
2190     /* NOTE: The parser sees the package name and the VERSION swapped */
2191     start_force(PL_curforce);
2192     NEXTVAL_NEXTTOKE.opval = version;
2193     force_next(WORD);
2194
2195     return s;
2196 }
2197
2198 /*
2199  * S_force_strict_version
2200  * Forces the next token to be a version number using strict syntax rules.
2201  */
2202
2203 STATIC char *
2204 S_force_strict_version(pTHX_ char *s)
2205 {
2206     dVAR;
2207     OP *version = NULL;
2208 #ifdef PERL_MAD
2209     I32 startoff = s - SvPVX(PL_linestr);
2210 #endif
2211     const char *errstr = NULL;
2212
2213     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2214
2215     while (isSPACE(*s)) /* leading whitespace */
2216         s++;
2217
2218     if (is_STRICT_VERSION(s,&errstr)) {
2219         SV *ver = newSV(0);
2220         s = (char *)scan_version(s, ver, 0);
2221         version = newSVOP(OP_CONST, 0, ver);
2222     }
2223     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2224             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2225     {
2226         PL_bufptr = s;
2227         if (errstr)
2228             yyerror(errstr); /* version required */
2229         return s;
2230     }
2231
2232 #ifdef PERL_MAD
2233     if (PL_madskills && !version) {
2234         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2235         PL_nextwhite = 0;
2236         s = SvPVX(PL_linestr) + startoff;
2237     }
2238 #endif
2239     /* NOTE: The parser sees the package name and the VERSION swapped */
2240     start_force(PL_curforce);
2241     NEXTVAL_NEXTTOKE.opval = version;
2242     force_next(WORD);
2243
2244     return s;
2245 }
2246
2247 /*
2248  * S_tokeq
2249  * Tokenize a quoted string passed in as an SV.  It finds the next
2250  * chunk, up to end of string or a backslash.  It may make a new
2251  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2252  * turns \\ into \.
2253  */
2254
2255 STATIC SV *
2256 S_tokeq(pTHX_ SV *sv)
2257 {
2258     dVAR;
2259     register char *s;
2260     register char *send;
2261     register char *d;
2262     STRLEN len = 0;
2263     SV *pv = sv;
2264
2265     PERL_ARGS_ASSERT_TOKEQ;
2266
2267     if (!SvLEN(sv))
2268         goto finish;
2269
2270     s = SvPV_force(sv, len);
2271     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2272         goto finish;
2273     send = s + len;
2274     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2275     while (s < send && !(*s == '\\' && s[1] == '\\'))
2276         s++;
2277     if (s == send)
2278         goto finish;
2279     d = s;
2280     if ( PL_hints & HINT_NEW_STRING ) {
2281         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2282     }
2283     while (s < send) {
2284         if (*s == '\\') {
2285             if (s + 1 < send && (s[1] == '\\'))
2286                 s++;            /* all that, just for this */
2287         }
2288         *d++ = *s++;
2289     }
2290     *d = '\0';
2291     SvCUR_set(sv, d - SvPVX_const(sv));
2292   finish:
2293     if ( PL_hints & HINT_NEW_STRING )
2294        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2295     return sv;
2296 }
2297
2298 /*
2299  * Now come three functions related to double-quote context,
2300  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2301  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2302  * interact with PL_lex_state, and create fake ( ... ) argument lists
2303  * to handle functions and concatenation.
2304  * They assume that whoever calls them will be setting up a fake
2305  * join call, because each subthing puts a ',' after it.  This lets
2306  *   "lower \luPpEr"
2307  * become
2308  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2309  *
2310  * (I'm not sure whether the spurious commas at the end of lcfirst's
2311  * arguments and join's arguments are created or not).
2312  */
2313
2314 /*
2315  * S_sublex_start
2316  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2317  *
2318  * Pattern matching will set PL_lex_op to the pattern-matching op to
2319  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2320  *
2321  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2322  *
2323  * Everything else becomes a FUNC.
2324  *
2325  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2326  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2327  * call to S_sublex_push().
2328  */
2329
2330 STATIC I32
2331 S_sublex_start(pTHX)
2332 {
2333     dVAR;
2334     register const I32 op_type = pl_yylval.ival;
2335
2336     if (op_type == OP_NULL) {
2337         pl_yylval.opval = PL_lex_op;
2338         PL_lex_op = NULL;
2339         return THING;
2340     }
2341     if (op_type == OP_CONST || op_type == OP_READLINE) {
2342         SV *sv = tokeq(PL_lex_stuff);
2343
2344         if (SvTYPE(sv) == SVt_PVIV) {
2345             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2346             STRLEN len;
2347             const char * const p = SvPV_const(sv, len);
2348             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2349             SvREFCNT_dec(sv);
2350             sv = nsv;
2351         }
2352         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2353         PL_lex_stuff = NULL;
2354         /* Allow <FH> // "foo" */
2355         if (op_type == OP_READLINE)
2356             PL_expect = XTERMORDORDOR;
2357         return THING;
2358     }
2359     else if (op_type == OP_BACKTICK && PL_lex_op) {
2360         /* readpipe() vas overriden */
2361         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2362         pl_yylval.opval = PL_lex_op;
2363         PL_lex_op = NULL;
2364         PL_lex_stuff = NULL;
2365         return THING;
2366     }
2367
2368     PL_sublex_info.super_state = PL_lex_state;
2369     PL_sublex_info.sub_inwhat = (U16)op_type;
2370     PL_sublex_info.sub_op = PL_lex_op;
2371     PL_lex_state = LEX_INTERPPUSH;
2372
2373     PL_expect = XTERM;
2374     if (PL_lex_op) {
2375         pl_yylval.opval = PL_lex_op;
2376         PL_lex_op = NULL;
2377         return PMFUNC;
2378     }
2379     else
2380         return FUNC;
2381 }
2382
2383 /*
2384  * S_sublex_push
2385  * Create a new scope to save the lexing state.  The scope will be
2386  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2387  * to the uc, lc, etc. found before.
2388  * Sets PL_lex_state to LEX_INTERPCONCAT.
2389  */
2390
2391 STATIC I32
2392 S_sublex_push(pTHX)
2393 {
2394     dVAR;
2395     ENTER;
2396
2397     PL_lex_state = PL_sublex_info.super_state;
2398     SAVEBOOL(PL_lex_dojoin);
2399     SAVEI32(PL_lex_brackets);
2400     SAVEI32(PL_lex_allbrackets);
2401     SAVEI8(PL_lex_fakeeof);
2402     SAVEI32(PL_lex_casemods);
2403     SAVEI32(PL_lex_starts);
2404     SAVEI8(PL_lex_state);
2405     SAVEVPTR(PL_lex_inpat);
2406     SAVEI16(PL_lex_inwhat);
2407     SAVECOPLINE(PL_curcop);
2408     SAVEPPTR(PL_bufptr);
2409     SAVEPPTR(PL_bufend);
2410     SAVEPPTR(PL_oldbufptr);
2411     SAVEPPTR(PL_oldoldbufptr);
2412     SAVEPPTR(PL_last_lop);
2413     SAVEPPTR(PL_last_uni);
2414     SAVEPPTR(PL_linestart);
2415     SAVESPTR(PL_linestr);
2416     SAVEGENERICPV(PL_lex_brackstack);
2417     SAVEGENERICPV(PL_lex_casestack);
2418
2419     PL_linestr = PL_lex_stuff;
2420     PL_lex_stuff = NULL;
2421
2422     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2423         = SvPVX(PL_linestr);
2424     PL_bufend += SvCUR(PL_linestr);
2425     PL_last_lop = PL_last_uni = NULL;
2426     SAVEFREESV(PL_linestr);
2427
2428     PL_lex_dojoin = FALSE;
2429     PL_lex_brackets = 0;
2430     PL_lex_allbrackets = 0;
2431     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2432     Newx(PL_lex_brackstack, 120, char);
2433     Newx(PL_lex_casestack, 12, char);
2434     PL_lex_casemods = 0;
2435     *PL_lex_casestack = '\0';
2436     PL_lex_starts = 0;
2437     PL_lex_state = LEX_INTERPCONCAT;
2438     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2439
2440     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2441     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2442     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2443         PL_lex_inpat = PL_sublex_info.sub_op;
2444     else
2445         PL_lex_inpat = NULL;
2446
2447     return '(';
2448 }
2449
2450 /*
2451  * S_sublex_done
2452  * Restores lexer state after a S_sublex_push.
2453  */
2454
2455 STATIC I32
2456 S_sublex_done(pTHX)
2457 {
2458     dVAR;
2459     if (!PL_lex_starts++) {
2460         SV * const sv = newSVpvs("");
2461         if (SvUTF8(PL_linestr))
2462             SvUTF8_on(sv);
2463         PL_expect = XOPERATOR;
2464         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2465         return THING;
2466     }
2467
2468     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2469         PL_lex_state = LEX_INTERPCASEMOD;
2470         return yylex();
2471     }
2472
2473     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2474     assert(PL_lex_inwhat != OP_TRANSR);
2475     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2476         PL_linestr = PL_lex_repl;
2477         PL_lex_inpat = 0;
2478         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2479         PL_bufend += SvCUR(PL_linestr);
2480         PL_last_lop = PL_last_uni = NULL;
2481         SAVEFREESV(PL_linestr);
2482         PL_lex_dojoin = FALSE;
2483         PL_lex_brackets = 0;
2484         PL_lex_allbrackets = 0;
2485         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2486         PL_lex_casemods = 0;
2487         *PL_lex_casestack = '\0';
2488         PL_lex_starts = 0;
2489         if (SvEVALED(PL_lex_repl)) {
2490             PL_lex_state = LEX_INTERPNORMAL;
2491             PL_lex_starts++;
2492             /*  we don't clear PL_lex_repl here, so that we can check later
2493                 whether this is an evalled subst; that means we rely on the
2494                 logic to ensure sublex_done() is called again only via the
2495                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2496         }
2497         else {
2498             PL_lex_state = LEX_INTERPCONCAT;
2499             PL_lex_repl = NULL;
2500         }
2501         return ',';
2502     }
2503     else {
2504 #ifdef PERL_MAD
2505         if (PL_madskills) {
2506             if (PL_thiswhite) {
2507                 if (!PL_endwhite)
2508                     PL_endwhite = newSVpvs("");
2509                 sv_catsv(PL_endwhite, PL_thiswhite);
2510                 PL_thiswhite = 0;
2511             }
2512             if (PL_thistoken)
2513                 sv_setpvs(PL_thistoken,"");
2514             else
2515                 PL_realtokenstart = -1;
2516         }
2517 #endif
2518         LEAVE;
2519         PL_bufend = SvPVX(PL_linestr);
2520         PL_bufend += SvCUR(PL_linestr);
2521         PL_expect = XOPERATOR;
2522         PL_sublex_info.sub_inwhat = 0;
2523         return ')';
2524     }
2525 }
2526
2527 /*
2528   scan_const
2529
2530   Extracts a pattern, double-quoted string, or transliteration.  This
2531   is terrifying code.
2532
2533   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2534   processing a pattern (PL_lex_inpat is true), a transliteration
2535   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2536
2537   Returns a pointer to the character scanned up to. If this is
2538   advanced from the start pointer supplied (i.e. if anything was
2539   successfully parsed), will leave an OP for the substring scanned
2540   in pl_yylval. Caller must intuit reason for not parsing further
2541   by looking at the next characters herself.
2542
2543   In patterns:
2544     backslashes:
2545       constants: \N{NAME} only
2546       case and quoting: \U \Q \E
2547     stops on @ and $, but not for $ as tail anchor
2548
2549   In transliterations:
2550     characters are VERY literal, except for - not at the start or end
2551     of the string, which indicates a range. If the range is in bytes,
2552     scan_const expands the range to the full set of intermediate
2553     characters. If the range is in utf8, the hyphen is replaced with
2554     a certain range mark which will be handled by pmtrans() in op.c.
2555
2556   In double-quoted strings:
2557     backslashes:
2558       double-quoted style: \r and \n
2559       constants: \x31, etc.
2560       deprecated backrefs: \1 (in substitution replacements)
2561       case and quoting: \U \Q \E
2562     stops on @ and $
2563
2564   scan_const does *not* construct ops to handle interpolated strings.
2565   It stops processing as soon as it finds an embedded $ or @ variable
2566   and leaves it to the caller to work out what's going on.
2567
2568   embedded arrays (whether in pattern or not) could be:
2569       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2570
2571   $ in double-quoted strings must be the symbol of an embedded scalar.
2572
2573   $ in pattern could be $foo or could be tail anchor.  Assumption:
2574   it's a tail anchor if $ is the last thing in the string, or if it's
2575   followed by one of "()| \r\n\t"
2576
2577   \1 (backreferences) are turned into $1
2578
2579   The structure of the code is
2580       while (there's a character to process) {
2581           handle transliteration ranges
2582           skip regexp comments /(?#comment)/ and codes /(?{code})/
2583           skip #-initiated comments in //x patterns
2584           check for embedded arrays
2585           check for embedded scalars
2586           if (backslash) {
2587               deprecate \1 in substitution replacements
2588               handle string-changing backslashes \l \U \Q \E, etc.
2589               switch (what was escaped) {
2590                   handle \- in a transliteration (becomes a literal -)
2591                   if a pattern and not \N{, go treat as regular character
2592                   handle \132 (octal characters)
2593                   handle \x15 and \x{1234} (hex characters)
2594                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2595                   handle \cV (control characters)
2596                   handle printf-style backslashes (\f, \r, \n, etc)
2597               } (end switch)
2598               continue
2599           } (end if backslash)
2600           handle regular character
2601     } (end while character to read)
2602
2603 */
2604
2605 STATIC char *
2606 S_scan_const(pTHX_ char *start)
2607 {
2608     dVAR;
2609     register char *send = PL_bufend;            /* end of the constant */
2610     SV *sv = newSV(send - start);               /* sv for the constant.  See
2611                                                    note below on sizing. */
2612     register char *s = start;                   /* start of the constant */
2613     register char *d = SvPVX(sv);               /* destination for copies */
2614     bool dorange = FALSE;                       /* are we in a translit range? */
2615     bool didrange = FALSE;                      /* did we just finish a range? */
2616     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2617     I32  this_utf8 = UTF;                       /* Is the source string assumed
2618                                                    to be UTF8?  But, this can
2619                                                    show as true when the source
2620                                                    isn't utf8, as for example
2621                                                    when it is entirely composed
2622                                                    of hex constants */
2623
2624     /* Note on sizing:  The scanned constant is placed into sv, which is
2625      * initialized by newSV() assuming one byte of output for every byte of
2626      * input.  This routine expects newSV() to allocate an extra byte for a
2627      * trailing NUL, which this routine will append if it gets to the end of
2628      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2629      * CAPITAL LETTER A}), or more output than input if the constant ends up
2630      * recoded to utf8, but each time a construct is found that might increase
2631      * the needed size, SvGROW() is called.  Its size parameter each time is
2632      * based on the best guess estimate at the time, namely the length used so
2633      * far, plus the length the current construct will occupy, plus room for
2634      * the trailing NUL, plus one byte for every input byte still unscanned */
2635
2636     UV uv;
2637 #ifdef EBCDIC
2638     UV literal_endpoint = 0;
2639     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2640 #endif
2641
2642     PERL_ARGS_ASSERT_SCAN_CONST;
2643
2644     assert(PL_lex_inwhat != OP_TRANSR);
2645     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2646         /* If we are doing a trans and we know we want UTF8 set expectation */
2647         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2648         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2649     }
2650
2651
2652     while (s < send || dorange) {
2653
2654         /* get transliterations out of the way (they're most literal) */
2655         if (PL_lex_inwhat == OP_TRANS) {
2656             /* expand a range A-Z to the full set of characters.  AIE! */
2657             if (dorange) {
2658                 I32 i;                          /* current expanded character */
2659                 I32 min;                        /* first character in range */
2660                 I32 max;                        /* last character in range */
2661
2662 #ifdef EBCDIC
2663                 UV uvmax = 0;
2664 #endif
2665
2666                 if (has_utf8
2667 #ifdef EBCDIC
2668                     && !native_range
2669 #endif
2670                     ) {
2671                     char * const c = (char*)utf8_hop((U8*)d, -1);
2672                     char *e = d++;
2673                     while (e-- > c)
2674                         *(e + 1) = *e;
2675                     *c = (char)UTF_TO_NATIVE(0xff);
2676                     /* mark the range as done, and continue */
2677                     dorange = FALSE;
2678                     didrange = TRUE;
2679                     continue;
2680                 }
2681
2682                 i = d - SvPVX_const(sv);                /* remember current offset */
2683 #ifdef EBCDIC
2684                 SvGROW(sv,
2685                        SvLEN(sv) + (has_utf8 ?
2686                                     (512 - UTF_CONTINUATION_MARK +
2687                                      UNISKIP(0x100))
2688                                     : 256));
2689                 /* How many two-byte within 0..255: 128 in UTF-8,
2690                  * 96 in UTF-8-mod. */
2691 #else
2692                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2693 #endif
2694                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2695 #ifdef EBCDIC
2696                 if (has_utf8) {
2697                     int j;
2698                     for (j = 0; j <= 1; j++) {
2699                         char * const c = (char*)utf8_hop((U8*)d, -1);
2700                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2701                         if (j)
2702                             min = (U8)uv;
2703                         else if (uv < 256)
2704                             max = (U8)uv;
2705                         else {
2706                             max = (U8)0xff; /* only to \xff */
2707                             uvmax = uv; /* \x{100} to uvmax */
2708                         }
2709                         d = c; /* eat endpoint chars */
2710                      }
2711                 }
2712                else {
2713 #endif
2714                    d -= 2;              /* eat the first char and the - */
2715                    min = (U8)*d;        /* first char in range */
2716                    max = (U8)d[1];      /* last char in range  */
2717 #ifdef EBCDIC
2718                }
2719 #endif
2720
2721                 if (min > max) {
2722                     Perl_croak(aTHX_
2723                                "Invalid range \"%c-%c\" in transliteration operator",
2724                                (char)min, (char)max);
2725                 }
2726
2727 #ifdef EBCDIC
2728                 if (literal_endpoint == 2 &&
2729                     ((isLOWER(min) && isLOWER(max)) ||
2730                      (isUPPER(min) && isUPPER(max)))) {
2731                     if (isLOWER(min)) {
2732                         for (i = min; i <= max; i++)
2733                             if (isLOWER(i))
2734                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2735                     } else {
2736                         for (i = min; i <= max; i++)
2737                             if (isUPPER(i))
2738                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2739                     }
2740                 }
2741                 else
2742 #endif
2743                     for (i = min; i <= max; i++)
2744 #ifdef EBCDIC
2745                         if (has_utf8) {
2746                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2747                             if (UNI_IS_INVARIANT(ch))
2748                                 *d++ = (U8)i;
2749                             else {
2750                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2751                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2752                             }
2753                         }
2754                         else
2755 #endif
2756                             *d++ = (char)i;
2757
2758 #ifdef EBCDIC
2759                 if (uvmax) {
2760                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2761                     if (uvmax > 0x101)
2762                         *d++ = (char)UTF_TO_NATIVE(0xff);
2763                     if (uvmax > 0x100)
2764                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2765                 }
2766 #endif
2767
2768                 /* mark the range as done, and continue */
2769                 dorange = FALSE;
2770                 didrange = TRUE;
2771 #ifdef EBCDIC
2772                 literal_endpoint = 0;
2773 #endif
2774                 continue;
2775             }
2776
2777             /* range begins (ignore - as first or last char) */
2778             else if (*s == '-' && s+1 < send  && s != start) {
2779                 if (didrange) {
2780                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2781                 }
2782                 if (has_utf8
2783 #ifdef EBCDIC
2784                     && !native_range
2785 #endif
2786                     ) {
2787                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2788                     s++;
2789                     continue;
2790                 }
2791                 dorange = TRUE;
2792                 s++;
2793             }
2794             else {
2795                 didrange = FALSE;
2796 #ifdef EBCDIC
2797                 literal_endpoint = 0;
2798                 native_range = TRUE;
2799 #endif
2800             }
2801         }
2802
2803         /* if we get here, we're not doing a transliteration */
2804
2805         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2806            except for the last char, which will be done separately. */
2807         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2808             if (s[2] == '#') {
2809                 while (s+1 < send && *s != ')')
2810                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2811             }
2812             else if (s[2] == '{' /* This should match regcomp.c */
2813                     || (s[2] == '?' && s[3] == '{'))
2814             {
2815                 I32 count = 1;
2816                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2817                 char c;
2818
2819                 while (count && (c = *regparse)) {
2820                     if (c == '\\' && regparse[1])
2821                         regparse++;
2822                     else if (c == '{')
2823                         count++;
2824                     else if (c == '}')
2825                         count--;
2826                     regparse++;
2827                 }
2828                 if (*regparse != ')')
2829                     regparse--;         /* Leave one char for continuation. */
2830                 while (s < regparse)
2831                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2832             }
2833         }
2834
2835         /* likewise skip #-initiated comments in //x patterns */
2836         else if (*s == '#' && PL_lex_inpat &&
2837           ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2838             while (s+1 < send && *s != '\n')
2839                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2840         }
2841
2842         /* check for embedded arrays
2843            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2844            */
2845         else if (*s == '@' && s[1]) {
2846             if (isALNUM_lazy_if(s+1,UTF))
2847                 break;
2848             if (strchr(":'{$", s[1]))
2849                 break;
2850             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2851                 break; /* in regexp, neither @+ nor @- are interpolated */
2852         }
2853
2854         /* check for embedded scalars.  only stop if we're sure it's a
2855            variable.
2856         */
2857         else if (*s == '$') {
2858             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2859                 break;
2860             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2861                 if (s[1] == '\\') {
2862                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2863                                    "Possible unintended interpolation of $\\ in regex");
2864                 }
2865                 break;          /* in regexp, $ might be tail anchor */
2866             }
2867         }
2868
2869         /* End of else if chain - OP_TRANS rejoin rest */
2870
2871         /* backslashes */
2872         if (*s == '\\' && s+1 < send) {
2873             char* e;    /* Can be used for ending '}', etc. */
2874
2875             s++;
2876
2877             /* warn on \1 - \9 in substitution replacements, but note that \11
2878              * is an octal; and \19 is \1 followed by '9' */
2879             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2880                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2881             {
2882                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2883                 *--s = '$';
2884                 break;
2885             }
2886
2887             /* string-change backslash escapes */
2888             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2889                 --s;
2890                 break;
2891             }
2892             /* In a pattern, process \N, but skip any other backslash escapes.
2893              * This is because we don't want to translate an escape sequence
2894              * into a meta symbol and have the regex compiler use the meta
2895              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2896              * in spite of this, we do have to process \N here while the proper
2897              * charnames handler is in scope.  See bugs #56444 and #62056.
2898              * There is a complication because \N in a pattern may also stand
2899              * for 'match a non-nl', and not mean a charname, in which case its
2900              * processing should be deferred to the regex compiler.  To be a
2901              * charname it must be followed immediately by a '{', and not look
2902              * like \N followed by a curly quantifier, i.e., not something like
2903              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2904              * quantifier */
2905             else if (PL_lex_inpat
2906                     && (*s != 'N'
2907                         || s[1] != '{'
2908                         || regcurly(s + 1)))
2909             {
2910                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2911                 goto default_action;
2912             }
2913
2914             switch (*s) {
2915
2916             /* quoted - in transliterations */
2917             case '-':
2918                 if (PL_lex_inwhat == OP_TRANS) {
2919                     *d++ = *s++;
2920                     continue;
2921                 }
2922                 /* FALL THROUGH */
2923             default:
2924                 {
2925                     if ((isALPHA(*s) || isDIGIT(*s)))
2926                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2927                                        "Unrecognized escape \\%c passed through",
2928                                        *s);
2929                     /* default action is to copy the quoted character */
2930                     goto default_action;
2931                 }
2932
2933             /* eg. \132 indicates the octal constant 0132 */
2934             case '0': case '1': case '2': case '3':
2935             case '4': case '5': case '6': case '7':
2936                 {
2937                     I32 flags = 0;
2938                     STRLEN len = 3;
2939                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2940                     s += len;
2941                 }
2942                 goto NUM_ESCAPE_INSERT;
2943
2944             /* eg. \o{24} indicates the octal constant \024 */
2945             case 'o':
2946                 {
2947                     STRLEN len;
2948                     const char* error;
2949
2950                     bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2951                     s += len;
2952                     if (! valid) {
2953                         yyerror(error);
2954                         continue;
2955                     }
2956                     goto NUM_ESCAPE_INSERT;
2957                 }
2958
2959             /* eg. \x24 indicates the hex constant 0x24 */
2960             case 'x':
2961                 ++s;
2962                 if (*s == '{') {
2963                     char* const e = strchr(s, '}');
2964                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2965                       PERL_SCAN_DISALLOW_PREFIX;
2966                     STRLEN len;
2967
2968                     ++s;
2969                     if (!e) {
2970                         yyerror("Missing right brace on \\x{}");
2971                         continue;
2972                     }
2973                     len = e - s;
2974                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2975                     s = e + 1;
2976                 }
2977                 else {
2978                     {
2979                         STRLEN len = 2;
2980                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2981                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2982                         s += len;
2983                     }
2984                 }
2985
2986               NUM_ESCAPE_INSERT:
2987                 /* Insert oct or hex escaped character.  There will always be
2988                  * enough room in sv since such escapes will be longer than any
2989                  * UTF-8 sequence they can end up as, except if they force us
2990                  * to recode the rest of the string into utf8 */
2991
2992                 /* Here uv is the ordinal of the next character being added in
2993                  * unicode (converted from native). */
2994                 if (!UNI_IS_INVARIANT(uv)) {
2995                     if (!has_utf8 && uv > 255) {
2996                         /* Might need to recode whatever we have accumulated so
2997                          * far if it contains any chars variant in utf8 or
2998                          * utf-ebcdic. */
2999
3000                         SvCUR_set(sv, d - SvPVX_const(sv));
3001                         SvPOK_on(sv);
3002                         *d = '\0';
3003                         /* See Note on sizing above.  */
3004                         sv_utf8_upgrade_flags_grow(sv,
3005                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3006                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
3007                         d = SvPVX(sv) + SvCUR(sv);
3008                         has_utf8 = TRUE;
3009                     }
3010
3011                     if (has_utf8) {
3012                         d = (char*)uvuni_to_utf8((U8*)d, uv);
3013                         if (PL_lex_inwhat == OP_TRANS &&
3014                             PL_sublex_info.sub_op) {
3015                             PL_sublex_info.sub_op->op_private |=
3016                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3017                                              : OPpTRANS_TO_UTF);
3018                         }
3019 #ifdef EBCDIC
3020                         if (uv > 255 && !dorange)
3021                             native_range = FALSE;
3022 #endif
3023                     }
3024                     else {
3025                         *d++ = (char)uv;
3026                     }
3027                 }
3028                 else {
3029                     *d++ = (char) uv;
3030                 }
3031                 continue;
3032
3033             case 'N':
3034                 /* In a non-pattern \N must be a named character, like \N{LATIN
3035                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
3036                  * mean to match a non-newline.  For non-patterns, named
3037                  * characters are converted to their string equivalents. In
3038                  * patterns, named characters are not converted to their
3039                  * ultimate forms for the same reasons that other escapes
3040                  * aren't.  Instead, they are converted to the \N{U+...} form
3041                  * to get the value from the charnames that is in effect right
3042                  * now, while preserving the fact that it was a named character
3043                  * so that the regex compiler knows this */
3044
3045                 /* This section of code doesn't generally use the
3046                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
3047                  * a close examination of this macro and determined it is a
3048                  * no-op except on utfebcdic variant characters.  Every
3049                  * character generated by this that would normally need to be
3050                  * enclosed by this macro is invariant, so the macro is not
3051                  * needed, and would complicate use of copy().  XXX There are
3052                  * other parts of this file where the macro is used
3053                  * inconsistently, but are saved by it being a no-op */
3054
3055                 /* The structure of this section of code (besides checking for
3056                  * errors and upgrading to utf8) is:
3057                  *  Further disambiguate between the two meanings of \N, and if
3058                  *      not a charname, go process it elsewhere
3059                  *  If of form \N{U+...}, pass it through if a pattern;
3060                  *      otherwise convert to utf8
3061                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3062                  *  pattern; otherwise convert to utf8 */
3063
3064                 /* Here, s points to the 'N'; the test below is guaranteed to
3065                  * succeed if we are being called on a pattern as we already
3066                  * know from a test above that the next character is a '{'.
3067                  * On a non-pattern \N must mean 'named sequence, which
3068                  * requires braces */
3069                 s++;
3070                 if (*s != '{') {
3071                     yyerror("Missing braces on \\N{}");
3072                     continue;
3073                 }
3074                 s++;
3075
3076                 /* If there is no matching '}', it is an error. */
3077                 if (! (e = strchr(s, '}'))) {
3078                     if (! PL_lex_inpat) {
3079                         yyerror("Missing right brace on \\N{}");
3080                     } else {
3081                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3082                     }
3083                     continue;
3084                 }
3085
3086                 /* Here it looks like a named character */
3087
3088                 if (PL_lex_inpat) {
3089
3090                     /* XXX This block is temporary code.  \N{} implies that the
3091                      * pattern is to have Unicode semantics, and therefore
3092                      * currently has to be encoded in utf8.  By putting it in
3093                      * utf8 now, we save a whole pass in the regular expression
3094                      * compiler.  Once that code is changed so Unicode
3095                      * semantics doesn't necessarily have to be in utf8, this
3096                      * block should be removed */
3097                     if (!has_utf8) {
3098                         SvCUR_set(sv, d - SvPVX_const(sv));
3099                         SvPOK_on(sv);
3100                         *d = '\0';
3101                         /* See Note on sizing above.  */
3102                         sv_utf8_upgrade_flags_grow(sv,
3103                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3104                                         /* 5 = '\N{' + cur char + NUL */
3105                                         (STRLEN)(send - s) + 5);
3106                         d = SvPVX(sv) + SvCUR(sv);
3107                         has_utf8 = TRUE;
3108                     }
3109                 }
3110
3111                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3112                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3113                                 | PERL_SCAN_DISALLOW_PREFIX;
3114                     STRLEN len;
3115
3116                     /* For \N{U+...}, the '...' is a unicode value even on
3117                      * EBCDIC machines */
3118                     s += 2;         /* Skip to next char after the 'U+' */
3119                     len = e - s;
3120                     uv = grok_hex(s, &len, &flags, NULL);
3121                     if (len == 0 || len != (STRLEN)(e - s)) {
3122                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3123                         s = e + 1;
3124                         continue;
3125                     }
3126
3127                     if (PL_lex_inpat) {
3128
3129                         /* Pass through to the regex compiler unchanged.  The
3130                          * reason we evaluated the number above is to make sure
3131                          * there wasn't a syntax error. */
3132                         s -= 5;     /* Include the '\N{U+' */
3133                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3134                         d += e - s + 1;
3135                     }
3136                     else {  /* Not a pattern: convert the hex to string */
3137
3138                          /* If destination is not in utf8, unconditionally
3139                           * recode it to be so.  This is because \N{} implies
3140                           * Unicode semantics, and scalars have to be in utf8
3141                           * to guarantee those semantics */
3142                         if (! has_utf8) {
3143                             SvCUR_set(sv, d - SvPVX_const(sv));
3144                             SvPOK_on(sv);
3145                             *d = '\0';
3146                             /* See Note on sizing above.  */
3147                             sv_utf8_upgrade_flags_grow(
3148                                         sv,
3149                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3150                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3151                             d = SvPVX(sv) + SvCUR(sv);
3152                             has_utf8 = TRUE;
3153                         }
3154
3155                         /* Add the string to the output */
3156                         if (UNI_IS_INVARIANT(uv)) {
3157                             *d++ = (char) uv;
3158                         }
3159                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3160                     }
3161                 }
3162                 else { /* Here is \N{NAME} but not \N{U+...}. */
3163
3164                     SV *res;            /* result from charnames */
3165                     const char *str;    /* the string in 'res' */
3166                     STRLEN len;         /* its length */
3167
3168                     /* Get the value for NAME */
3169                     res = newSVpvn(s, e - s);
3170                     res = new_constant( NULL, 0, "charnames",
3171                                         /* includes all of: \N{...} */
3172                                         res, NULL, s - 3, e - s + 4 );
3173
3174                     /* Most likely res will be in utf8 already since the
3175                      * standard charnames uses pack U, but a custom translator
3176                      * can leave it otherwise, so make sure.  XXX This can be
3177                      * revisited to not have charnames use utf8 for characters
3178                      * that don't need it when regexes don't have to be in utf8
3179                      * for Unicode semantics.  If doing so, remember EBCDIC */
3180                     sv_utf8_upgrade(res);
3181                     str = SvPV_const(res, len);
3182
3183                     /* Don't accept malformed input */
3184                     if (! is_utf8_string((U8 *) str, len)) {
3185                         yyerror("Malformed UTF-8 returned by \\N");
3186                     }
3187                     else if (PL_lex_inpat) {
3188
3189                         if (! len) { /* The name resolved to an empty string */
3190                             Copy("\\N{}", d, 4, char);
3191                             d += 4;
3192                         }
3193                         else {
3194                             /* In order to not lose information for the regex
3195                             * compiler, pass the result in the specially made
3196                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3197                             * the code points in hex of each character
3198                             * returned by charnames */
3199
3200                             const char *str_end = str + len;
3201                             STRLEN char_length;     /* cur char's byte length */
3202                             STRLEN output_length;   /* and the number of bytes
3203                                                        after this is translated
3204                                                        into hex digits */
3205                             const STRLEN off = d - SvPVX_const(sv);
3206
3207                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3208                              * max('U+', '.'); and 1 for NUL */
3209                             char hex_string[2 * UTF8_MAXBYTES + 5];
3210
3211                             /* Get the first character of the result. */
3212                             U32 uv = utf8n_to_uvuni((U8 *) str,
3213                                                     len,
3214                                                     &char_length,
3215                                                     UTF8_ALLOW_ANYUV);
3216
3217                             /* The call to is_utf8_string() above hopefully
3218                              * guarantees that there won't be an error.  But
3219                              * it's easy here to make sure.  The function just
3220                              * above warns and returns 0 if invalid utf8, but
3221                              * it can also return 0 if the input is validly a
3222                              * NUL. Disambiguate */
3223                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3224                                 uv = UNICODE_REPLACEMENT;
3225                             }
3226
3227                             /* Convert first code point to hex, including the
3228                              * boiler plate before it */
3229                             output_length =
3230                                 my_snprintf(hex_string, sizeof(hex_string),
3231                                             "\\N{U+%X", (unsigned int) uv);
3232
3233                             /* Make sure there is enough space to hold it */
3234                             d = off + SvGROW(sv, off
3235                                                  + output_length
3236                                                  + (STRLEN)(send - e)
3237                                                  + 2);  /* '}' + NUL */
3238                             /* And output it */
3239                             Copy(hex_string, d, output_length, char);
3240                             d += output_length;
3241
3242                             /* For each subsequent character, append dot and
3243                              * its ordinal in hex */
3244                             while ((str += char_length) < str_end) {
3245                                 const STRLEN off = d - SvPVX_const(sv);
3246                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3247                                                         str_end - str,
3248                                                         &char_length,
3249                                                         UTF8_ALLOW_ANYUV);
3250                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3251                                     uv = UNICODE_REPLACEMENT;
3252                                 }
3253
3254                                 output_length =
3255                                     my_snprintf(hex_string, sizeof(hex_string),
3256                                                 ".%X", (unsigned int) uv);
3257
3258                                 d = off + SvGROW(sv, off
3259                                                      + output_length
3260                                                      + (STRLEN)(send - e)
3261                                                      + 2);      /* '}' +  NUL */
3262                                 Copy(hex_string, d, output_length, char);
3263                                 d += output_length;
3264                             }
3265
3266                             *d++ = '}'; /* Done.  Add the trailing brace */
3267                         }
3268                     }
3269                     else { /* Here, not in a pattern.  Convert the name to a
3270                             * string. */
3271
3272                          /* If destination is not in utf8, unconditionally
3273                           * recode it to be so.  This is because \N{} implies
3274                           * Unicode semantics, and scalars have to be in utf8
3275                           * to guarantee those semantics */
3276                         if (! has_utf8) {
3277                             SvCUR_set(sv, d - SvPVX_const(sv));
3278                             SvPOK_on(sv);
3279                             *d = '\0';
3280                             /* See Note on sizing above.  */
3281                             sv_utf8_upgrade_flags_grow(sv,
3282                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3283                                                 len + (STRLEN)(send - s) + 1);
3284                             d = SvPVX(sv) + SvCUR(sv);
3285                             has_utf8 = TRUE;
3286                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3287
3288                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3289                              * set correctly here). */
3290                             const STRLEN off = d - SvPVX_const(sv);
3291                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3292                         }
3293                         Copy(str, d, len, char);
3294                         d += len;
3295                     }
3296                     SvREFCNT_dec(res);
3297
3298                     /* Deprecate non-approved name syntax */
3299                     if (ckWARN_d(WARN_DEPRECATED)) {
3300                         bool problematic = FALSE;
3301                         char* i = s;
3302
3303                         /* For non-ut8 input, look to see that the first
3304                          * character is an alpha, then loop through the rest
3305                          * checking that each is a continuation */
3306                         if (! this_utf8) {
3307                             if (! isALPHAU(*i)) problematic = TRUE;
3308                             else for (i = s + 1; i < e; i++) {
3309                                 if (isCHARNAME_CONT(*i)) continue;
3310                                 problematic = TRUE;
3311                                 break;
3312                             }
3313                         }
3314                         else {
3315                             /* Similarly for utf8.  For invariants can check
3316                              * directly.  We accept anything above the latin1
3317                              * range because it is immaterial to Perl if it is
3318                              * correct or not, and is expensive to check.  But
3319                              * it is fairly easy in the latin1 range to convert
3320                              * the variants into a single character and check
3321                              * those */
3322                             if (UTF8_IS_INVARIANT(*i)) {
3323                                 if (! isALPHAU(*i)) problematic = TRUE;
3324                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3325                                 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3326                                                                             *(i+1)))))
3327                                 {
3328                                     problematic = TRUE;
3329                                 }
3330                             }
3331                             if (! problematic) for (i = s + UTF8SKIP(s);
3332                                                     i < e;
3333                                                     i+= UTF8SKIP(i))
3334                             {
3335                                 if (UTF8_IS_INVARIANT(*i)) {
3336                                     if (isCHARNAME_CONT(*i)) continue;
3337                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3338                                     continue;
3339                                 } else if (isCHARNAME_CONT(
3340                                             UNI_TO_NATIVE(
3341                                             TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3342                                 {
3343                                     continue;
3344                                 }
3345                                 problematic = TRUE;
3346                                 break;
3347                             }
3348                         }
3349                         if (problematic) {
3350                             /* The e-i passed to the final %.*s makes sure that
3351                              * should the trailing NUL be missing that this
3352                              * print won't run off the end of the string */
3353                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3354                                         "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s",
3355                                         (int)(i - s + 1), s, (int)(e - i), i + 1);
3356                         }
3357                     }
3358                 } /* End \N{NAME} */
3359 #ifdef EBCDIC
3360                 if (!dorange)
3361                     native_range = FALSE; /* \N{} is defined to be Unicode */
3362 #endif
3363                 s = e + 1;  /* Point to just after the '}' */
3364                 continue;
3365
3366             /* \c is a control character */
3367             case 'c':
3368                 s++;
3369                 if (s < send) {
3370                     *d++ = grok_bslash_c(*s++, 1);
3371                 }
3372                 else {
3373                     yyerror("Missing control char name in \\c");
3374                 }
3375                 continue;
3376
3377             /* printf-style backslashes, formfeeds, newlines, etc */
3378             case 'b':
3379                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3380                 break;
3381             case 'n':
3382                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3383                 break;
3384             case 'r':
3385                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3386                 break;
3387             case 'f':
3388                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3389                 break;
3390             case 't':
3391                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3392                 break;
3393             case 'e':
3394                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3395                 break;
3396             case 'a':
3397                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3398                 break;
3399             } /* end switch */
3400
3401             s++;
3402             continue;
3403         } /* end if (backslash) */
3404 #ifdef EBCDIC
3405         else
3406             literal_endpoint++;
3407 #endif
3408
3409     default_action:
3410         /* If we started with encoded form, or already know we want it,
3411            then encode the next character */
3412         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3413             STRLEN len  = 1;
3414
3415
3416             /* One might think that it is wasted effort in the case of the
3417              * source being utf8 (this_utf8 == TRUE) to take the next character
3418              * in the source, convert it to an unsigned value, and then convert
3419              * it back again.  But the source has not been validated here.  The
3420              * routine that does the conversion checks for errors like
3421              * malformed utf8 */
3422
3423             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3424             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3425             if (!has_utf8) {
3426                 SvCUR_set(sv, d - SvPVX_const(sv));
3427                 SvPOK_on(sv);
3428                 *d = '\0';
3429                 /* See Note on sizing above.  */
3430                 sv_utf8_upgrade_flags_grow(sv,
3431                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3432                                         need + (STRLEN)(send - s) + 1);
3433                 d = SvPVX(sv) + SvCUR(sv);
3434                 has_utf8 = TRUE;
3435             } else if (need > len) {
3436                 /* encoded value larger than old, may need extra space (NOTE:
3437                  * SvCUR() is not set correctly here).   See Note on sizing
3438                  * above.  */
3439                 const STRLEN off = d - SvPVX_const(sv);
3440                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3441             }
3442             s += len;
3443
3444             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3445 #ifdef EBCDIC
3446             if (uv > 255 && !dorange)
3447                 native_range = FALSE;
3448 #endif
3449         }
3450         else {
3451             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3452         }
3453     } /* while loop to process each character */
3454
3455     /* terminate the string and set up the sv */
3456     *d = '\0';
3457     SvCUR_set(sv, d - SvPVX_const(sv));
3458     if (SvCUR(sv) >= SvLEN(sv))
3459         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3460
3461     SvPOK_on(sv);
3462     if (PL_encoding && !has_utf8) {
3463         sv_recode_to_utf8(sv, PL_encoding);
3464         if (SvUTF8(sv))
3465             has_utf8 = TRUE;
3466     }
3467     if (has_utf8) {
3468         SvUTF8_on(sv);
3469         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3470             PL_sublex_info.sub_op->op_private |=
3471                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3472         }
3473     }
3474
3475     /* shrink the sv if we allocated more than we used */
3476     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3477         SvPV_shrink_to_cur(sv);
3478     }
3479
3480     /* return the substring (via pl_yylval) only if we parsed anything */
3481     if (s > PL_bufptr) {
3482         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3483             const char *const key = PL_lex_inpat ? "qr" : "q";
3484             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3485             const char *type;
3486             STRLEN typelen;
3487
3488             if (PL_lex_inwhat == OP_TRANS) {
3489                 type = "tr";
3490                 typelen = 2;
3491             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3492                 type = "s";
3493                 typelen = 1;
3494             } else  {
3495                 type = "qq";
3496                 typelen = 2;
3497             }
3498
3499             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3500                                 type, typelen);
3501         }
3502         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3503     } else
3504         SvREFCNT_dec(sv);
3505     return s;
3506 }
3507
3508 /* S_intuit_more
3509  * Returns TRUE if there's more to the expression (e.g., a subscript),
3510  * FALSE otherwise.
3511  *
3512  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3513  *
3514  * ->[ and ->{ return TRUE
3515  * { and [ outside a pattern are always subscripts, so return TRUE
3516  * if we're outside a pattern and it's not { or [, then return FALSE
3517  * if we're in a pattern and the first char is a {
3518  *   {4,5} (any digits around the comma) returns FALSE
3519  * if we're in a pattern and the first char is a [
3520  *   [] returns FALSE
3521  *   [SOMETHING] has a funky algorithm to decide whether it's a
3522  *      character class or not.  It has to deal with things like
3523  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3524  * anything else returns TRUE
3525  */
3526
3527 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3528
3529 STATIC int
3530 S_intuit_more(pTHX_ register char *s)
3531 {
3532     dVAR;
3533
3534     PERL_ARGS_ASSERT_INTUIT_MORE;
3535
3536     if (PL_lex_brackets)
3537         return TRUE;
3538     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3539         return TRUE;
3540     if (*s != '{' && *s != '[')
3541         return FALSE;
3542     if (!PL_lex_inpat)
3543         return TRUE;
3544
3545     /* In a pattern, so maybe we have {n,m}. */
3546     if (*s == '{') {
3547         if (regcurly(s)) {
3548             return FALSE;
3549         }
3550         return TRUE;
3551     }
3552
3553     /* On the other hand, maybe we have a character class */
3554
3555     s++;
3556     if (*s == ']' || *s == '^')
3557         return FALSE;
3558     else {
3559         /* this is terrifying, and it works */
3560         int weight = 2;         /* let's weigh the evidence */
3561         char seen[256];
3562         unsigned char un_char = 255, last_un_char;
3563         const char * const send = strchr(s,']');
3564         char tmpbuf[sizeof PL_tokenbuf * 4];
3565
3566         if (!send)              /* has to be an expression */
3567             return TRUE;
3568
3569         Zero(seen,256,char);
3570         if (*s == '$')
3571             weight -= 3;
3572         else if (isDIGIT(*s)) {
3573             if (s[1] != ']') {
3574                 if (isDIGIT(s[1]) && s[2] == ']')
3575                     weight -= 10;
3576             }
3577             else
3578                 weight -= 100;
3579         }
3580         for (; s < send; s++) {
3581             last_un_char = un_char;
3582             un_char = (unsigned char)*s;
3583             switch (*s) {
3584             case '@':
3585             case '&':
3586             case '$':
3587                 weight -= seen[un_char] * 10;
3588                 if (isALNUM_lazy_if(s+1,UTF)) {
3589                     int len;
3590                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3591                     len = (int)strlen(tmpbuf);
3592                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3593                         weight -= 100;
3594                     else
3595                         weight -= 10;
3596                 }
3597                 else if (*s == '$' && s[1] &&
3598                   strchr("[#!%*<>()-=",s[1])) {
3599                     if (/*{*/ strchr("])} =",s[2]))
3600                         weight -= 10;
3601                     else
3602                         weight -= 1;
3603                 }
3604                 break;
3605             case '\\':
3606                 un_char = 254;
3607                 if (s[1]) {
3608                     if (strchr("wds]",s[1]))
3609                         weight += 100;
3610                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3611                         weight += 1;
3612                     else if (strchr("rnftbxcav",s[1]))
3613                         weight += 40;
3614                     else if (isDIGIT(s[1])) {
3615                         weight += 40;
3616                         while (s[1] && isDIGIT(s[1]))
3617                             s++;
3618                     }
3619                 }
3620                 else
3621                     weight += 100;
3622                 break;
3623             case '-':
3624                 if (s[1] == '\\')
3625                     weight += 50;
3626                 if (strchr("aA01! ",last_un_char))
3627                     weight += 30;
3628                 if (strchr("zZ79~",s[1]))
3629                     weight += 30;
3630                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3631                     weight -= 5;        /* cope with negative subscript */
3632                 break;
3633             default:
3634                 if (!isALNUM(last_un_char)
3635                     && !(last_un_char == '$' || last_un_char == '@'
3636                          || last_un_char == '&')
3637                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3638                     char *d = tmpbuf;
3639                     while (isALPHA(*s))
3640                         *d++ = *s++;
3641                     *d = '\0';
3642                     if (keyword(tmpbuf, d - tmpbuf, 0))
3643                         weight -= 150;
3644                 }
3645                 if (un_char == last_un_char + 1)
3646                     weight += 5;
3647                 weight -= seen[un_char];
3648                 break;
3649             }
3650             seen[un_char]++;
3651         }
3652         if (weight >= 0)        /* probably a character class */
3653             return FALSE;
3654     }
3655
3656     return TRUE;
3657 }
3658
3659 /*
3660  * S_intuit_method
3661  *
3662  * Does all the checking to disambiguate
3663  *   foo bar
3664  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3665  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3666  *
3667  * First argument is the stuff after the first token, e.g. "bar".
3668  *
3669  * Not a method if bar is a filehandle.
3670  * Not a method if foo is a subroutine prototyped to take a filehandle.
3671  * Not a method if it's really "Foo $bar"
3672  * Method if it's "foo $bar"
3673  * Not a method if it's really "print foo $bar"
3674  * Method if it's really "foo package::" (interpreted as package->foo)
3675  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3676  * Not a method if bar is a filehandle or package, but is quoted with
3677  *   =>
3678  */
3679
3680 STATIC int
3681 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3682 {
3683     dVAR;
3684     char *s = start + (*start == '$');
3685     char tmpbuf[sizeof PL_tokenbuf];
3686     STRLEN len;
3687     GV* indirgv;
3688 #ifdef PERL_MAD
3689     int soff;
3690 #endif
3691
3692     PERL_ARGS_ASSERT_INTUIT_METHOD;
3693
3694     if (gv) {
3695         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3696             return 0;
3697         if (cv) {
3698             if (SvPOK(cv)) {
3699                 const char *proto = SvPVX_const(cv);
3700                 if (proto) {
3701                     if (*proto == ';')
3702                         proto++;
3703                     if (*proto == '*')
3704                         return 0;
3705                 }
3706             }
3707         } else
3708             gv = NULL;
3709     }
3710     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3711     /* start is the beginning of the possible filehandle/object,
3712      * and s is the end of it
3713      * tmpbuf is a copy of it
3714      */
3715
3716     if (*start == '$') {
3717         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3718                 isUPPER(*PL_tokenbuf))
3719             return 0;
3720 #ifdef PERL_MAD
3721         len = start - SvPVX(PL_linestr);
3722 #endif
3723         s = PEEKSPACE(s);
3724 #ifdef PERL_MAD
3725         start = SvPVX(PL_linestr) + len;
3726 #endif
3727         PL_bufptr = start;
3728         PL_expect = XREF;
3729         return *s == '(' ? FUNCMETH : METHOD;
3730     }
3731     if (!keyword(tmpbuf, len, 0)) {
3732         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3733             len -= 2;
3734             tmpbuf[len] = '\0';
3735 #ifdef PERL_MAD
3736             soff = s - SvPVX(PL_linestr);
3737 #endif
3738             goto bare_package;
3739         }
3740         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3741         if (indirgv && GvCVu(indirgv))
3742             return 0;
3743         /* filehandle or package name makes it a method */
3744         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3745 #ifdef PERL_MAD
3746             soff = s - SvPVX(PL_linestr);
3747 #endif
3748             s = PEEKSPACE(s);
3749             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3750                 return 0;       /* no assumptions -- "=>" quotes bareword */
3751       bare_package:
3752             start_force(PL_curforce);
3753             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3754                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3755             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3756             if (PL_madskills)
3757                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3758             PL_expect = XTERM;
3759             force_next(WORD);
3760             PL_bufptr = s;
3761 #ifdef PERL_MAD
3762             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3763 #endif
3764             return *s == '(' ? FUNCMETH : METHOD;
3765         }
3766     }
3767     return 0;
3768 }
3769
3770 /* Encoded script support. filter_add() effectively inserts a
3771  * 'pre-processing' function into the current source input stream.
3772  * Note that the filter function only applies to the current source file
3773  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3774  *
3775  * The datasv parameter (which may be NULL) can be used to pass
3776  * private data to this instance of the filter. The filter function
3777  * can recover the SV using the FILTER_DATA macro and use it to
3778  * store private buffers and state information.
3779  *
3780  * The supplied datasv parameter is upgraded to a PVIO type
3781  * and the IoDIRP/IoANY field is used to store the function pointer,
3782  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3783  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3784  * private use must be set using malloc'd pointers.
3785  */
3786
3787 SV *
3788 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3789 {
3790     dVAR;
3791     if (!funcp)
3792         return NULL;
3793
3794     if (!PL_parser)
3795         return NULL;
3796
3797     if (!PL_rsfp_filters)
3798         PL_rsfp_filters = newAV();
3799     if (!datasv)
3800         datasv = newSV(0);
3801     SvUPGRADE(datasv, SVt_PVIO);
3802     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3803     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3804     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3805                           FPTR2DPTR(void *, IoANY(datasv)),
3806                           SvPV_nolen(datasv)));
3807     av_unshift(PL_rsfp_filters, 1);
3808     av_store(PL_rsfp_filters, 0, datasv) ;
3809     return(datasv);
3810 }
3811
3812
3813 /* Delete most recently added instance of this filter function. */
3814 void
3815 Perl_filter_del(pTHX_ filter_t funcp)
3816 {
3817     dVAR;
3818     SV *datasv;
3819
3820     PERL_ARGS_ASSERT_FILTER_DEL;
3821
3822 #ifdef DEBUGGING
3823     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3824                           FPTR2DPTR(void*, funcp)));
3825 #endif
3826     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3827         return;
3828     /* if filter is on top of stack (usual case) just pop it off */
3829     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3830     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3831         sv_free(av_pop(PL_rsfp_filters));
3832
3833         return;
3834     }
3835     /* we need to search for the correct entry and clear it     */
3836     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3837 }
3838
3839
3840 /* Invoke the idxth filter function for the current rsfp.        */
3841 /* maxlen 0 = read one text line */
3842 I32
3843 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3844 {
3845     dVAR;
3846     filter_t funcp;
3847     SV *datasv = NULL;
3848     /* This API is bad. It should have been using unsigned int for maxlen.
3849        Not sure if we want to change the API, but if not we should sanity
3850        check the value here.  */
3851     const unsigned int correct_length
3852         = maxlen < 0 ?
3853 #ifdef PERL_MICRO
3854         0x7FFFFFFF
3855 #else
3856         INT_MAX
3857 #endif
3858         : maxlen;
3859
3860     PERL_ARGS_ASSERT_FILTER_READ;
3861
3862     if (!PL_parser || !PL_rsfp_filters)
3863         return -1;
3864     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3865         /* Provide a default input filter to make life easy.    */
3866         /* Note that we append to the line. This is handy.      */
3867         DEBUG_P(PerlIO_printf(Perl_debug_log,
3868                               "filter_read %d: from rsfp\n", idx));
3869         if (correct_length) {
3870             /* Want a block */
3871             int len ;
3872             const int old_len = SvCUR(buf_sv);
3873
3874             /* ensure buf_sv is large enough */
3875             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3876             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3877                                    correct_length)) <= 0) {
3878                 if (PerlIO_error(PL_rsfp))
3879                     return -1;          /* error */
3880                 else
3881                     return 0 ;          /* end of file */
3882             }
3883             SvCUR_set(buf_sv, old_len + len) ;
3884             SvPVX(buf_sv)[old_len + len] = '\0';
3885         } else {
3886             /* Want a line */
3887             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3888                 if (PerlIO_error(PL_rsfp))
3889                     return -1;          /* error */
3890                 else
3891                     return 0 ;          /* end of file */
3892             }
3893         }
3894         return SvCUR(buf_sv);
3895     }
3896     /* Skip this filter slot if filter has been deleted */
3897     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3898         DEBUG_P(PerlIO_printf(Perl_debug_log,
3899                               "filter_read %d: skipped (filter deleted)\n",
3900                               idx));
3901         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3902     }
3903     /* Get function pointer hidden within datasv        */
3904     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3905     DEBUG_P(PerlIO_printf(Perl_debug_log,
3906                           "filter_read %d: via function %p (%s)\n",
3907                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3908     /* Call function. The function is expected to       */
3909     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3910     /* Return: <0:error, =0:eof, >0:not eof             */
3911     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3912 }
3913
3914 STATIC char *
3915 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3916 {
3917     dVAR;
3918
3919     PERL_ARGS_ASSERT_FILTER_GETS;
3920
3921 #ifdef PERL_CR_FILTER
3922     if (!PL_rsfp_filters) {
3923         filter_add(S_cr_textfilter,NULL);
3924     }
3925 #endif
3926     if (PL_rsfp_filters) {
3927         if (!append)
3928             SvCUR_set(sv, 0);   /* start with empty line        */
3929         if (FILTER_READ(0, sv, 0) > 0)
3930             return ( SvPVX(sv) ) ;
3931         else
3932             return NULL ;
3933     }
3934     else
3935         return (sv_gets(sv, PL_rsfp, append));
3936 }
3937
3938 STATIC HV *
3939 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3940 {
3941     dVAR;
3942     GV *gv;
3943
3944     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3945
3946     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3947         return PL_curstash;
3948
3949     if (len > 2 &&
3950         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3951         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3952     {
3953         return GvHV(gv);                        /* Foo:: */
3954     }
3955
3956     /* use constant CLASS => 'MyClass' */
3957     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3958     if (gv && GvCV(gv)) {
3959         SV * const sv = cv_const_sv(GvCV(gv));
3960         if (sv)
3961             pkgname = SvPV_const(sv, len);
3962     }
3963
3964     return gv_stashpvn(pkgname, len, 0);
3965 }
3966
3967 /*
3968  * S_readpipe_override
3969  * Check whether readpipe() is overridden, and generates the appropriate
3970  * optree, provided sublex_start() is called afterwards.
3971  */
3972 STATIC void
3973 S_readpipe_override(pTHX)
3974 {
3975     GV **gvp;
3976     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3977     pl_yylval.ival = OP_BACKTICK;
3978     if ((gv_readpipe
3979                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3980             ||
3981             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3982              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3983              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3984     {
3985         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3986             op_append_elem(OP_LIST,
3987                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3988                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3989     }
3990 }
3991
3992 #ifdef PERL_MAD
3993  /*
3994  * Perl_madlex
3995  * The intent of this yylex wrapper is to minimize the changes to the
3996  * tokener when we aren't interested in collecting madprops.  It remains
3997  * to be seen how successful this strategy will be...
3998  */
3999
4000 int
4001 Perl_madlex(pTHX)
4002 {
4003     int optype;
4004     char *s = PL_bufptr;
4005
4006     /* make sure PL_thiswhite is initialized */
4007     PL_thiswhite = 0;
4008     PL_thismad = 0;
4009
4010     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4011     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4012         return S_pending_ident(aTHX);
4013
4014     /* previous token ate up our whitespace? */
4015     if (!PL_lasttoke && PL_nextwhite) {
4016         PL_thiswhite = PL_nextwhite;
4017         PL_nextwhite = 0;
4018     }
4019
4020     /* isolate the token, and figure out where it is without whitespace */
4021     PL_realtokenstart = -1;
4022     PL_thistoken = 0;
4023     optype = yylex();
4024     s = PL_bufptr;
4025     assert(PL_curforce < 0);
4026
4027     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
4028         if (!PL_thistoken) {
4029             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4030                 PL_thistoken = newSVpvs("");
4031             else {
4032                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4033                 PL_thistoken = newSVpvn(tstart, s - tstart);
4034             }
4035         }
4036         if (PL_thismad) /* install head */
4037             CURMAD('X', PL_thistoken);
4038     }
4039
4040     /* last whitespace of a sublex? */
4041     if (optype == ')' && PL_endwhite) {
4042         CURMAD('X', PL_endwhite);
4043     }
4044
4045     if (!PL_thismad) {
4046
4047         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
4048         if (!PL_thiswhite && !PL_endwhite && !optype) {
4049             sv_free(PL_thistoken);
4050             PL_thistoken = 0;
4051             return 0;
4052         }
4053
4054         /* put off final whitespace till peg */
4055         if (optype == ';' && !PL_rsfp) {
4056             PL_nextwhite = PL_thiswhite;
4057             PL_thiswhite = 0;
4058         }
4059         else if (PL_thisopen) {
4060             CURMAD('q', PL_thisopen);
4061             if (PL_thistoken)
4062                 sv_free(PL_thistoken);
4063             PL_thistoken = 0;
4064         }
4065         else {
4066             /* Store actual token text as madprop X */
4067             CURMAD('X', PL_thistoken);
4068         }
4069
4070         if (PL_thiswhite) {
4071             /* add preceding whitespace as madprop _ */
4072             CURMAD('_', PL_thiswhite);
4073         }
4074
4075         if (PL_thisstuff) {
4076             /* add quoted material as madprop = */
4077             CURMAD('=', PL_thisstuff);
4078         }
4079
4080         if (PL_thisclose) {
4081             /* add terminating quote as madprop Q */
4082             CURMAD('Q', PL_thisclose);
4083         }
4084     }
4085
4086     /* special processing based on optype */
4087
4088     switch (optype) {
4089
4090     /* opval doesn't need a TOKEN since it can already store mp */
4091     case WORD:
4092     case METHOD:
4093     case FUNCMETH:
4094     case THING:
4095     case PMFUNC:
4096     case PRIVATEREF:
4097     case FUNC0SUB:
4098     case UNIOPSUB:
4099     case LSTOPSUB:
4100         if (pl_yylval.opval)
4101             append_madprops(PL_thismad, pl_yylval.opval, 0);
4102         PL_thismad = 0;
4103         return optype;
4104
4105     /* fake EOF */
4106     case 0:
4107         optype = PEG;
4108         if (PL_endwhite) {
4109             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4110             PL_endwhite = 0;
4111         }
4112         break;
4113
4114     case ']':
4115     case '}':
4116         if (PL_faketokens)
4117             break;
4118         /* remember any fake bracket that lexer is about to discard */
4119         if (PL_lex_brackets == 1 &&
4120             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4121         {
4122             s = PL_bufptr;
4123             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4124                 s++;
4125             if (*s == '}') {
4126                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4127                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4128                 PL_thiswhite = 0;
4129                 PL_bufptr = s - 1;
4130                 break;  /* don't bother looking for trailing comment */
4131             }
4132             else
4133                 s = PL_bufptr;
4134         }
4135         if (optype == ']')
4136             break;
4137         /* FALLTHROUGH */
4138
4139     /* attach a trailing comment to its statement instead of next token */
4140     case ';':
4141         if (PL_faketokens)
4142             break;
4143         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4144             s = PL_bufptr;
4145             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4146                 s++;
4147             if (*s == '\n' || *s == '#') {
4148                 while (s < PL_bufend && *s != '\n')
4149                     s++;
4150                 if (s < PL_bufend)
4151                     s++;
4152                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4153                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4154                 PL_thiswhite = 0;
4155                 PL_bufptr = s;
4156             }
4157         }
4158         break;
4159
4160     /* pval */
4161     case LABEL:
4162         break;
4163
4164     /* ival */
4165     default:
4166         break;
4167
4168     }
4169
4170     /* Create new token struct.  Note: opvals return early above. */
4171     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4172     PL_thismad = 0;
4173     return optype;
4174 }
4175 #endif
4176
4177 STATIC char *
4178 S_tokenize_use(pTHX_ int is_use, char *s) {
4179     dVAR;
4180
4181     PERL_ARGS_ASSERT_TOKENIZE_USE;
4182
4183     if (PL_expect != XSTATE)
4184         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4185                     is_use ? "use" : "no"));
4186     s = SKIPSPACE1(s);
4187     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4188         s = force_version(s, TRUE);
4189         if (*s == ';' || *s == '}'
4190                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4191             start_force(PL_curforce);
4192             NEXTVAL_NEXTTOKE.opval = NULL;
4193             force_next(WORD);
4194         }
4195         else if (*s == 'v') {
4196             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4197             s = force_version(s, FALSE);
4198         }
4199     }
4200     else {
4201         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4202         s = force_version(s, FALSE);
4203     }
4204     pl_yylval.ival = is_use;
4205     return s;
4206 }
4207 #ifdef DEBUGGING
4208     static const char* const exp_name[] =
4209         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4210           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4211         };
4212 #endif
4213
4214 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4215 STATIC bool
4216 S_word_takes_any_delimeter(char *p, STRLEN len)
4217 {
4218     return (len == 1 && strchr("msyq", p[0])) ||
4219            (len == 2 && (
4220             (p[0] == 't' && p[1] == 'r') ||
4221             (p[0] == 'q' && strchr("qwxr", p[1]))));
4222 }
4223
4224 /*
4225   yylex
4226
4227   Works out what to call the token just pulled out of the input
4228   stream.  The yacc parser takes care of taking the ops we return and
4229   stitching them into a tree.
4230
4231   Returns:
4232     PRIVATEREF
4233
4234   Structure:
4235       if read an identifier
4236           if we're in a my declaration
4237               croak if they tried to say my($foo::bar)
4238               build the ops for a my() declaration
4239           if it's an access to a my() variable
4240               are we in a sort block?
4241                   croak if my($a); $a <=> $b
4242               build ops for access to a my() variable
4243           if in a dq string, and they've said @foo and we can't find @foo
4244               croak
4245           build ops for a bareword
4246       if we already built the token before, use it.
4247 */
4248
4249
4250 #ifdef __SC__
4251 #pragma segment Perl_yylex
4252 #endif
4253 int
4254 Perl_yylex(pTHX)
4255 {
4256     dVAR;
4257     register char *s = PL_bufptr;
4258     register char *d;
4259     STRLEN len;
4260     bool bof = FALSE;
4261     U32 fake_eof = 0;
4262
4263     /* orig_keyword, gvp, and gv are initialized here because
4264      * jump to the label just_a_word_zero can bypass their
4265      * initialization later. */
4266     I32 orig_keyword = 0;
4267     GV *gv = NULL;
4268     GV **gvp = NULL;
4269
4270     DEBUG_T( {
4271         SV* tmp = newSVpvs("");
4272         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4273             (IV)CopLINE(PL_curcop),
4274             lex_state_names[PL_lex_state],
4275             exp_name[PL_expect],
4276             pv_display(tmp, s, strlen(s), 0, 60));
4277         SvREFCNT_dec(tmp);
4278     } );
4279     /* check if there's an identifier for us to look at */
4280     if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4281         return REPORT(S_pending_ident(aTHX));
4282
4283     /* no identifier pending identification */
4284
4285     switch (PL_lex_state) {
4286 #ifdef COMMENTARY
4287     case LEX_NORMAL:            /* Some compilers will produce faster */
4288     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4289         break;
4290 #endif
4291
4292     /* when we've already built the next token, just pull it out of the queue */
4293     case LEX_KNOWNEXT:
4294 #ifdef PERL_MAD
4295         PL_lasttoke--;
4296         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4297         if (PL_madskills) {
4298             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4299             PL_nexttoke[PL_lasttoke].next_mad = 0;
4300             if (PL_thismad && PL_thismad->mad_key == '_') {
4301                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4302                 PL_thismad->mad_val = 0;
4303                 mad_free(PL_thismad);
4304                 PL_thismad = 0;
4305             }
4306         }
4307         if (!PL_lasttoke) {
4308             PL_lex_state = PL_lex_defer;
4309             PL_expect = PL_lex_expect;
4310             PL_lex_defer = LEX_NORMAL;
4311             if (!PL_nexttoke[PL_lasttoke].next_type)
4312                 return yylex();
4313         }
4314 #else
4315         PL_nexttoke--;
4316         pl_yylval = PL_nextval[PL_nexttoke];
4317         if (!PL_nexttoke) {
4318             PL_lex_state = PL_lex_defer;
4319             PL_expect = PL_lex_expect;
4320             PL_lex_defer = LEX_NORMAL;
4321         }
4322 #endif
4323         {
4324             I32 next_type;
4325 #ifdef PERL_MAD
4326             next_type = PL_nexttoke[PL_lasttoke].next_type;
4327 #else
4328             next_type = PL_nexttype[PL_nexttoke];
4329 #endif
4330             if (next_type & (7<<24)) {
4331                 if (next_type & (1<<24)) {
4332                     if (PL_lex_brackets > 100)
4333                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4334                     PL_lex_brackstack[PL_lex_brackets++] =
4335                         (next_type >> 16) & 0xff;
4336                 }
4337                 if (next_type & (2<<24))
4338                     PL_lex_allbrackets++;
4339                 if (next_type & (4<<24))
4340                     PL_lex_allbrackets--;
4341                 next_type &= 0xffff;
4342             }
4343 #ifdef PERL_MAD
4344             /* FIXME - can these be merged?  */
4345             return next_type;
4346 #else
4347             return REPORT(next_type);
4348 #endif
4349         }
4350
4351     /* interpolated case modifiers like \L \U, including \Q and \E.
4352        when we get here, PL_bufptr is at the \
4353     */
4354     case LEX_INTERPCASEMOD:
4355 #ifdef DEBUGGING
4356         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4357             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4358 #endif
4359         /* handle \E or end of string */
4360         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4361             /* if at a \E */
4362             if (PL_lex_casemods) {
4363                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4364                 PL_lex_casestack[PL_lex_casemods] = '\0';
4365
4366                 if (PL_bufptr != PL_bufend
4367                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4368                     PL_bufptr += 2;
4369                     PL_lex_state = LEX_INTERPCONCAT;
4370 #ifdef PERL_MAD
4371                     if (PL_madskills)
4372                         PL_thistoken = newSVpvs("\\E");
4373 #endif
4374                 }
4375                 PL_lex_allbrackets--;
4376                 return REPORT(')');
4377             }
4378 #ifdef PERL_MAD
4379             while (PL_bufptr != PL_bufend &&
4380               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4381                 if (!PL_thiswhite)
4382                     PL_thiswhite = newSVpvs("");
4383                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4384                 PL_bufptr += 2;
4385             }
4386 #else
4387             if (PL_bufptr != PL_bufend)
4388                 PL_bufptr += 2;
4389 #endif
4390             PL_lex_state = LEX_INTERPCONCAT;
4391             return yylex();
4392         }
4393         else {
4394             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4395               "### Saw case modifier\n"); });
4396             s = PL_bufptr + 1;
4397             if (s[1] == '\\' && s[2] == 'E') {
4398 #ifdef PERL_MAD
4399                 if (!PL_thiswhite)
4400                     PL_thiswhite = newSVpvs("");
4401                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4402 #endif
4403                 PL_bufptr = s + 3;
4404                 PL_lex_state = LEX_INTERPCONCAT;
4405                 return yylex();
4406             }
4407             else {
4408                 I32 tmp;
4409                 if (!PL_madskills) /* when just compiling don't need correct */
4410                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4411                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4412                 if ((*s == 'L' || *s == 'U') &&
4413                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4414                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4415                     PL_lex_allbrackets--;
4416                     return REPORT(')');
4417                 }
4418                 if (PL_lex_casemods > 10)
4419                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4420                 PL_lex_casestack[PL_lex_casemods++] = *s;
4421                 PL_lex_casestack[PL_lex_casemods] = '\0';
4422                 PL_lex_state = LEX_INTERPCONCAT;
4423                 start_force(PL_curforce);
4424                 NEXTVAL_NEXTTOKE.ival = 0;
4425                 force_next((2<<24)|'(');
4426                 start_force(PL_curforce);
4427                 if (*s == 'l')
4428                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4429                 else if (*s == 'u')
4430                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4431                 else if (*s == 'L')
4432                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4433                 else if (*s == 'U')
4434                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4435                 else if (*s == 'Q')
4436                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4437                 else
4438                     Perl_croak(aTHX_ "panic: yylex");
4439                 if (PL_madskills) {
4440                     SV* const tmpsv = newSVpvs("\\ ");
4441                     /* replace the space with the character we want to escape
4442                      */
4443                     SvPVX(tmpsv)[1] = *s;
4444                     curmad('_', tmpsv);
4445                 }
4446                 PL_bufptr = s + 1;
4447             }
4448             force_next(FUNC);
4449             if (PL_lex_starts) {
4450                 s = PL_bufptr;
4451                 PL_lex_starts = 0;
4452 #ifdef PERL_MAD
4453                 if (PL_madskills) {
4454                     if (PL_thistoken)
4455                         sv_free(PL_thistoken);
4456                     PL_thistoken = newSVpvs("");
4457                 }
4458 #endif
4459                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4460                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4461                     OPERATOR(',');
4462                 else
4463                     Aop(OP_CONCAT);
4464             }
4465             else
4466                 return yylex();
4467         }
4468
4469     case LEX_INTERPPUSH:
4470         return REPORT(sublex_push());
4471
4472     case LEX_INTERPSTART:
4473         if (PL_bufptr == PL_bufend)
4474             return REPORT(sublex_done());
4475         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4476               "### Interpolated variable\n"); });
4477         PL_expect = XTERM;
4478         PL_lex_dojoin = (*PL_bufptr == '@');
4479         PL_lex_state = LEX_INTERPNORMAL;
4480         if (PL_lex_dojoin) {
4481             start_force(PL_curforce);
4482             NEXTVAL_NEXTTOKE.ival = 0;
4483             force_next(',');
4484             start_force(PL_curforce);
4485             force_ident("\"", '$');
4486             start_force(PL_curforce);
4487             NEXTVAL_NEXTTOKE.ival = 0;
4488             force_next('$');
4489             start_force(PL_curforce);
4490             NEXTVAL_NEXTTOKE.ival = 0;
4491             force_next((2<<24)|'(');
4492             start_force(PL_curforce);
4493             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4494             force_next(FUNC);
4495         }
4496         if (PL_lex_starts++) {
4497             s = PL_bufptr;
4498 #ifdef PERL_MAD
4499             if (PL_madskills) {
4500                 if (PL_thistoken)
4501                     sv_free(PL_thistoken);
4502                 PL_thistoken = newSVpvs("");
4503             }
4504 #endif
4505             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4506             if (!PL_lex_casemods && PL_lex_inpat)
4507                 OPERATOR(',');
4508             else
4509                 Aop(OP_CONCAT);
4510         }
4511         return yylex();
4512
4513     case LEX_INTERPENDMAYBE:
4514         if (intuit_more(PL_bufptr)) {
4515             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4516             break;
4517         }
4518         /* FALL THROUGH */
4519
4520     case LEX_INTERPEND:
4521         if (PL_lex_dojoin) {
4522             PL_lex_dojoin = FALSE;
4523             PL_lex_state = LEX_INTERPCONCAT;
4524 #ifdef PERL_MAD
4525             if (PL_madskills) {
4526                 if (PL_thistoken)
4527                     sv_free(PL_thistoken);
4528                 PL_thistoken = newSVpvs("");
4529             }
4530 #endif
4531             PL_lex_allbrackets--;
4532             return REPORT(')');
4533         }
4534         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4535             && SvEVALED(PL_lex_repl))
4536         {
4537             if (PL_bufptr != PL_bufend)
4538                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4539             PL_lex_repl = NULL;
4540         }
4541         /* FALLTHROUGH */
4542     case LEX_INTERPCONCAT:
4543 #ifdef DEBUGGING
4544         if (PL_lex_brackets)
4545             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4546 #endif
4547         if (PL_bufptr == PL_bufend)
4548             return REPORT(sublex_done());
4549
4550         if (SvIVX(PL_linestr) == '\'') {
4551             SV *sv = newSVsv(PL_linestr);
4552             if (!PL_lex_inpat)
4553                 sv = tokeq(sv);
4554             else if ( PL_hints & HINT_NEW_RE )
4555                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4556             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4557             s = PL_bufend;
4558         }
4559         else {
4560             s = scan_const(PL_bufptr);
4561             if (*s == '\\')
4562                 PL_lex_state = LEX_INTERPCASEMOD;
4563             else
4564                 PL_lex_state = LEX_INTERPSTART;
4565         }
4566
4567         if (s != PL_bufptr) {
4568             start_force(PL_curforce);
4569             if (PL_madskills) {
4570                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4571             }
4572             NEXTVAL_NEXTTOKE = pl_yylval;
4573             PL_expect = XTERM;
4574             force_next(THING);
4575             if (PL_lex_starts++) {
4576 #ifdef PERL_MAD
4577                 if (PL_madskills) {
4578                     if (PL_thistoken)
4579                         sv_free(PL_thistoken);
4580                     PL_thistoken = newSVpvs("");
4581                 }
4582 #endif
4583                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4584                 if (!PL_lex_casemods && PL_lex_inpat)
4585                     OPERATOR(',');
4586                 else
4587                     Aop(OP_CONCAT);
4588             }
4589             else {
4590                 PL_bufptr = s;
4591                 return yylex();
4592             }
4593         }
4594
4595         return yylex();
4596     case LEX_FORMLINE:
4597         PL_lex_state = LEX_NORMAL;
4598         s = scan_formline(PL_bufptr);
4599         if (!PL_lex_formbrack)
4600             goto rightbracket;
4601         OPERATOR(';');
4602     }
4603
4604     s = PL_bufptr;
4605     PL_oldoldbufptr = PL_oldbufptr;
4606     PL_oldbufptr = s;
4607
4608   retry:
4609 #ifdef PERL_MAD
4610     if (PL_thistoken) {
4611         sv_free(PL_thistoken);
4612         PL_thistoken = 0;
4613     }
4614     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4615 #endif
4616     switch (*s) {
4617     default:
4618         if (isIDFIRST_lazy_if(s,UTF))
4619             goto keylookup;
4620         {
4621         unsigned char c = *s;
4622         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4623         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4624             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4625         } else {
4626             d = PL_linestart;
4627         }
4628         *s = '\0';
4629         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4630     }
4631     case 4:
4632     case 26:
4633         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4634     case 0:
4635 #ifdef PERL_MAD
4636         if (PL_madskills)
4637             PL_faketokens = 0;
4638 #endif
4639         if (!PL_rsfp) {
4640             PL_last_uni = 0;
4641             PL_last_lop = 0;
4642             if (PL_lex_brackets &&
4643                     PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4644                 yyerror((const char *)
4645                         (PL_lex_formbrack
4646                          ? "Format not terminated"
4647                          : "Missing right curly or square bracket"));
4648             }
4649             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4650                         "### Tokener got EOF\n");
4651             } );
4652             TOKEN(0);
4653         }
4654         if (s++ < PL_bufend)
4655             goto retry;                 /* ignore stray nulls */
4656         PL_last_uni = 0;
4657         PL_last_lop = 0;
4658         if (!PL_in_eval && !PL_preambled) {
4659             PL_preambled = TRUE;
4660 #ifdef PERL_MAD
4661             if (PL_madskills)
4662                 PL_faketokens = 1;
4663 #endif
4664             if (PL_perldb) {
4665                 /* Generate a string of Perl code to load the debugger.
4666                  * If PERL5DB is set, it will return the contents of that,
4667                  * otherwise a compile-time require of perl5db.pl.  */
4668
4669                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4670
4671                 if (pdb) {
4672                     sv_setpv(PL_linestr, pdb);
4673                     sv_catpvs(PL_linestr,";");
4674                 } else {
4675                     SETERRNO(0,SS_NORMAL);
4676                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4677                 }
4678             } else
4679                 sv_setpvs(PL_linestr,"");
4680             if (PL_preambleav) {
4681                 SV **svp = AvARRAY(PL_preambleav);
4682                 SV **const end = svp + AvFILLp(PL_preambleav);
4683                 while(svp <= end) {
4684                     sv_catsv(PL_linestr, *svp);
4685                     ++svp;
4686                     sv_catpvs(PL_linestr, ";");
4687                 }
4688                 sv_free(MUTABLE_SV(PL_preambleav));
4689                 PL_preambleav = NULL;
4690             }
4691             if (PL_minus_E)
4692                 sv_catpvs(PL_linestr,
4693                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4694             if (PL_minus_n || PL_minus_p) {
4695                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4696                 if (PL_minus_l)
4697                     sv_catpvs(PL_linestr,"chomp;");
4698                 if (PL_minus_a) {
4699                     if (PL_minus_F) {
4700                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4701                              || *PL_splitstr == '"')
4702                               && strchr(PL_splitstr + 1, *PL_splitstr))
4703                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4704                         else {
4705                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4706                                bytes can be used as quoting characters.  :-) */
4707                             const char *splits = PL_splitstr;
4708                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4709                             do {
4710                                 /* Need to \ \s  */
4711                                 if (*splits == '\\')
4712                                     sv_catpvn(PL_linestr, splits, 1);
4713                                 sv_catpvn(PL_linestr, splits, 1);
4714                             } while (*splits++);
4715                             /* This loop will embed the trailing NUL of
4716                                PL_linestr as the last thing it does before
4717                                terminating.  */
4718                             sv_catpvs(PL_linestr, ");");
4719                         }
4720                     }
4721                     else
4722                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4723                 }
4724             }
4725             sv_catpvs(PL_linestr, "\n");
4726             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4727             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4728             PL_last_lop = PL_last_uni = NULL;
4729             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4730                 update_debugger_info(PL_linestr, NULL, 0);
4731             goto retry;
4732         }
4733         do {
4734             fake_eof = 0;
4735             bof = PL_rsfp ? TRUE : FALSE;
4736             if (0) {
4737               fake_eof:
4738                 fake_eof = LEX_FAKE_EOF;
4739             }
4740             PL_bufptr = PL_bufend;
4741             CopLINE_inc(PL_curcop);
4742             if (!lex_next_chunk(fake_eof)) {
4743                 CopLINE_dec(PL_curcop);
4744                 s = PL_bufptr;
4745                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4746             }
4747             CopLINE_dec(PL_curcop);
4748 #ifdef PERL_MAD
4749             if (!PL_rsfp)
4750                 PL_realtokenstart = -1;
4751 #endif
4752             s = PL_bufptr;
4753             /* If it looks like the start of a BOM or raw UTF-16,
4754              * check if it in fact is. */
4755             if (bof && PL_rsfp &&
4756                      (*s == 0 ||
4757                       *(U8*)s == 0xEF ||
4758                       *(U8*)s >= 0xFE ||
4759                       s[1] == 0)) {
4760                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4761                 if (bof) {
4762                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4763                     s = swallow_bom((U8*)s);
4764                 }
4765             }
4766             if (PL_parser->in_pod) {
4767                 /* Incest with pod. */
4768 #ifdef PERL_MAD
4769                 if (PL_madskills)
4770                     sv_catsv(PL_thiswhite, PL_linestr);
4771 #endif
4772                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4773                     sv_setpvs(PL_linestr, "");
4774                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4775                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4776                     PL_last_lop = PL_last_uni = NULL;
4777                     PL_parser->in_pod = 0;
4778                 }
4779             }
4780             if (PL_rsfp)
4781                 incline(s);
4782         } while (PL_parser->in_pod);
4783         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4784         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4785         PL_last_lop = PL_last_uni = NULL;
4786         if (CopLINE(PL_curcop) == 1) {
4787             while (s < PL_bufend && isSPACE(*s))
4788                 s++;
4789             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4790                 s++;
4791 #ifdef PERL_MAD
4792             if (PL_madskills)
4793                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4794 #endif
4795             d = NULL;
4796             if (!PL_in_eval) {
4797                 if (*s == '#' && *(s+1) == '!')
4798                     d = s + 2;
4799 #ifdef ALTERNATE_SHEBANG
4800                 else {
4801                     static char const as[] = ALTERNATE_SHEBANG;
4802                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4803                         d = s + (sizeof(as) - 1);
4804                 }
4805 #endif /* ALTERNATE_SHEBANG */
4806             }
4807             if (d) {
4808                 char *ipath;
4809                 char *ipathend;
4810
4811                 while (isSPACE(*d))
4812                     d++;
4813                 ipath = d;
4814                 while (*d && !isSPACE(*d))
4815                     d++;
4816                 ipathend = d;
4817
4818 #ifdef ARG_ZERO_IS_SCRIPT
4819                 if (ipathend > ipath) {
4820                     /*
4821                      * HP-UX (at least) sets argv[0] to the script name,
4822                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4823                      * at least, set argv[0] to the basename of the Perl
4824                      * interpreter. So, having found "#!", we'll set it right.
4825                      */
4826                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4827                                                     SVt_PV)); /* $^X */
4828                     assert(SvPOK(x) || SvGMAGICAL(x));
4829                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4830                         sv_setpvn(x, ipath, ipathend - ipath);
4831                         SvSETMAGIC(x);
4832                     }
4833                     else {
4834                         STRLEN blen;
4835                         STRLEN llen;
4836                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4837                         const char * const lstart = SvPV_const(x,llen);
4838                         if (llen < blen) {
4839                             bstart += blen - llen;
4840                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4841                                 sv_setpvn(x, ipath, ipathend - ipath);
4842                                 SvSETMAGIC(x);
4843                             }
4844                         }
4845                     }
4846                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4847                 }
4848 #endif /* ARG_ZERO_IS_SCRIPT */
4849
4850                 /*
4851                  * Look for options.
4852                  */
4853                 d = instr(s,"perl -");
4854                 if (!d) {
4855                     d = instr(s,"perl");
4856 #if defined(DOSISH)
4857                     /* avoid getting into infinite loops when shebang
4858                      * line contains "Perl" rather than "perl" */
4859                     if (!d) {
4860                         for (d = ipathend-4; d >= ipath; --d) {
4861                             if ((*d == 'p' || *d == 'P')
4862                                 && !ibcmp(d, "perl", 4))
4863                             {
4864                                 break;
4865                             }
4866                         }
4867                         if (d < ipath)
4868                             d = NULL;
4869                     }
4870 #endif
4871                 }
4872 #ifdef ALTERNATE_SHEBANG
4873                 /*
4874                  * If the ALTERNATE_SHEBANG on this system starts with a
4875                  * character that can be part of a Perl expression, then if
4876                  * we see it but not "perl", we're probably looking at the
4877                  * start of Perl code, not a request to hand off to some
4878                  * other interpreter.  Similarly, if "perl" is there, but
4879                  * not in the first 'word' of the line, we assume the line
4880                  * contains the start of the Perl program.
4881                  */
4882                 if (d && *s != '#') {
4883                     const char *c = ipath;
4884                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4885                         c++;
4886                     if (c < d)
4887                         d = NULL;       /* "perl" not in first word; ignore */
4888                     else
4889                         *s = '#';       /* Don't try to parse shebang line */
4890                 }
4891 #endif /* ALTERNATE_SHEBANG */
4892                 if (!d &&
4893                     *s == '#' &&
4894                     ipathend > ipath &&
4895                     !PL_minus_c &&
4896                     !instr(s,"indir") &&
4897                     instr(PL_origargv[0],"perl"))
4898                 {
4899                     dVAR;
4900                     char **newargv;
4901
4902                     *ipathend = '\0';
4903                     s = ipathend + 1;
4904                     while (s < PL_bufend && isSPACE(*s))
4905                         s++;
4906                     if (s < PL_bufend) {
4907                         Newx(newargv,PL_origargc+3,char*);
4908                         newargv[1] = s;
4909                         while (s < PL_bufend && !isSPACE(*s))
4910                             s++;
4911                         *s = '\0';
4912                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4913                     }
4914                     else
4915                         newargv = PL_origargv;
4916                     newargv[0] = ipath;
4917                     PERL_FPU_PRE_EXEC
4918                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4919                     PERL_FPU_POST_EXEC
4920                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4921                 }
4922                 if (d) {
4923                     while (*d && !isSPACE(*d))
4924                         d++;
4925                     while (SPACE_OR_TAB(*d))
4926                         d++;
4927
4928                     if (*d++ == '-') {
4929                         const bool switches_done = PL_doswitches;
4930                         const U32 oldpdb = PL_perldb;
4931                         const bool oldn = PL_minus_n;
4932                         const bool oldp = PL_minus_p;
4933                         const char *d1 = d;
4934
4935                         do {
4936                             bool baduni = FALSE;
4937                             if (*d1 == 'C') {
4938                                 const char *d2 = d1 + 1;
4939                                 if (parse_unicode_opts((const char **)&d2)
4940                                     != PL_unicode)
4941                                     baduni = TRUE;
4942                             }
4943                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4944                                 const char * const m = d1;
4945                                 while (*d1 && !isSPACE(*d1))
4946                                     d1++;
4947                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4948                                       (int)(d1 - m), m);
4949                             }
4950                             d1 = moreswitches(d1);
4951                         } while (d1);
4952                         if (PL_doswitches && !switches_done) {
4953                             int argc = PL_origargc;
4954                             char **argv = PL_origargv;
4955                             do {
4956                                 argc--,argv++;
4957                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4958                             init_argv_symbols(argc,argv);
4959                         }
4960                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4961                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4962                               /* if we have already added "LINE: while (<>) {",
4963                                  we must not do it again */
4964                         {
4965                             sv_setpvs(PL_linestr, "");
4966                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4967                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4968                             PL_last_lop = PL_last_uni = NULL;
4969                             PL_preambled = FALSE;
4970                             if (PERLDB_LINE || PERLDB_SAVESRC)
4971                                 (void)gv_fetchfile(PL_origfilename);
4972                             goto retry;
4973                         }
4974                     }
4975                 }
4976             }
4977         }
4978         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4979             PL_bufptr = s;
4980             PL_lex_state = LEX_FORMLINE;
4981             return yylex();
4982         }
4983         goto retry;
4984     case '\r':
4985 #ifdef PERL_STRICT_CR
4986         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4987         Perl_croak(aTHX_
4988       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4989 #endif
4990     case ' ': case '\t': case '\f': case 013:
4991 #ifdef PERL_MAD
4992         PL_realtokenstart = -1;
4993         if (!PL_thiswhite)
4994             PL_thiswhite = newSVpvs("");
4995         sv_catpvn(PL_thiswhite, s, 1);
4996 #endif
4997         s++;
4998         goto retry;
4999     case '#':
5000     case '\n':
5001 #ifdef PERL_MAD
5002         PL_realtokenstart = -1;
5003         if (PL_madskills)
5004             PL_faketokens = 0;
5005 #endif
5006         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5007             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5008                 /* handle eval qq[#line 1 "foo"\n ...] */
5009                 CopLINE_dec(PL_curcop);
5010                 incline(s);
5011             }
5012             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5013                 s = SKIPSPACE0(s);
5014                 if (!PL_in_eval || PL_rsfp)
5015                     incline(s);
5016             }
5017             else {
5018                 d = s;
5019                 while (d < PL_bufend && *d != '\n')
5020                     d++;
5021                 if (d < PL_bufend)
5022                     d++;
5023                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5024                   Perl_croak(aTHX_ "panic: input overflow");
5025 #ifdef PERL_MAD
5026                 if (PL_madskills)
5027                     PL_thiswhite = newSVpvn(s, d - s);
5028 #endif
5029                 s = d;
5030                 incline(s);
5031             }
5032             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5033                 PL_bufptr = s;
5034                 PL_lex_state = LEX_FORMLINE;
5035                 return yylex();
5036             }
5037         }
5038         else {
5039 #ifdef PERL_MAD
5040             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5041                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5042                     PL_faketokens = 0;
5043                     s = SKIPSPACE0(s);
5044                     TOKEN(PEG); /* make sure any #! line is accessible */
5045                 }
5046                 s = SKIPSPACE0(s);
5047             }
5048             else {
5049 /*              if (PL_madskills && PL_lex_formbrack) { */
5050                     d = s;
5051                     while (d < PL_bufend && *d != '\n')
5052                         d++;
5053                     if (d < PL_bufend)
5054                         d++;
5055                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5056                       Perl_croak(aTHX_ "panic: input overflow");
5057                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5058                         if (!PL_thiswhite)
5059                             PL_thiswhite = newSVpvs("");
5060                         if (CopLINE(PL_curcop) == 1) {
5061                             sv_setpvs(PL_thiswhite, "");
5062                             PL_faketokens = 0;
5063                         }
5064                         sv_catpvn(PL_thiswhite, s, d - s);
5065                     }
5066                     s = d;
5067 /*              }
5068                 *s = '\0';
5069                 PL_bufend = s; */
5070             }
5071 #else
5072             *s = '\0';
5073             PL_bufend = s;
5074 #endif
5075         }
5076         goto retry;
5077     case '-':
5078         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5079             I32 ftst = 0;
5080             char tmp;
5081
5082             s++;
5083             PL_bufptr = s;
5084             tmp = *s++;
5085
5086             while (s < PL_bufend && SPACE_OR_TAB(*s))
5087                 s++;
5088
5089             if (strnEQ(s,"=>",2)) {
5090                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5091                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5092                 OPERATOR('-');          /* unary minus */
5093             }
5094             PL_last_uni = PL_oldbufptr;
5095             switch (tmp) {
5096             case 'r': ftst = OP_FTEREAD;        break;
5097             case 'w': ftst = OP_FTEWRITE;       break;
5098             case 'x': ftst = OP_FTEEXEC;        break;
5099             case 'o': ftst = OP_FTEOWNED;       break;
5100             case 'R': ftst = OP_FTRREAD;        break;
5101             case 'W': ftst = OP_FTRWRITE;       break;
5102             case 'X': ftst = OP_FTREXEC;        break;
5103             case 'O': ftst = OP_FTROWNED;       break;
5104             case 'e': ftst = OP_FTIS;           break;
5105             case 'z': ftst = OP_FTZERO;         break;
5106             case 's': ftst = OP_FTSIZE;         break;
5107             case 'f': ftst = OP_FTFILE;         break;
5108             case 'd': ftst = OP_FTDIR;          break;
5109             case 'l': ftst = OP_FTLINK;         break;
5110             case 'p': ftst = OP_FTPIPE;         break;
5111             case 'S': ftst = OP_FTSOCK;         break;
5112             case 'u': ftst = OP_FTSUID;         break;
5113             case 'g': ftst = OP_FTSGID;         break;
5114             case 'k': ftst = OP_FTSVTX;         break;
5115             case 'b': ftst = OP_FTBLK;          break;
5116             case 'c': ftst = OP_FTCHR;          break;
5117             case 't': ftst = OP_FTTTY;          break;
5118             case 'T': ftst = OP_FTTEXT;         break;
5119             case 'B': ftst = OP_FTBINARY;       break;
5120             case 'M': case 'A': case 'C':
5121                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5122                 switch (tmp) {
5123                 case 'M': ftst = OP_FTMTIME;    break;
5124                 case 'A': ftst = OP_FTATIME;    break;
5125                 case 'C': ftst = OP_FTCTIME;    break;
5126                 default:                        break;
5127                 }
5128                 break;
5129             default:
5130                 break;
5131             }
5132             if (ftst) {
5133                 PL_last_lop_op = (OPCODE)ftst;
5134                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5135                         "### Saw file test %c\n", (int)tmp);
5136                 } );
5137                 FTST(ftst);
5138             }
5139             else {
5140                 /* Assume it was a minus followed by a one-letter named
5141                  * subroutine call (or a -bareword), then. */
5142                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5143                         "### '-%c' looked like a file test but was not\n",
5144                         (int) tmp);
5145                 } );
5146                 s = --PL_bufptr;
5147             }
5148         }
5149         {
5150             const char tmp = *s++;
5151             if (*s == tmp) {
5152                 s++;
5153                 if (PL_expect == XOPERATOR)
5154                     TERM(POSTDEC);
5155                 else
5156                     OPERATOR(PREDEC);
5157             }
5158             else if (*s == '>') {
5159                 s++;
5160                 s = SKIPSPACE1(s);
5161                 if (isIDFIRST_lazy_if(s,UTF)) {
5162                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5163                     TOKEN(ARROW);
5164                 }
5165                 else if (*s == '$')
5166                     OPERATOR(ARROW);
5167                 else
5168                     TERM(ARROW);
5169             }
5170             if (PL_expect == XOPERATOR) {
5171                 if (*s == '=' && !PL_lex_allbrackets &&
5172                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5173                     s--;
5174                     TOKEN(0);
5175                 }
5176                 Aop(OP_SUBTRACT);
5177             }
5178             else {
5179                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5180                     check_uni();
5181                 OPERATOR('-');          /* unary minus */
5182             }
5183         }
5184
5185     case '+':
5186         {
5187             const char tmp = *s++;
5188             if (*s == tmp) {
5189                 s++;
5190                 if (PL_expect == XOPERATOR)
5191                     TERM(POSTINC);
5192                 else
5193                     OPERATOR(PREINC);
5194             }
5195             if (PL_expect == XOPERATOR) {
5196                 if (*s == '=' && !PL_lex_allbrackets &&
5197                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5198                     s--;
5199                     TOKEN(0);
5200                 }
5201                 Aop(OP_ADD);
5202             }
5203             else {
5204                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5205                     check_uni();
5206                 OPERATOR('+');
5207             }
5208         }
5209
5210     case '*':
5211         if (PL_expect != XOPERATOR) {
5212             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5213             PL_expect = XOPERATOR;
5214             force_ident(PL_tokenbuf, '*');
5215             if (!*PL_tokenbuf)
5216                 PREREF('*');
5217             TERM('*');
5218         }
5219         s++;
5220         if (*s == '*') {
5221             s++;
5222             if (*s == '=' && !PL_lex_allbrackets &&
5223                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5224                 s -= 2;
5225                 TOKEN(0);
5226             }
5227             PWop(OP_POW);
5228         }
5229         if (*s == '=' && !PL_lex_allbrackets &&
5230                 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5231             s--;
5232             TOKEN(0);
5233         }
5234         Mop(OP_MULTIPLY);
5235
5236     case '%':
5237         if (PL_expect == XOPERATOR) {
5238             if (s[1] == '=' && !PL_lex_allbrackets &&
5239                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5240                 TOKEN(0);
5241             ++s;
5242             Mop(OP_MODULO);
5243         }
5244         PL_tokenbuf[0] = '%';
5245         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5246                 sizeof PL_tokenbuf - 1, FALSE);
5247         if (!PL_tokenbuf[1]) {
5248             PREREF('%');
5249         }
5250         PL_pending_ident = '%';
5251         TERM('%');
5252
5253     case '^':
5254         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5255                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5256             TOKEN(0);
5257         s++;
5258         BOop(OP_BIT_XOR);
5259     case '[':
5260         if (PL_lex_brackets > 100)
5261             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5262         PL_lex_brackstack[PL_lex_brackets++] = 0;
5263         PL_lex_allbrackets++;
5264         {
5265             const char tmp = *s++;
5266             OPERATOR(tmp);
5267         }
5268     case '~':
5269         if (s[1] == '~'
5270             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5271         {
5272             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5273                 TOKEN(0);
5274             s += 2;
5275             Eop(OP_SMARTMATCH);
5276         }
5277         s++;
5278         OPERATOR('~');
5279     case ',':
5280         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5281             TOKEN(0);
5282         s++;
5283         OPERATOR(',');
5284     case ':':
5285         if (s[1] == ':') {
5286             len = 0;
5287             goto just_a_word_zero_gv;
5288         }
5289         s++;
5290         switch (PL_expect) {
5291             OP *attrs;
5292 #ifdef PERL_MAD
5293             I32 stuffstart;
5294 #endif
5295         case XOPERATOR:
5296             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5297                 break;
5298             PL_bufptr = s;      /* update in case we back off */
5299             if (*s == '=') {
5300                 Perl_croak(aTHX_
5301                            "Use of := for an empty attribute list is not allowed");
5302             }
5303             goto grabattrs;
5304         case XATTRBLOCK:
5305             PL_expect = XBLOCK;
5306             goto grabattrs;
5307         case XATTRTERM:
5308             PL_expect = XTERMBLOCK;
5309          grabattrs:
5310 #ifdef PERL_MAD
5311             stuffstart = s - SvPVX(PL_linestr) - 1;
5312 #endif
5313             s = PEEKSPACE(s);
5314             attrs = NULL;
5315             while (isIDFIRST_lazy_if(s,UTF)) {
5316                 I32 tmp;
5317                 SV *sv;
5318                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5319                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5320                     if (tmp < 0) tmp = -tmp;
5321                     switch (tmp) {
5322                     case KEY_or:
5323                     case KEY_and:
5324                     case KEY_for:
5325                     case KEY_foreach:
5326                     case KEY_unless:
5327                     case KEY_if:
5328                     case KEY_while:
5329                     case KEY_until:
5330                         goto got_attrs;
5331                     default:
5332                         break;
5333                     }
5334                 }
5335                 sv = newSVpvn(s, len);
5336                 if (*d == '(') {
5337                     d = scan_str(d,TRUE,TRUE);
5338                     if (!d) {
5339                         /* MUST advance bufptr here to avoid bogus
5340                            "at end of line" context messages from yyerror().
5341                          */
5342                         PL_bufptr = s + len;
5343                         yyerror("Unterminated attribute parameter in attribute list");
5344                         if (attrs)
5345                             op_free(attrs);
5346                         sv_free(sv);
5347                         return REPORT(0);       /* EOF indicator */
5348                     }
5349                 }
5350                 if (PL_lex_stuff) {
5351                     sv_catsv(sv, PL_lex_stuff);
5352                     attrs = op_append_elem(OP_LIST, attrs,
5353                                         newSVOP(OP_CONST, 0, sv));
5354                     SvREFCNT_dec(PL_lex_stuff);
5355                     PL_lex_stuff = NULL;
5356                 }
5357                 else {
5358                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5359                         sv_free(sv);
5360                         if (PL_in_my == KEY_our) {
5361                             deprecate(":unique");
5362                         }
5363                         else
5364                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5365                     }
5366
5367                     /* NOTE: any CV attrs applied here need to be part of
5368                        the CVf_BUILTIN_ATTRS define in cv.h! */
5369                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5370                         sv_free(sv);
5371                         CvLVALUE_on(PL_compcv);
5372                     }
5373                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5374                         sv_free(sv);
5375                         deprecate(":locked");
5376                     }
5377                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5378                         sv_free(sv);
5379                         CvMETHOD_on(PL_compcv);
5380                     }
5381                     /* After we've set the flags, it could be argued that
5382                        we don't need to do the attributes.pm-based setting
5383                        process, and shouldn't bother appending recognized
5384                        flags.  To experiment with that, uncomment the
5385                        following "else".  (Note that's already been
5386                        uncommented.  That keeps the above-applied built-in
5387                        attributes from being intercepted (and possibly
5388                        rejected) by a package's attribute routines, but is
5389                        justified by the performance win for the common case
5390                        of applying only built-in attributes.) */
5391                     else
5392                         attrs = op_append_elem(OP_LIST, attrs,
5393                                             newSVOP(OP_CONST, 0,
5394                                                     sv));
5395                 }
5396                 s = PEEKSPACE(d);
5397                 if (*s == ':' && s[1] != ':')
5398                     s = PEEKSPACE(s+1);
5399                 else if (s == d)
5400                     break;      /* require real whitespace or :'s */
5401                 /* XXX losing whitespace on sequential attributes here */
5402             }
5403             {
5404                 const char tmp
5405                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5406                 if (*s != ';' && *s != '}' && *s != tmp
5407                     && (tmp != '=' || *s != ')')) {
5408                     const char q = ((*s == '\'') ? '"' : '\'');
5409                     /* If here for an expression, and parsed no attrs, back
5410                        off. */
5411                     if (tmp == '=' && !attrs) {
5412                         s = PL_bufptr;
5413                         break;
5414                     }
5415                     /* MUST advance bufptr here to avoid bogus "at end of line"
5416                        context messages from yyerror().
5417                     */
5418                     PL_bufptr = s;
5419                     yyerror( (const char *)
5420                              (*s
5421                               ? Perl_form(aTHX_ "Invalid separator character "
5422                                           "%c%c%c in attribute list", q, *s, q)
5423                               : "Unterminated attribute list" ) );
5424                     if (attrs)
5425                         op_free(attrs);
5426                     OPERATOR(':');
5427                 }
5428             }
5429         got_attrs:
5430             if (attrs) {
5431                 start_force(PL_curforce);
5432                 NEXTVAL_NEXTTOKE.opval = attrs;
5433                 CURMAD('_', PL_nextwhite);
5434                 force_next(THING);
5435             }
5436 #ifdef PERL_MAD
5437             if (PL_madskills) {
5438                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5439                                      (s - SvPVX(PL_linestr)) - stuffstart);
5440             }
5441 #endif
5442             TOKEN(COLONATTR);
5443         }
5444         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5445             s--;
5446             TOKEN(0);
5447         }
5448         PL_lex_allbrackets--;
5449         OPERATOR(':');
5450     case '(':
5451         s++;
5452         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5453             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5454         else
5455             PL_expect = XTERM;
5456         s = SKIPSPACE1(s);
5457         PL_lex_allbrackets++;
5458         TOKEN('(');
5459     case ';':
5460         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5461             TOKEN(0);
5462         CLINE;
5463         s++;
5464         OPERATOR(';');
5465     case ')':
5466         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5467             TOKEN(0);
5468         s++;
5469         PL_lex_allbrackets--;
5470         s = SKIPSPACE1(s);
5471         if (*s == '{')
5472             PREBLOCK(')');
5473         TERM(')');
5474     case ']':
5475         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5476             TOKEN(0);
5477         s++;
5478         if (PL_lex_brackets <= 0)
5479             yyerror("Unmatched right square bracket");
5480         else
5481             --PL_lex_brackets;
5482         PL_lex_allbrackets--;
5483         if (PL_lex_state == LEX_INTERPNORMAL) {
5484             if (PL_lex_brackets == 0) {
5485                 if (*s == '-' && s[1] == '>')
5486                     PL_lex_state = LEX_INTERPENDMAYBE;
5487                 else if (*s != '[' && *s != '{')
5488                     PL_lex_state = LEX_INTERPEND;
5489             }
5490         }
5491         TERM(']');
5492     case '{':
5493       leftbracket:
5494         s++;
5495         if (PL_lex_brackets > 100) {
5496             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5497         }
5498         switch (PL_expect) {
5499         case XTERM:
5500             if (PL_lex_formbrack) {
5501                 s--;
5502                 PRETERMBLOCK(DO);
5503             }
5504             if (PL_oldoldbufptr == PL_last_lop)
5505                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5506             else
5507                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5508             PL_lex_allbrackets++;
5509             OPERATOR(HASHBRACK);
5510         case XOPERATOR:
5511             while (s < PL_bufend && SPACE_OR_TAB(*s))
5512                 s++;
5513             d = s;
5514             PL_tokenbuf[0] = '\0';
5515             if (d < PL_bufend && *d == '-') {
5516                 PL_tokenbuf[0] = '-';
5517                 d++;
5518                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5519                     d++;
5520             }
5521             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5522                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5523                               FALSE, &len);
5524                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5525                     d++;
5526                 if (*d == '}') {
5527                     const char minus = (PL_tokenbuf[0] == '-');
5528                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5529                     if (minus)
5530                         force_next('-');
5531                 }
5532             }
5533             /* FALL THROUGH */
5534         case XATTRBLOCK:
5535         case XBLOCK:
5536             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5537             PL_lex_allbrackets++;
5538             PL_expect = XSTATE;
5539             break;
5540         case XATTRTERM:
5541         case XTERMBLOCK:
5542             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5543             PL_lex_allbrackets++;
5544             PL_expect = XSTATE;
5545             break;
5546         default: {
5547                 const char *t;
5548                 if (PL_oldoldbufptr == PL_last_lop)
5549                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5550                 else
5551                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5552                 PL_lex_allbrackets++;
5553                 s = SKIPSPACE1(s);
5554                 if (*s == '}') {
5555                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5556                         PL_expect = XTERM;
5557                         /* This hack is to get the ${} in the message. */
5558                         PL_bufptr = s+1;
5559                         yyerror("syntax error");
5560                         break;
5561                     }
5562                     OPERATOR(HASHBRACK);
5563                 }
5564                 /* This hack serves to disambiguate a pair of curlies
5565                  * as being a block or an anon hash.  Normally, expectation
5566                  * determines that, but in cases where we're not in a
5567                  * position to expect anything in particular (like inside
5568                  * eval"") we have to resolve the ambiguity.  This code
5569                  * covers the case where the first term in the curlies is a
5570                  * quoted string.  Most other cases need to be explicitly
5571                  * disambiguated by prepending a "+" before the opening
5572                  * curly in order to force resolution as an anon hash.
5573                  *
5574                  * XXX should probably propagate the outer expectation
5575                  * into eval"" to rely less on this hack, but that could
5576                  * potentially break current behavior of eval"".
5577                  * GSAR 97-07-21
5578                  */
5579                 t = s;
5580                 if (*s == '\'' || *s == '"' || *s == '`') {
5581                     /* common case: get past first string, handling escapes */
5582                     for (t++; t < PL_bufend && *t != *s;)
5583                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5584                             t++;
5585                     t++;
5586                 }
5587                 else if (*s == 'q') {
5588                     if (++t < PL_bufend
5589                         && (!isALNUM(*t)
5590                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5591                                 && !isALNUM(*t))))
5592                     {
5593                         /* skip q//-like construct */
5594                         const char *tmps;
5595                         char open, close, term;
5596                         I32 brackets = 1;
5597
5598                         while (t < PL_bufend && isSPACE(*t))
5599                             t++;
5600                         /* check for q => */
5601                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5602                             OPERATOR(HASHBRACK);
5603                         }
5604                         term = *t;
5605                         open = term;
5606                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5607                             term = tmps[5];
5608                         close = term;
5609                         if (open == close)
5610                             for (t++; t < PL_bufend; t++) {
5611                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5612                                     t++;
5613                                 else if (*t == open)
5614                                     break;
5615                             }
5616                         else {
5617                             for (t++; t < PL_bufend; t++) {
5618                                 if (*t == '\\' && t+1 < PL_bufend)
5619                                     t++;
5620                                 else if (*t == close && --brackets <= 0)
5621                                     break;
5622                                 else if (*t == open)
5623                                     brackets++;
5624                             }
5625                         }
5626                         t++;
5627                     }
5628                     else
5629                         /* skip plain q word */
5630                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5631                              t += UTF8SKIP(t);
5632                 }
5633                 else if (isALNUM_lazy_if(t,UTF)) {
5634                     t += UTF8SKIP(t);
5635                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5636                          t += UTF8SKIP(t);
5637                 }
5638                 while (t < PL_bufend && isSPACE(*t))
5639                     t++;
5640                 /* if comma follows first term, call it an anon hash */
5641                 /* XXX it could be a comma expression with loop modifiers */
5642                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5643                                    || (*t == '=' && t[1] == '>')))
5644                     OPERATOR(HASHBRACK);
5645                 if (PL_expect == XREF)
5646                     PL_expect = XTERM;
5647                 else {
5648                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5649                     PL_expect = XSTATE;
5650                 }
5651             }
5652             break;
5653         }
5654         pl_yylval.ival = CopLINE(PL_curcop);
5655         if (isSPACE(*s) || *s == '#')
5656             PL_copline = NOLINE;   /* invalidate current command line number */
5657         TOKEN('{');
5658     case '}':
5659         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5660             TOKEN(0);
5661       rightbracket:
5662         s++;
5663         if (PL_lex_brackets <= 0)
5664             yyerror("Unmatched right curly bracket");
5665         else
5666             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5667         PL_lex_allbrackets--;
5668         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5669             PL_lex_formbrack = 0;
5670         if (PL_lex_state == LEX_INTERPNORMAL) {
5671             if (PL_lex_brackets == 0) {
5672                 if (PL_expect & XFAKEBRACK) {
5673                     PL_expect &= XENUMMASK;
5674                     PL_lex_state = LEX_INTERPEND;
5675                     PL_bufptr = s;
5676 #if 0
5677                     if (PL_madskills) {
5678                         if (!PL_thiswhite)
5679                             PL_thiswhite = newSVpvs("");
5680                         sv_catpvs(PL_thiswhite,"}");
5681                     }
5682 #endif
5683                     return yylex();     /* ignore fake brackets */
5684                 }
5685                 if (*s == '-' && s[1] == '>')
5686                     PL_lex_state = LEX_INTERPENDMAYBE;
5687                 else if (*s != '[' && *s != '{')
5688                     PL_lex_state = LEX_INTERPEND;
5689             }
5690         }
5691         if (PL_expect & XFAKEBRACK) {
5692             PL_expect &= XENUMMASK;
5693             PL_bufptr = s;
5694             return yylex();             /* ignore fake brackets */
5695         }
5696         start_force(PL_curforce);
5697         if (PL_madskills) {
5698             curmad('X', newSVpvn(s-1,1));
5699             CURMAD('_', PL_thiswhite);
5700         }
5701         force_next('}');
5702 #ifdef PERL_MAD
5703         if (!PL_thistoken)
5704             PL_thistoken = newSVpvs("");
5705 #endif
5706         TOKEN(';');
5707     case '&':
5708         s++;
5709         if (*s++ == '&') {
5710             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5711                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5712                 s -= 2;
5713                 TOKEN(0);
5714             }
5715             AOPERATOR(ANDAND);
5716         }
5717         s--;
5718         if (PL_expect == XOPERATOR) {
5719             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5720                 && isIDFIRST_lazy_if(s,UTF))
5721             {
5722                 CopLINE_dec(PL_curcop);
5723                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5724                 CopLINE_inc(PL_curcop);
5725             }
5726             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5727                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5728                 s--;
5729                 TOKEN(0);
5730             }
5731             BAop(OP_BIT_AND);
5732         }
5733
5734         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5735         if (*PL_tokenbuf) {
5736             PL_expect = XOPERATOR;
5737             force_ident(PL_tokenbuf, '&');
5738         }
5739         else
5740             PREREF('&');
5741         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5742         TERM('&');
5743
5744     case '|':
5745         s++;
5746         if (*s++ == '|') {
5747             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5748                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5749                 s -= 2;
5750                 TOKEN(0);
5751             }
5752             AOPERATOR(OROR);
5753         }
5754         s--;
5755         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5756                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5757             s--;
5758             TOKEN(0);
5759         }
5760         BOop(OP_BIT_OR);
5761     case '=':
5762         s++;
5763         {
5764             const char tmp = *s++;
5765             if (tmp == '=') {
5766                 if (!PL_lex_allbrackets &&
5767                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5768                     s -= 2;
5769                     TOKEN(0);
5770                 }
5771                 Eop(OP_EQ);
5772             }
5773             if (tmp == '>') {
5774                 if (!PL_lex_allbrackets &&
5775                         PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5776                     s -= 2;
5777                     TOKEN(0);
5778                 }
5779                 OPERATOR(',');
5780             }
5781             if (tmp == '~')
5782                 PMop(OP_MATCH);
5783             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5784                 && strchr("+-*/%.^&|<",tmp))
5785                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5786                             "Reversed %c= operator",(int)tmp);
5787             s--;
5788             if (PL_expect == XSTATE && isALPHA(tmp) &&
5789                 (s == PL_linestart+1 || s[-2] == '\n') )
5790                 {
5791                     if (PL_in_eval && !PL_rsfp) {
5792                         d = PL_bufend;
5793                         while (s < d) {
5794                             if (*s++ == '\n') {
5795                                 incline(s);
5796                                 if (strnEQ(s,"=cut",4)) {
5797                                     s = strchr(s,'\n');
5798                                     if (s)
5799                                         s++;
5800                                     else
5801                                         s = d;
5802                                     incline(s);
5803                                     goto retry;
5804                                 }
5805                             }
5806                         }
5807                         goto retry;
5808                     }
5809 #ifdef PERL_MAD
5810                     if (PL_madskills) {
5811                         if (!PL_thiswhite)
5812                             PL_thiswhite = newSVpvs("");
5813                         sv_catpvn(PL_thiswhite, PL_linestart,
5814                                   PL_bufend - PL_linestart);
5815                     }
5816 #endif
5817                     s = PL_bufend;
5818                     PL_parser->in_pod = 1;
5819                     goto retry;
5820                 }
5821         }
5822         if (PL_lex_brackets < PL_lex_formbrack) {
5823             const char *t = s;
5824 #ifdef PERL_STRICT_CR
5825             while (SPACE_OR_TAB(*t))
5826 #else
5827             while (SPACE_OR_TAB(*t) || *t == '\r')
5828 #endif
5829                 t++;
5830             if (*t == '\n' || *t == '#') {
5831                 s--;
5832                 PL_expect = XBLOCK;
5833                 goto leftbracket;
5834             }
5835         }
5836         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5837             s--;
5838             TOKEN(0);
5839         }
5840         pl_yylval.ival = 0;
5841         OPERATOR(ASSIGNOP);
5842     case '!':
5843         s++;
5844         {
5845             const char tmp = *s++;
5846             if (tmp == '=') {
5847                 /* was this !=~ where !~ was meant?
5848                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5849
5850                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5851                     const char *t = s+1;
5852
5853                     while (t < PL_bufend && isSPACE(*t))
5854                         ++t;
5855
5856                     if (*t == '/' || *t == '?' ||
5857                         ((*t == 'm' || *t == 's' || *t == 'y')
5858                          && !isALNUM(t[1])) ||
5859                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5860                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5861                                     "!=~ should be !~");
5862                 }
5863                 if (!PL_lex_allbrackets &&
5864                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5865                     s -= 2;
5866                     TOKEN(0);
5867                 }
5868                 Eop(OP_NE);
5869             }
5870             if (tmp == '~')
5871                 PMop(OP_NOT);
5872         }
5873         s--;
5874         OPERATOR('!');
5875     case '<':
5876         if (PL_expect != XOPERATOR) {
5877             if (s[1] != '<' && !strchr(s,'>'))
5878                 check_uni();
5879             if (s[1] == '<')
5880                 s = scan_heredoc(s);
5881             else
5882                 s = scan_inputsymbol(s);
5883             TERM(sublex_start());
5884         }
5885         s++;
5886         {
5887             char tmp = *s++;
5888             if (tmp == '<') {
5889                 if (*s == '=' && !PL_lex_allbrackets &&
5890                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5891                     s -= 2;
5892                     TOKEN(0);
5893                 }
5894                 SHop(OP_LEFT_SHIFT);
5895             }
5896             if (tmp == '=') {
5897                 tmp = *s++;
5898                 if (tmp == '>') {
5899                     if (!PL_lex_allbrackets &&
5900                             PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5901                         s -= 3;
5902                         TOKEN(0);
5903                     }
5904                     Eop(OP_NCMP);
5905                 }
5906                 s--;
5907                 if (!PL_lex_allbrackets &&
5908                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5909                     s -= 2;
5910                     TOKEN(0);
5911                 }
5912                 Rop(OP_LE);
5913             }
5914         }
5915         s--;
5916         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5917             s--;
5918             TOKEN(0);
5919         }
5920         Rop(OP_LT);
5921     case '>':
5922         s++;
5923         {
5924             const char tmp = *s++;
5925             if (tmp == '>') {
5926                 if (*s == '=' && !PL_lex_allbrackets &&
5927                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5928                     s -= 2;
5929                     TOKEN(0);
5930                 }
5931                 SHop(OP_RIGHT_SHIFT);
5932             }
5933             else if (tmp == '=') {
5934                 if (!PL_lex_allbrackets &&
5935                         PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5936                     s -= 2;
5937                     TOKEN(0);
5938                 }
5939                 Rop(OP_GE);
5940             }
5941         }
5942         s--;
5943         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5944             s--;
5945             TOKEN(0);
5946         }
5947         Rop(OP_GT);
5948
5949     case '$':
5950         CLINE;
5951
5952         if (PL_expect == XOPERATOR) {
5953             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5954                 return deprecate_commaless_var_list();
5955             }
5956         }
5957
5958         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5959             PL_tokenbuf[0] = '@';
5960             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5961                            sizeof PL_tokenbuf - 1, FALSE);
5962             if (PL_expect == XOPERATOR)
5963                 no_op("Array length", s);
5964             if (!PL_tokenbuf[1])
5965                 PREREF(DOLSHARP);
5966             PL_expect = XOPERATOR;
5967             PL_pending_ident = '#';
5968             TOKEN(DOLSHARP);
5969         }
5970
5971         PL_tokenbuf[0] = '$';
5972         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5973                        sizeof PL_tokenbuf - 1, FALSE);
5974         if (PL_expect == XOPERATOR)
5975             no_op("Scalar", s);
5976         if (!PL_tokenbuf[1]) {
5977             if (s == PL_bufend)
5978                 yyerror("Final $ should be \\$ or $name");
5979             PREREF('$');
5980         }
5981
5982         /* This kludge not intended to be bulletproof. */
5983         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5984             pl_yylval.opval = newSVOP(OP_CONST, 0,
5985                                    newSViv(CopARYBASE_get(&PL_compiling)));
5986             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5987             TERM(THING);
5988         }
5989
5990         d = s;
5991         {
5992             const char tmp = *s;
5993             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5994                 s = SKIPSPACE1(s);
5995
5996             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5997                 && intuit_more(s)) {
5998                 if (*s == '[') {
5999                     PL_tokenbuf[0] = '@';
6000                     if (ckWARN(WARN_SYNTAX)) {
6001                         char *t = s+1;
6002
6003                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6004                             t++;
6005                         if (*t++ == ',') {
6006                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6007                             while (t < PL_bufend && *t != ']')
6008                                 t++;
6009                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6010                                         "Multidimensional syntax %.*s not supported",
6011                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
6012                         }
6013                     }
6014                 }
6015                 else if (*s == '{') {
6016                     char *t;
6017                     PL_tokenbuf[0] = '%';
6018                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6019                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6020                         {
6021                             char tmpbuf[sizeof PL_tokenbuf];
6022                             do {
6023                                 t++;
6024                             } while (isSPACE(*t));
6025                             if (isIDFIRST_lazy_if(t,UTF)) {
6026                                 STRLEN len;
6027                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6028                                               &len);
6029                                 while (isSPACE(*t))
6030                                     t++;
6031                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6032                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6033                                                 "You need to quote \"%s\"",
6034                                                 tmpbuf);
6035                             }
6036                         }
6037                 }
6038             }
6039
6040             PL_expect = XOPERATOR;
6041             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6042                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6043                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6044                     PL_expect = XOPERATOR;
6045                 else if (strchr("$@\"'`q", *s))
6046                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6047                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6048                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6049                 else if (isIDFIRST_lazy_if(s,UTF)) {
6050                     char tmpbuf[sizeof PL_tokenbuf];
6051                     int t2;
6052                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6053                     if ((t2 = keyword(tmpbuf, len, 0))) {
6054                         /* binary operators exclude handle interpretations */
6055                         switch (t2) {
6056                         case -KEY_x:
6057                         case -KEY_eq:
6058                         case -KEY_ne:
6059                         case -KEY_gt:
6060                         case -KEY_lt:
6061                         case -KEY_ge:
6062                         case -KEY_le:
6063                         case -KEY_cmp:
6064                             break;
6065                         default:
6066                             PL_expect = XTERM;  /* e.g. print $fh length() */
6067                             break;
6068                         }
6069                     }
6070                     else {
6071                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6072                     }
6073                 }
6074                 else if (isDIGIT(*s))
6075                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6076                 else if (*s == '.' && isDIGIT(s[1]))
6077                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6078                 else if ((*s == '?' || *s == '-' || *s == '+')
6079                          && !isSPACE(s[1]) && s[1] != '=')
6080                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6081                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6082                          && s[1] != '/')
6083                     PL_expect = XTERM;          /* e.g. print $fh /.../
6084                                                    XXX except DORDOR operator
6085                                                 */
6086                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6087                          && s[2] != '=')
6088                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6089             }
6090         }
6091         PL_pending_ident = '$';
6092         TOKEN('$');
6093
6094     case '@':
6095         if (PL_expect == XOPERATOR)
6096             no_op("Array", s);
6097         PL_tokenbuf[0] = '@';
6098         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6099         if (!PL_tokenbuf[1]) {
6100             PREREF('@');
6101         }
6102         if (PL_lex_state == LEX_NORMAL)
6103             s = SKIPSPACE1(s);
6104         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6105             if (*s == '{')
6106                 PL_tokenbuf[0] = '%';
6107
6108             /* Warn about @ where they meant $. */
6109             if (*s == '[' || *s == '{') {
6110                 if (ckWARN(WARN_SYNTAX)) {
6111                     const char *t = s + 1;
6112                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6113                         t++;
6114                     if (*t == '}' || *t == ']') {
6115                         t++;
6116                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6117                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6118                             "Scalar value %.*s better written as $%.*s",
6119                             (int)(t-PL_bufptr), PL_bufptr,
6120                             (int)(t-PL_bufptr-1), PL_bufptr+1);
6121                     }
6122                 }
6123             }
6124         }
6125         PL_pending_ident = '@';
6126         TERM('@');
6127
6128      case '/':                  /* may be division, defined-or, or pattern */
6129         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6130             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6131                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6132                 TOKEN(0);
6133             s += 2;
6134             AOPERATOR(DORDOR);
6135         }
6136      case '?':                  /* may either be conditional or pattern */
6137         if (PL_expect == XOPERATOR) {
6138              char tmp = *s++;
6139              if(tmp == '?') {
6140                 if (!PL_lex_allbrackets &&
6141                         PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6142                     s--;
6143                     TOKEN(0);
6144                 }
6145                 PL_lex_allbrackets++;
6146                 OPERATOR('?');
6147              }
6148              else {
6149                  tmp = *s++;
6150                  if(tmp == '/') {
6151                      /* A // operator. */
6152                     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6153                             (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6154                                             LEX_FAKEEOF_LOGIC)) {
6155                         s -= 2;
6156                         TOKEN(0);
6157                     }
6158                     AOPERATOR(DORDOR);
6159                  }
6160                  else {
6161                      s--;
6162                      if (*s == '=' && !PL_lex_allbrackets &&
6163                              PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6164                          s--;
6165                          TOKEN(0);
6166                      }
6167                      Mop(OP_DIVIDE);
6168                  }
6169              }
6170          }
6171          else {
6172              /* Disable warning on "study /blah/" */
6173              if (PL_oldoldbufptr == PL_last_uni
6174               && (*PL_last_uni != 's' || s - PL_last_uni < 5
6175                   || memNE(PL_last_uni, "study", 5)
6176                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
6177               ))
6178                  check_uni();
6179              if (*s == '?')
6180                  deprecate("?PATTERN? without explicit operator");
6181              s = scan_pat(s,OP_MATCH);
6182              TERM(sublex_start());
6183          }
6184
6185     case '.':
6186         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6187 #ifdef PERL_STRICT_CR
6188             && s[1] == '\n'
6189 #else
6190             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6191 #endif
6192             && (s == PL_linestart || s[-1] == '\n') )
6193         {
6194             PL_lex_formbrack = 0;
6195             PL_expect = XSTATE;
6196             goto rightbracket;
6197         }
6198         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6199             s += 3;
6200             OPERATOR(YADAYADA);
6201         }
6202         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6203             char tmp = *s++;
6204             if (*s == tmp) {
6205                 if (!PL_lex_allbrackets &&
6206                         PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6207                     s--;
6208                     TOKEN(0);
6209                 }
6210                 s++;
6211                 if (*s == tmp) {
6212                     s++;
6213                     pl_yylval.ival = OPf_SPECIAL;
6214                 }
6215                 else
6216                     pl_yylval.ival = 0;
6217                 OPERATOR(DOTDOT);
6218             }
6219             if (*s == '=' && !PL_lex_allbrackets &&
6220                     PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6221                 s--;
6222                 TOKEN(0);
6223             }
6224             Aop(OP_CONCAT);
6225         }
6226         /* FALL THROUGH */
6227     case '0': case '1': case '2': case '3': case '4':
6228     case '5': case '6': case '7': case '8': case '9':
6229         s = scan_num(s, &pl_yylval);
6230         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6231         if (PL_expect == XOPERATOR)
6232             no_op("Number",s);
6233         TERM(THING);
6234
6235     case '\'':
6236         s = scan_str(s,!!PL_madskills,FALSE);
6237         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6238         if (PL_expect == XOPERATOR) {
6239             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6240                 return deprecate_commaless_var_list();
6241             }
6242             else
6243                 no_op("String",s);
6244         }
6245         if (!s)
6246             missingterm(NULL);
6247         pl_yylval.ival = OP_CONST;
6248         TERM(sublex_start());
6249
6250     case '"':
6251         s = scan_str(s,!!PL_madskills,FALSE);
6252         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6253         if (PL_expect == XOPERATOR) {
6254             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6255                 return deprecate_commaless_var_list();
6256             }
6257             else
6258                 no_op("String",s);
6259         }
6260         if (!s)
6261             missingterm(NULL);
6262         pl_yylval.ival = OP_CONST;
6263         /* FIXME. I think that this can be const if char *d is replaced by
6264            more localised variables.  */
6265         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6266             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6267                 pl_yylval.ival = OP_STRINGIFY;
6268                 break;
6269             }
6270         }
6271         TERM(sublex_start());
6272
6273     case '`':
6274         s = scan_str(s,!!PL_madskills,FALSE);
6275         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6276         if (PL_expect == XOPERATOR)
6277             no_op("Backticks",s);
6278         if (!s)
6279             missingterm(NULL);
6280         readpipe_override();
6281         TERM(sublex_start());
6282
6283     case '\\':
6284         s++;
6285         if (PL_lex_inwhat && isDIGIT(*s))
6286             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6287                            *s, *s);
6288         if (PL_expect == XOPERATOR)
6289             no_op("Backslash",s);
6290         OPERATOR(REFGEN);
6291
6292     case 'v':
6293         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6294             char *start = s + 2;
6295             while (isDIGIT(*start) || *start == '_')
6296                 start++;
6297             if (*start == '.' && isDIGIT(start[1])) {
6298                 s = scan_num(s, &pl_yylval);
6299                 TERM(THING);
6300             }
6301             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6302             else if (!isALPHA(*start) && (PL_expect == XTERM
6303                         || PL_expect == XREF || PL_expect == XSTATE
6304                         || PL_expect == XTERMORDORDOR)) {
6305                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6306                 if (!gv) {
6307                     s = scan_num(s, &pl_yylval);
6308                     TERM(THING);
6309                 }
6310             }
6311         }
6312         goto keylookup;
6313     case 'x':
6314         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6315             s++;
6316             Mop(OP_REPEAT);
6317         }
6318         goto keylookup;
6319
6320     case '_':
6321     case 'a': case 'A':
6322     case 'b': case 'B':
6323     case 'c': case 'C':
6324     case 'd': case 'D':
6325     case 'e': case 'E':
6326     case 'f': case 'F':
6327     case 'g': case 'G':
6328     case 'h': case 'H':
6329     case 'i': case 'I':
6330     case 'j': case 'J':
6331     case 'k': case 'K':
6332     case 'l': case 'L':
6333     case 'm': case 'M':
6334     case 'n': case 'N':
6335     case 'o': case 'O':
6336     case 'p': case 'P':
6337     case 'q': case 'Q':
6338     case 'r': case 'R':
6339     case 's': case 'S':
6340     case 't': case 'T':
6341     case 'u': case 'U':
6342               case 'V':
6343     case 'w': case 'W':
6344               case 'X':
6345     case 'y': case 'Y':
6346     case 'z': case 'Z':
6347
6348       keylookup: {
6349         bool anydelim;
6350         I32 tmp;
6351
6352         orig_keyword = 0;
6353         gv = NULL;
6354         gvp = NULL;
6355
6356         PL_bufptr = s;
6357         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6358
6359         /* Some keywords can be followed by any delimiter, including ':' */
6360         anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6361
6362         /* x::* is just a word, unless x is "CORE" */
6363         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6364             goto just_a_word;
6365
6366         d = s;
6367         while (d < PL_bufend && isSPACE(*d))
6368                 d++;    /* no comments skipped here, or s### is misparsed */
6369
6370         /* Is this a word before a => operator? */
6371         if (*d == '=' && d[1] == '>') {
6372             CLINE;
6373             pl_yylval.opval
6374                 = (OP*)newSVOP(OP_CONST, 0,
6375                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6376             pl_yylval.opval->op_private = OPpCONST_BARE;
6377             TERM(WORD);
6378         }
6379
6380         /* Check for plugged-in keyword */
6381         {
6382             OP *o;
6383             int result;
6384             char *saved_bufptr = PL_bufptr;
6385             PL_bufptr = s;
6386             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6387             s = PL_bufptr;
6388             if (result == KEYWORD_PLUGIN_DECLINE) {
6389                 /* not a plugged-in keyword */
6390                 PL_bufptr = saved_bufptr;
6391             } else if (result == KEYWORD_PLUGIN_STMT) {
6392                 pl_yylval.opval = o;
6393                 CLINE;
6394                 PL_expect = XSTATE;
6395                 return REPORT(PLUGSTMT);
6396             } else if (result == KEYWORD_PLUGIN_EXPR) {
6397                 pl_yylval.opval = o;
6398                 CLINE;
6399                 PL_expect = XOPERATOR;
6400                 return REPORT(PLUGEXPR);
6401             } else {
6402                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6403                                         PL_tokenbuf);
6404             }
6405         }
6406
6407         /* Check for built-in keyword */
6408         tmp = keyword(PL_tokenbuf, len, 0);
6409
6410         /* Is this a label? */
6411         if (!anydelim && PL_expect == XSTATE
6412               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6413             s = d + 1;
6414             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6415             CLINE;
6416             TOKEN(LABEL);
6417         }
6418
6419         if (tmp < 0) {                  /* second-class keyword? */
6420             GV *ogv = NULL;     /* override (winner) */
6421             GV *hgv = NULL;     /* hidden (loser) */
6422             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6423                 CV *cv;
6424                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6425                     (cv = GvCVu(gv)))
6426                 {
6427                     if (GvIMPORTED_CV(gv))
6428                         ogv = gv;
6429                     else if (! CvMETHOD(cv))
6430                         hgv = gv;
6431                 }
6432                 if (!ogv &&
6433                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6434                     (gv = *gvp) && isGV_with_GP(gv) &&
6435                     GvCVu(gv) && GvIMPORTED_CV(gv))
6436                 {
6437                     ogv = gv;
6438                 }
6439             }
6440             if (ogv) {
6441                 orig_keyword = tmp;
6442                 tmp = 0;                /* overridden by import or by GLOBAL */
6443             }
6444             else if (gv && !gvp
6445                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6446                      && GvCVu(gv))
6447             {
6448                 tmp = 0;                /* any sub overrides "weak" keyword */
6449             }
6450             else {                      /* no override */
6451                 tmp = -tmp;
6452                 if (tmp == KEY_dump) {
6453                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6454                                    "dump() better written as CORE::dump()");
6455                 }
6456                 gv = NULL;
6457                 gvp = 0;
6458                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6459                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6460                                    "Ambiguous call resolved as CORE::%s(), "
6461                                    "qualify as such or use &",
6462                                    GvENAME(hgv));
6463             }
6464         }
6465
6466       reserved_word:
6467         switch (tmp) {
6468
6469         default:                        /* not a keyword */
6470             /* Trade off - by using this evil construction we can pull the
6471                variable gv into the block labelled keylookup. If not, then
6472                we have to give it function scope so that the goto from the
6473                earlier ':' case doesn't bypass the initialisation.  */
6474             if (0) {
6475             just_a_word_zero_gv:
6476                 gv = NULL;
6477                 gvp = NULL;
6478                 orig_keyword = 0;
6479             }
6480           just_a_word: {
6481                 SV *sv;
6482                 int pkgname = 0;
6483                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6484                 OP *rv2cv_op;
6485                 CV *cv;
6486 #ifdef PERL_MAD
6487                 SV *nextPL_nextwhite = 0;
6488 #endif
6489
6490
6491                 /* Get the rest if it looks like a package qualifier */
6492
6493                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6494                     STRLEN morelen;
6495                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6496                                   TRUE, &morelen);
6497                     if (!morelen)
6498                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6499                                 *s == '\'' ? "'" : "::");
6500                     len += morelen;
6501                     pkgname = 1;
6502                 }
6503
6504                 if (PL_expect == XOPERATOR) {
6505                     if (PL_bufptr == PL_linestart) {
6506                         CopLINE_dec(PL_curcop);
6507                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6508                         CopLINE_inc(PL_curcop);
6509                     }
6510                     else
6511                         no_op("Bareword",s);
6512                 }
6513
6514                 /* Look for a subroutine with this name in current package,
6515                    unless name is "Foo::", in which case Foo is a bareword
6516                    (and a package name). */
6517
6518                 if (len > 2 && !PL_madskills &&
6519                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6520                 {
6521                     if (ckWARN(WARN_BAREWORD)
6522                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6523                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6524                             "Bareword \"%s\" refers to nonexistent package",
6525                              PL_tokenbuf);
6526                     len -= 2;
6527                     PL_tokenbuf[len] = '\0';
6528                     gv = NULL;
6529                     gvp = 0;
6530                 }
6531                 else {
6532                     if (!gv) {
6533                         /* Mustn't actually add anything to a symbol table.
6534                            But also don't want to "initialise" any placeholder
6535                            constants that might already be there into full
6536                            blown PVGVs with attached PVCV.  */
6537                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6538                                                GV_NOADD_NOINIT, SVt_PVCV);
6539                     }
6540                     len = 0;
6541                 }
6542
6543                 /* if we saw a global override before, get the right name */
6544
6545                 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6546                     len ? len : strlen(PL_tokenbuf));
6547                 if (gvp) {
6548                     SV * const tmp_sv = sv;
6549                     sv = newSVpvs("CORE::GLOBAL::");
6550                     sv_catsv(sv, tmp_sv);
6551                     SvREFCNT_dec(tmp_sv);
6552                 }
6553
6554 #ifdef PERL_MAD
6555                 if (PL_madskills && !PL_thistoken) {
6556                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6557                     PL_thistoken = newSVpvn(start,s - start);
6558                     PL_realtokenstart = s - SvPVX(PL_linestr);
6559                 }
6560 #endif
6561
6562                 /* Presume this is going to be a bareword of some sort. */
6563                 CLINE;
6564                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6565                 pl_yylval.opval->op_private = OPpCONST_BARE;
6566
6567                 /* And if "Foo::", then that's what it certainly is. */
6568                 if (len)
6569                     goto safe_bareword;
6570
6571                 {
6572                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6573                     const_op->op_private = OPpCONST_BARE;
6574                     rv2cv_op = newCVREF(0, const_op);
6575                 }
6576                 cv = rv2cv_op_cv(rv2cv_op, 0);
6577
6578                 /* See if it's the indirect object for a list operator. */
6579
6580                 if (PL_oldoldbufptr &&
6581                     PL_oldoldbufptr < PL_bufptr &&
6582                     (PL_oldoldbufptr == PL_last_lop
6583                      || PL_oldoldbufptr == PL_last_uni) &&
6584                     /* NO SKIPSPACE BEFORE HERE! */
6585                     (PL_expect == XREF ||
6586                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6587                 {
6588                     bool immediate_paren = *s == '(';
6589
6590                     /* (Now we can afford to cross potential line boundary.) */
6591                     s = SKIPSPACE2(s,nextPL_nextwhite);
6592 #ifdef PERL_MAD
6593                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6594 #endif
6595
6596                     /* Two barewords in a row may indicate method call. */
6597
6598                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6599                         (tmp = intuit_method(s, gv, cv))) {
6600                         op_free(rv2cv_op);
6601                         if (tmp == METHOD && !PL_lex_allbrackets &&
6602                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6603                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6604                         return REPORT(tmp);
6605                     }
6606
6607                     /* If not a declared subroutine, it's an indirect object. */
6608                     /* (But it's an indir obj regardless for sort.) */
6609                     /* Also, if "_" follows a filetest operator, it's a bareword */
6610
6611                     if (
6612                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6613                          (!cv &&
6614                         (PL_last_lop_op != OP_MAPSTART &&
6615                          PL_last_lop_op != OP_GREPSTART))))
6616                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6617                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6618                        )
6619                     {
6620                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6621                         goto bareword;
6622                     }
6623                 }
6624
6625                 PL_expect = XOPERATOR;
6626 #ifdef PERL_MAD
6627                 if (isSPACE(*s))
6628                     s = SKIPSPACE2(s,nextPL_nextwhite);
6629                 PL_nextwhite = nextPL_nextwhite;
6630 #else
6631                 s = skipspace(s);
6632 #endif
6633
6634                 /* Is this a word before a => operator? */
6635                 if (*s == '=' && s[1] == '>' && !pkgname) {
6636                     op_free(rv2cv_op);
6637                     CLINE;
6638                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6639                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6640                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6641                     TERM(WORD);
6642                 }
6643
6644                 /* If followed by a paren, it's certainly a subroutine. */
6645                 if (*s == '(') {
6646                     CLINE;
6647                     if (cv) {
6648                         d = s + 1;
6649                         while (SPACE_OR_TAB(*d))
6650                             d++;
6651                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6652                             s = d + 1;
6653                             goto its_constant;
6654                         }
6655                     }
6656 #ifdef PERL_MAD
6657                     if (PL_madskills) {
6658                         PL_nextwhite = PL_thiswhite;
6659                         PL_thiswhite = 0;
6660                     }
6661                     start_force(PL_curforce);
6662 #endif
6663                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6664                     PL_expect = XOPERATOR;
6665 #ifdef PERL_MAD
6666                     if (PL_madskills) {
6667                         PL_nextwhite = nextPL_nextwhite;
6668                         curmad('X', PL_thistoken);
6669                         PL_thistoken = newSVpvs("");
6670                     }
6671 #endif
6672                     op_free(rv2cv_op);
6673                     force_next(WORD);
6674                     pl_yylval.ival = 0;
6675                     TOKEN('&');
6676                 }
6677
6678                 /* If followed by var or block, call it a method (unless sub) */
6679
6680                 if ((*s == '$' || *s == '{') && !cv) {
6681                     op_free(rv2cv_op);
6682                     PL_last_lop = PL_oldbufptr;
6683                     PL_last_lop_op = OP_METHOD;
6684                     if (!PL_lex_allbrackets &&
6685                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6686                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6687                     PREBLOCK(METHOD);
6688                 }
6689
6690                 /* If followed by a bareword, see if it looks like indir obj. */
6691
6692                 if (!orig_keyword
6693                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6694                         && (tmp = intuit_method(s, gv, cv))) {
6695                     op_free(rv2cv_op);
6696                     if (tmp == METHOD && !PL_lex_allbrackets &&
6697                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6698                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6699                     return REPORT(tmp);
6700                 }
6701
6702                 /* Not a method, so call it a subroutine (if defined) */
6703
6704                 if (cv) {
6705                     if (lastchar == '-')
6706                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6707                                          "Ambiguous use of -%s resolved as -&%s()",
6708                                          PL_tokenbuf, PL_tokenbuf);
6709                     /* Check for a constant sub */
6710                     if ((sv = cv_const_sv(cv))) {
6711                   its_constant:
6712                         op_free(rv2cv_op);
6713                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6714                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6715                         pl_yylval.opval->op_private = 0;
6716                         pl_yylval.opval->op_flags |= OPf_SPECIAL;
6717                         TOKEN(WORD);
6718                     }
6719
6720                     op_free(pl_yylval.opval);
6721                     pl_yylval.opval = rv2cv_op;
6722                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6723                     PL_last_lop = PL_oldbufptr;
6724                     PL_last_lop_op = OP_ENTERSUB;
6725                     /* Is there a prototype? */
6726                     if (
6727 #ifdef PERL_MAD
6728                         cv &&
6729 #endif
6730                         SvPOK(cv))
6731                     {
6732                         STRLEN protolen;
6733                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6734                         if (!protolen)
6735                             TERM(FUNC0SUB);
6736                         while (*proto == ';')
6737                             proto++;
6738                         if (
6739                             (
6740                                 (
6741                                     *proto == '$' || *proto == '_'
6742                                  || *proto == '*' || *proto == '+'
6743                                 )
6744                              && proto[1] == '\0'
6745                             )
6746                          || (
6747                              *proto == '\\' && proto[1] && proto[2] == '\0'
6748                             )
6749                         )
6750                             OPERATOR(UNIOPSUB);
6751                         if (*proto == '\\' && proto[1] == '[') {
6752                             const char *p = proto + 2;
6753                             while(*p && *p != ']')
6754                                 ++p;
6755                             if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6756                         }
6757                         if (*proto == '&' && *s == '{') {
6758                             if (PL_curstash)
6759                                 sv_setpvs(PL_subname, "__ANON__");
6760                             else
6761                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6762                             if (!PL_lex_allbrackets &&
6763                                     PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6764                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6765                             PREBLOCK(LSTOPSUB);
6766                         }
6767                     }
6768 #ifdef PERL_MAD
6769                     {
6770                         if (PL_madskills) {
6771                             PL_nextwhite = PL_thiswhite;
6772                             PL_thiswhite = 0;
6773                         }
6774                         start_force(PL_curforce);
6775                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6776                         PL_expect = XTERM;
6777                         if (PL_madskills) {
6778                             PL_nextwhite = nextPL_nextwhite;
6779                             curmad('X', PL_thistoken);
6780                             PL_thistoken = newSVpvs("");
6781                         }
6782                         force_next(WORD);
6783                         if (!PL_lex_allbrackets &&
6784                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6785                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6786                         TOKEN(NOAMP);
6787                     }
6788                 }
6789
6790                 /* Guess harder when madskills require "best effort". */
6791                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6792                     int probable_sub = 0;
6793                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6794                         probable_sub = 1;
6795                     else if (isALPHA(*s)) {
6796                         char tmpbuf[1024];
6797                         STRLEN tmplen;
6798                         d = s;
6799                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6800                         if (!keyword(tmpbuf, tmplen, 0))
6801                             probable_sub = 1;
6802                         else {
6803                             while (d < PL_bufend && isSPACE(*d))
6804                                 d++;
6805                             if (*d == '=' && d[1] == '>')
6806                                 probable_sub = 1;
6807                         }
6808                     }
6809                     if (probable_sub) {
6810                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6811                         op_free(pl_yylval.opval);
6812                         pl_yylval.opval = rv2cv_op;
6813                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6814                         PL_last_lop = PL_oldbufptr;
6815                         PL_last_lop_op = OP_ENTERSUB;
6816                         PL_nextwhite = PL_thiswhite;
6817                         PL_thiswhite = 0;
6818                         start_force(PL_curforce);
6819                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6820                         PL_expect = XTERM;
6821                         PL_nextwhite = nextPL_nextwhite;
6822                         curmad('X', PL_thistoken);
6823                         PL_thistoken = newSVpvs("");
6824                         force_next(WORD);
6825                         if (!PL_lex_allbrackets &&
6826                                 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6827                             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6828                         TOKEN(NOAMP);
6829                     }
6830 #else
6831                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6832                     PL_expect = XTERM;
6833                     force_next(WORD);
6834                     if (!PL_lex_allbrackets &&
6835                             PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6836                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6837                     TOKEN(NOAMP);
6838 #endif
6839                 }
6840
6841                 /* Call it a bare word */
6842
6843                 if (PL_hints & HINT_STRICT_SUBS)
6844                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6845                 else {
6846                 bareword:
6847                     /* after "print" and similar functions (corresponding to
6848                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6849                      * a filehandle should be subject to "strict subs".
6850                      * Likewise for the optional indirect-object argument to system
6851                      * or exec, which can't be a bareword */
6852                     if ((PL_last_lop_op == OP_PRINT
6853                             || PL_last_lop_op == OP_PRTF
6854                             || PL_last_lop_op == OP_SAY
6855                             || PL_last_lop_op == OP_SYSTEM
6856                             || PL_last_lop_op == OP_EXEC)
6857                             && (PL_hints & HINT_STRICT_SUBS))
6858                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6859                     if (lastchar != '-') {
6860                         if (ckWARN(WARN_RESERVED)) {
6861                             d = PL_tokenbuf;
6862                             while (isLOWER(*d))
6863                                 d++;
6864                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6865                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6866                                        PL_tokenbuf);
6867                         }
6868                     }
6869                 }
6870                 op_free(rv2cv_op);
6871
6872             safe_bareword:
6873                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6874                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6875                                      "Operator or semicolon missing before %c%s",
6876                                      lastchar, PL_tokenbuf);
6877                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6878                                      "Ambiguous use of %c resolved as operator %c",
6879                                      lastchar, lastchar);
6880                 }
6881                 TOKEN(WORD);
6882             }
6883
6884         case KEY___FILE__:
6885             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6886                                         newSVpv(CopFILE(PL_curcop),0));
6887             TERM(THING);
6888
6889         case KEY___LINE__:
6890             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6891                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6892             TERM(THING);
6893
6894         case KEY___PACKAGE__:
6895             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6896                                         (PL_curstash
6897                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6898                                          : &PL_sv_undef));
6899             TERM(THING);
6900
6901         case KEY___DATA__:
6902         case KEY___END__: {
6903             GV *gv;
6904             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6905                 const char *pname = "main";
6906                 if (PL_tokenbuf[2] == 'D')
6907                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6908                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6909                                 SVt_PVIO);
6910                 GvMULTI_on(gv);
6911                 if (!GvIO(gv))
6912                     GvIOp(gv) = newIO();
6913                 IoIFP(GvIOp(gv)) = PL_rsfp;
6914 #if defined(HAS_FCNTL) && defined(F_SETFD)
6915                 {
6916                     const int fd = PerlIO_fileno(PL_rsfp);
6917                     fcntl(fd,F_SETFD,fd >= 3);
6918                 }
6919 #endif
6920                 /* Mark this internal pseudo-handle as clean */
6921                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6922                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6923                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6924                 else
6925                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6926 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6927                 /* if the script was opened in binmode, we need to revert
6928                  * it to text mode for compatibility; but only iff it has CRs
6929                  * XXX this is a questionable hack at best. */
6930                 if (PL_bufend-PL_bufptr > 2
6931                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6932                 {
6933                     Off_t loc = 0;
6934                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6935                         loc = PerlIO_tell(PL_rsfp);
6936                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6937                     }
6938 #ifdef NETWARE
6939                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6940 #else
6941                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6942 #endif  /* NETWARE */
6943 #ifdef PERLIO_IS_STDIO /* really? */
6944 #  if defined(__BORLANDC__)
6945                         /* XXX see note in do_binmode() */
6946                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6947 #  endif
6948 #endif
6949                         if (loc > 0)
6950                             PerlIO_seek(PL_rsfp, loc, 0);
6951                     }
6952                 }
6953 #endif
6954 #ifdef PERLIO_LAYERS
6955                 if (!IN_BYTES) {
6956                     if (UTF)
6957                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6958                     else if (PL_encoding) {
6959                         SV *name;
6960                         dSP;
6961                         ENTER;
6962                         SAVETMPS;
6963                         PUSHMARK(sp);
6964                         EXTEND(SP, 1);
6965                         XPUSHs(PL_encoding);
6966                         PUTBACK;
6967                         call_method("name", G_SCALAR);
6968                         SPAGAIN;
6969                         name = POPs;
6970                         PUTBACK;
6971                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6972                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6973                                                       SVfARG(name)));
6974                         FREETMPS;
6975                         LEAVE;
6976                     }
6977                 }
6978 #endif
6979 #ifdef PERL_MAD
6980                 if (PL_madskills) {
6981                     if (PL_realtokenstart >= 0) {
6982                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6983                         if (!PL_endwhite)
6984                             PL_endwhite = newSVpvs("");
6985                         sv_catsv(PL_endwhite, PL_thiswhite);
6986                         PL_thiswhite = 0;
6987                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6988                         PL_realtokenstart = -1;
6989                     }
6990                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6991                            != NULL) ;
6992                 }
6993 #endif
6994                 PL_rsfp = NULL;
6995             }
6996             goto fake_eof;
6997         }
6998
6999         case KEY_AUTOLOAD:
7000         case KEY_DESTROY:
7001         case KEY_BEGIN:
7002         case KEY_UNITCHECK:
7003         case KEY_CHECK:
7004         case KEY_INIT:
7005         case KEY_END:
7006             if (PL_expect == XSTATE) {
7007                 s = PL_bufptr;
7008                 goto really_sub;
7009             }
7010             goto just_a_word;
7011
7012         case KEY_CORE:
7013             if (*s == ':' && s[1] == ':') {
7014                 s += 2;
7015                 d = s;
7016                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7017                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
7018                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7019                 if (tmp < 0)
7020                     tmp = -tmp;
7021                 else if (tmp == KEY_require || tmp == KEY_do)
7022                     /* that's a way to remember we saw "CORE::" */
7023                     orig_keyword = tmp;
7024                 goto reserved_word;
7025             }
7026             goto just_a_word;
7027
7028         case KEY_abs:
7029             UNI(OP_ABS);
7030
7031         case KEY_alarm:
7032             UNI(OP_ALARM);
7033
7034         case KEY_accept:
7035             LOP(OP_ACCEPT,XTERM);
7036
7037         case KEY_and:
7038             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7039                 return REPORT(0);
7040             OPERATOR(ANDOP);
7041
7042         case KEY_atan2:
7043             LOP(OP_ATAN2,XTERM);
7044
7045         case KEY_bind:
7046             LOP(OP_BIND,XTERM);
7047
7048         case KEY_binmode:
7049             LOP(OP_BINMODE,XTERM);
7050
7051         case KEY_bless:
7052             LOP(OP_BLESS,XTERM);
7053
7054         case KEY_break:
7055             FUN0(OP_BREAK);
7056
7057         case KEY_chop:
7058             UNI(OP_CHOP);
7059
7060         case KEY_continue:
7061             /* When 'use switch' is in effect, continue has a dual
7062                life as a control operator. */
7063             {
7064                 if (!FEATURE_IS_ENABLED("switch"))
7065                     PREBLOCK(CONTINUE);
7066                 else {
7067                     /* We have to disambiguate the two senses of
7068                       "continue". If the next token is a '{' then
7069                       treat it as the start of a continue block;
7070                       otherwise treat it as a control operator.
7071                      */
7072                     s = skipspace(s);
7073                     if (*s == '{')
7074             PREBLOCK(CONTINUE);
7075                     else
7076                         FUN0(OP_CONTINUE);
7077                 }
7078             }
7079
7080         case KEY_chdir:
7081             /* may use HOME */
7082             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7083             UNI(OP_CHDIR);
7084
7085         case KEY_close:
7086             UNI(OP_CLOSE);
7087
7088         case KEY_closedir:
7089             UNI(OP_CLOSEDIR);
7090
7091         case KEY_cmp:
7092             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7093                 return REPORT(0);
7094             Eop(OP_SCMP);
7095
7096         case KEY_caller:
7097             UNI(OP_CALLER);
7098
7099         case KEY_crypt:
7100 #ifdef FCRYPT
7101             if (!PL_cryptseen) {
7102                 PL_cryptseen = TRUE;
7103                 init_des();
7104             }
7105 #endif
7106             LOP(OP_CRYPT,XTERM);
7107
7108         case KEY_chmod:
7109             LOP(OP_CHMOD,XTERM);
7110
7111         case KEY_chown:
7112             LOP(OP_CHOWN,XTERM);
7113
7114         case KEY_connect:
7115             LOP(OP_CONNECT,XTERM);
7116
7117         case KEY_chr:
7118             UNI(OP_CHR);
7119
7120         case KEY_cos:
7121             UNI(OP_COS);
7122
7123         case KEY_chroot:
7124             UNI(OP_CHROOT);
7125
7126         case KEY_default:
7127             PREBLOCK(DEFAULT);
7128
7129         case KEY_do:
7130             s = SKIPSPACE1(s);
7131             if (*s == '{')
7132                 PRETERMBLOCK(DO);
7133             if (*s != '\'')
7134                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7135             if (orig_keyword == KEY_do) {
7136                 orig_keyword = 0;
7137                 pl_yylval.ival = 1;
7138             }
7139             else
7140                 pl_yylval.ival = 0;
7141             OPERATOR(DO);
7142
7143         case KEY_die:
7144             PL_hints |= HINT_BLOCK_SCOPE;
7145             LOP(OP_DIE,XTERM);
7146
7147         case KEY_defined:
7148             UNI(OP_DEFINED);
7149
7150         case KEY_delete:
7151             UNI(OP_DELETE);
7152
7153         case KEY_dbmopen:
7154             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7155                               STR_WITH_LEN("NDBM_File::"),
7156                               STR_WITH_LEN("DB_File::"),
7157                               STR_WITH_LEN("GDBM_File::"),
7158                               STR_WITH_LEN("SDBM_File::"),
7159                               STR_WITH_LEN("ODBM_File::"),
7160                               NULL);
7161             LOP(OP_DBMOPEN,XTERM);
7162
7163         case KEY_dbmclose:
7164             UNI(OP_DBMCLOSE);
7165
7166         case KEY_dump:
7167             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7168             LOOPX(OP_DUMP);
7169
7170         case KEY_else:
7171             PREBLOCK(ELSE);
7172
7173         case KEY_elsif:
7174             pl_yylval.ival = CopLINE(PL_curcop);
7175             OPERATOR(ELSIF);
7176
7177         case KEY_eq:
7178             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7179                 return REPORT(0);
7180             Eop(OP_SEQ);
7181
7182         case KEY_exists:
7183             UNI(OP_EXISTS);
7184
7185         case KEY_exit:
7186             if (PL_madskills)
7187                 UNI(OP_INT);
7188             UNI(OP_EXIT);
7189
7190         case KEY_eval:
7191             s = SKIPSPACE1(s);
7192             if (*s == '{') { /* block eval */
7193                 PL_expect = XTERMBLOCK;
7194                 UNIBRACK(OP_ENTERTRY);
7195             }
7196             else { /* string eval */
7197                 PL_expect = XTERM;
7198                 UNIBRACK(OP_ENTEREVAL);
7199             }
7200
7201         case KEY_eof:
7202             UNI(OP_EOF);
7203
7204         case KEY_exp:
7205             UNI(OP_EXP);
7206
7207         case KEY_each:
7208             UNI(OP_EACH);
7209
7210         case KEY_exec:
7211             LOP(OP_EXEC,XREF);
7212
7213         case KEY_endhostent:
7214             FUN0(OP_EHOSTENT);
7215
7216         case KEY_endnetent:
7217             FUN0(OP_ENETENT);
7218
7219         case KEY_endservent:
7220             FUN0(OP_ESERVENT);
7221
7222         case KEY_endprotoent:
7223             FUN0(OP_EPROTOENT);
7224
7225         case KEY_endpwent:
7226             FUN0(OP_EPWENT);
7227
7228         case KEY_endgrent:
7229             FUN0(OP_EGRENT);
7230
7231         case KEY_for:
7232         case KEY_foreach:
7233             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7234                 return REPORT(0);
7235             pl_yylval.ival = CopLINE(PL_curcop);
7236             s = SKIPSPACE1(s);
7237             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7238                 char *p = s;
7239 #ifdef PERL_MAD
7240                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7241 #endif
7242
7243                 if ((PL_bufend - p) >= 3 &&
7244                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7245                     p += 2;
7246                 else if ((PL_bufend - p) >= 4 &&
7247                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7248                     p += 3;
7249                 p = PEEKSPACE(p);
7250                 if (isIDFIRST_lazy_if(p,UTF)) {
7251                     p = scan_ident(p, PL_bufend,
7252                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7253                     p = PEEKSPACE(p);
7254                 }
7255                 if (*p != '$')
7256                     Perl_croak(aTHX_ "Missing $ on loop variable");
7257 #ifdef PERL_MAD
7258                 s = SvPVX(PL_linestr) + soff;
7259 #endif
7260             }
7261             OPERATOR(FOR);
7262
7263         case KEY_formline:
7264             LOP(OP_FORMLINE,XTERM);
7265
7266         case KEY_fork:
7267             FUN0(OP_FORK);
7268
7269         case KEY_fcntl:
7270             LOP(OP_FCNTL,XTERM);
7271
7272         case KEY_fileno:
7273             UNI(OP_FILENO);
7274
7275         case KEY_flock:
7276             LOP(OP_FLOCK,XTERM);
7277
7278         case KEY_gt:
7279             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7280                 return REPORT(0);
7281             Rop(OP_SGT);
7282
7283         case KEY_ge:
7284             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7285                 return REPORT(0);
7286             Rop(OP_SGE);
7287
7288         case KEY_grep:
7289             LOP(OP_GREPSTART, XREF);
7290
7291         case KEY_goto:
7292             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7293             LOOPX(OP_GOTO);
7294
7295         case KEY_gmtime:
7296             UNI(OP_GMTIME);
7297
7298         case KEY_getc:
7299             UNIDOR(OP_GETC);
7300
7301         case KEY_getppid:
7302             FUN0(OP_GETPPID);
7303
7304         case KEY_getpgrp:
7305             UNI(OP_GETPGRP);
7306
7307         case KEY_getpriority:
7308             LOP(OP_GETPRIORITY,XTERM);
7309
7310         case KEY_getprotobyname:
7311             UNI(OP_GPBYNAME);
7312
7313         case KEY_getprotobynumber:
7314             LOP(OP_GPBYNUMBER,XTERM);
7315
7316         case KEY_getprotoent:
7317             FUN0(OP_GPROTOENT);
7318
7319         case KEY_getpwent:
7320             FUN0(OP_GPWENT);
7321
7322         case KEY_getpwnam:
7323             UNI(OP_GPWNAM);
7324
7325         case KEY_getpwuid:
7326             UNI(OP_GPWUID);
7327
7328         case KEY_getpeername:
7329             UNI(OP_GETPEERNAME);
7330
7331         case KEY_gethostbyname:
7332             UNI(OP_GHBYNAME);
7333
7334         case KEY_gethostbyaddr:
7335             LOP(OP_GHBYADDR,XTERM);
7336
7337         case KEY_gethostent:
7338             FUN0(OP_GHOSTENT);
7339
7340         case KEY_getnetbyname:
7341             UNI(OP_GNBYNAME);
7342
7343         case KEY_getnetbyaddr:
7344             LOP(OP_GNBYADDR,XTERM);
7345
7346         case KEY_getnetent:
7347             FUN0(OP_GNETENT);
7348
7349         case KEY_getservbyname:
7350             LOP(OP_GSBYNAME,XTERM);
7351
7352         case KEY_getservbyport:
7353             LOP(OP_GSBYPORT,XTERM);
7354
7355         case KEY_getservent:
7356             FUN0(OP_GSERVENT);
7357
7358         case KEY_getsockname:
7359             UNI(OP_GETSOCKNAME);
7360
7361         case KEY_getsockopt:
7362             LOP(OP_GSOCKOPT,XTERM);
7363
7364         case KEY_getgrent:
7365             FUN0(OP_GGRENT);
7366
7367         case KEY_getgrnam:
7368             UNI(OP_GGRNAM);
7369
7370         case KEY_getgrgid:
7371             UNI(OP_GGRGID);
7372
7373         case KEY_getlogin:
7374             FUN0(OP_GETLOGIN);
7375
7376         case KEY_given:
7377             pl_yylval.ival = CopLINE(PL_curcop);
7378             OPERATOR(GIVEN);
7379
7380         case KEY_glob:
7381             LOP(OP_GLOB,XTERM);
7382
7383         case KEY_hex:
7384             UNI(OP_HEX);
7385
7386         case KEY_if:
7387             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7388                 return REPORT(0);
7389             pl_yylval.ival = CopLINE(PL_curcop);
7390             OPERATOR(IF);
7391
7392         case KEY_index:
7393             LOP(OP_INDEX,XTERM);
7394
7395         case KEY_int:
7396             UNI(OP_INT);
7397
7398         case KEY_ioctl:
7399             LOP(OP_IOCTL,XTERM);
7400
7401         case KEY_join:
7402             LOP(OP_JOIN,XTERM);
7403
7404         case KEY_keys:
7405             UNI(OP_KEYS);
7406
7407         case KEY_kill:
7408             LOP(OP_KILL,XTERM);
7409
7410         case KEY_last:
7411             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7412             LOOPX(OP_LAST);
7413
7414         case KEY_lc:
7415             UNI(OP_LC);
7416
7417         case KEY_lcfirst:
7418             UNI(OP_LCFIRST);
7419
7420         case KEY_local:
7421             pl_yylval.ival = 0;
7422             OPERATOR(LOCAL);
7423
7424         case KEY_length:
7425             UNI(OP_LENGTH);
7426
7427         case KEY_lt:
7428             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7429                 return REPORT(0);
7430             Rop(OP_SLT);
7431
7432         case KEY_le:
7433             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7434                 return REPORT(0);
7435             Rop(OP_SLE);
7436
7437         case KEY_localtime:
7438             UNI(OP_LOCALTIME);
7439
7440         case KEY_log:
7441             UNI(OP_LOG);
7442
7443         case KEY_link:
7444             LOP(OP_LINK,XTERM);
7445
7446         case KEY_listen:
7447             LOP(OP_LISTEN,XTERM);
7448
7449         case KEY_lock:
7450             UNI(OP_LOCK);
7451
7452         case KEY_lstat:
7453             UNI(OP_LSTAT);
7454
7455         case KEY_m:
7456             s = scan_pat(s,OP_MATCH);
7457             TERM(sublex_start());
7458
7459         case KEY_map:
7460             LOP(OP_MAPSTART, XREF);
7461
7462         case KEY_mkdir:
7463             LOP(OP_MKDIR,XTERM);
7464
7465         case KEY_msgctl:
7466             LOP(OP_MSGCTL,XTERM);
7467
7468         case KEY_msgget:
7469             LOP(OP_MSGGET,XTERM);
7470
7471         case KEY_msgrcv:
7472             LOP(OP_MSGRCV,XTERM);
7473
7474         case KEY_msgsnd:
7475             LOP(OP_MSGSND,XTERM);
7476
7477         case KEY_our:
7478         case KEY_my:
7479         case KEY_state:
7480             PL_in_my = (U16)tmp;
7481             s = SKIPSPACE1(s);
7482             if (isIDFIRST_lazy_if(s,UTF)) {
7483 #ifdef PERL_MAD
7484                 char* start = s;
7485 #endif
7486                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7487                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7488                     goto really_sub;
7489                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7490                 if (!PL_in_my_stash) {
7491                     char tmpbuf[1024];
7492                     PL_bufptr = s;
7493                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7494                     yyerror(tmpbuf);
7495                 }
7496 #ifdef PERL_MAD
7497                 if (PL_madskills) {     /* just add type to declarator token */
7498                     sv_catsv(PL_thistoken, PL_nextwhite);
7499                     PL_nextwhite = 0;
7500                     sv_catpvn(PL_thistoken, start, s - start);
7501                 }
7502 #endif
7503             }
7504             pl_yylval.ival = 1;
7505             OPERATOR(MY);
7506
7507         case KEY_next:
7508             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7509             LOOPX(OP_NEXT);
7510
7511         case KEY_ne:
7512             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7513                 return REPORT(0);
7514             Eop(OP_SNE);
7515
7516         case KEY_no:
7517             s = tokenize_use(0, s);
7518             OPERATOR(USE);
7519
7520         case KEY_not:
7521             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7522                 FUN1(OP_NOT);
7523             else {
7524                 if (!PL_lex_allbrackets &&
7525                         PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7526                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7527                 OPERATOR(NOTOP);
7528             }
7529
7530         case KEY_open:
7531             s = SKIPSPACE1(s);
7532             if (isIDFIRST_lazy_if(s,UTF)) {
7533                 const char *t;
7534                 for (d = s; isALNUM_lazy_if(d,UTF);)
7535                     d++;
7536                 for (t=d; isSPACE(*t);)
7537                     t++;
7538                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7539                     /* [perl #16184] */
7540                     && !(t[0] == '=' && t[1] == '>')
7541                 ) {
7542                     int parms_len = (int)(d-s);
7543                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7544                            "Precedence problem: open %.*s should be open(%.*s)",
7545                             parms_len, s, parms_len, s);
7546                 }
7547             }
7548             LOP(OP_OPEN,XTERM);
7549
7550         case KEY_or:
7551             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7552                 return REPORT(0);
7553             pl_yylval.ival = OP_OR;
7554             OPERATOR(OROP);
7555
7556         case KEY_ord:
7557             UNI(OP_ORD);
7558
7559         case KEY_oct:
7560             UNI(OP_OCT);
7561
7562         case KEY_opendir:
7563             LOP(OP_OPEN_DIR,XTERM);
7564
7565         case KEY_print:
7566             checkcomma(s,PL_tokenbuf,"filehandle");
7567             LOP(OP_PRINT,XREF);
7568
7569         case KEY_printf:
7570             checkcomma(s,PL_tokenbuf,"filehandle");
7571             LOP(OP_PRTF,XREF);
7572
7573         case KEY_prototype:
7574             UNI(OP_PROTOTYPE);
7575
7576         case KEY_push:
7577             LOP(OP_PUSH,XTERM);
7578
7579         case KEY_pop:
7580             UNIDOR(OP_POP);
7581
7582         case KEY_pos:
7583             UNIDOR(OP_POS);
7584
7585         case KEY_pack:
7586             LOP(OP_PACK,XTERM);
7587
7588         case KEY_package:
7589             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7590             s = SKIPSPACE1(s);
7591             s = force_strict_version(s);
7592             PL_lex_expect = XBLOCK;
7593             OPERATOR(PACKAGE);
7594
7595         case KEY_pipe:
7596             LOP(OP_PIPE_OP,XTERM);
7597
7598         case KEY_q:
7599             s = scan_str(s,!!PL_madskills,FALSE);
7600             if (!s)
7601                 missingterm(NULL);
7602             pl_yylval.ival = OP_CONST;
7603             TERM(sublex_start());
7604
7605         case KEY_quotemeta:
7606             UNI(OP_QUOTEMETA);
7607
7608         case KEY_qw: {
7609             OP *words = NULL;
7610             s = scan_str(s,!!PL_madskills,FALSE);
7611             if (!s)
7612                 missingterm(NULL);
7613             PL_expect = XOPERATOR;
7614             if (SvCUR(PL_lex_stuff)) {
7615                 int warned = 0;
7616                 d = SvPV_force(PL_lex_stuff, len);
7617                 while (len) {
7618                     for (; isSPACE(*d) && len; --len, ++d)
7619                         /**/;
7620                     if (len) {
7621                         SV *sv;
7622                         const char *b = d;
7623                         if (!warned && ckWARN(WARN_QW)) {
7624                             for (; !isSPACE(*d) && len; --len, ++d) {
7625                                 if (*d == ',') {
7626                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7627                                         "Possible attempt to separate words with commas");
7628                                     ++warned;
7629                                 }
7630                                 else if (*d == '#') {
7631                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7632                                         "Possible attempt to put comments in qw() list");
7633                                     ++warned;
7634                                 }
7635                             }
7636                         }
7637                         else {
7638                             for (; !isSPACE(*d) && len; --len, ++d)
7639                                 /**/;
7640                         }
7641                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7642                         words = op_append_elem(OP_LIST, words,
7643                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7644                     }
7645                 }
7646             }
7647             if (!words)
7648                 words = newNULLLIST();
7649             if (PL_lex_stuff) {
7650                 SvREFCNT_dec(PL_lex_stuff);
7651                 PL_lex_stuff = NULL;
7652             }
7653             PL_expect = XOPERATOR;
7654             pl_yylval.opval = sawparens(words);
7655             TOKEN(QWLIST);
7656         }
7657
7658         case KEY_qq:
7659             s = scan_str(s,!!PL_madskills,FALSE);
7660             if (!s)
7661                 missingterm(NULL);
7662             pl_yylval.ival = OP_STRINGIFY;
7663             if (SvIVX(PL_lex_stuff) == '\'')
7664                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
7665             TERM(sublex_start());
7666
7667         case KEY_qr:
7668             s = scan_pat(s,OP_QR);
7669             TERM(sublex_start());
7670
7671         case KEY_qx:
7672             s = scan_str(s,!!PL_madskills,FALSE);
7673             if (!s)
7674                 missingterm(NULL);
7675             readpipe_override();
7676             TERM(sublex_start());
7677
7678         case KEY_return:
7679             OLDLOP(OP_RETURN);
7680
7681         case KEY_require:
7682             s = SKIPSPACE1(s);
7683             if (isDIGIT(*s)) {
7684                 s = force_version(s, FALSE);
7685             }
7686             else if (*s != 'v' || !isDIGIT(s[1])
7687                     || (s = force_version(s, TRUE), *s == 'v'))
7688             {
7689                 *PL_tokenbuf = '\0';
7690                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7691                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7692                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7693                 else if (*s == '<')
7694                     yyerror("<> should be quotes");
7695             }
7696             if (orig_keyword == KEY_require) {
7697                 orig_keyword = 0;
7698                 pl_yylval.ival = 1;
7699             }
7700             else
7701                 pl_yylval.ival = 0;
7702             PL_expect = XTERM;
7703             PL_bufptr = s;
7704             PL_last_uni = PL_oldbufptr;
7705             PL_last_lop_op = OP_REQUIRE;
7706             s = skipspace(s);
7707             return REPORT( (int)REQUIRE );
7708
7709         case KEY_reset:
7710             UNI(OP_RESET);
7711
7712         case KEY_redo:
7713             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7714             LOOPX(OP_REDO);
7715
7716         case KEY_rename:
7717             LOP(OP_RENAME,XTERM);
7718
7719         case KEY_rand:
7720             UNI(OP_RAND);
7721
7722         case KEY_rmdir:
7723             UNI(OP_RMDIR);
7724
7725         case KEY_rindex:
7726             LOP(OP_RINDEX,XTERM);
7727
7728         case KEY_read:
7729             LOP(OP_READ,XTERM);
7730
7731         case KEY_readdir:
7732             UNI(OP_READDIR);
7733
7734         case KEY_readline:
7735             UNIDOR(OP_READLINE);
7736
7737         case KEY_readpipe:
7738             UNIDOR(OP_BACKTICK);
7739
7740         case KEY_rewinddir:
7741             UNI(OP_REWINDDIR);
7742
7743         case KEY_recv:
7744             LOP(OP_RECV,XTERM);
7745
7746         case KEY_reverse:
7747             LOP(OP_REVERSE,XTERM);
7748
7749         case KEY_readlink:
7750             UNIDOR(OP_READLINK);
7751
7752         case KEY_ref:
7753             UNI(OP_REF);
7754
7755         case KEY_s:
7756             s = scan_subst(s);
7757             if (pl_yylval.opval)
7758                 TERM(sublex_start());
7759             else
7760                 TOKEN(1);       /* force error */
7761
7762         case KEY_say:
7763             checkcomma(s,PL_tokenbuf,"filehandle");
7764             LOP(OP_SAY,XREF);
7765
7766         case KEY_chomp:
7767             UNI(OP_CHOMP);
7768
7769         case KEY_scalar:
7770             UNI(OP_SCALAR);
7771
7772         case KEY_select:
7773             LOP(OP_SELECT,XTERM);
7774
7775         case KEY_seek:
7776             LOP(OP_SEEK,XTERM);
7777
7778         case KEY_semctl:
7779             LOP(OP_SEMCTL,XTERM);
7780
7781         case KEY_semget:
7782             LOP(OP_SEMGET,XTERM);
7783
7784         case KEY_semop:
7785             LOP(OP_SEMOP,XTERM);
7786
7787         case KEY_send:
7788             LOP(OP_SEND,XTERM);
7789
7790         case KEY_setpgrp:
7791             LOP(OP_SETPGRP,XTERM);
7792
7793         case KEY_setpriority:
7794             LOP(OP_SETPRIORITY,XTERM);
7795
7796         case KEY_sethostent:
7797             UNI(OP_SHOSTENT);
7798
7799         case KEY_setnetent:
7800             UNI(OP_SNETENT);
7801
7802         case KEY_setservent:
7803             UNI(OP_SSERVENT);
7804
7805         case KEY_setprotoent:
7806             UNI(OP_SPROTOENT);
7807
7808         case KEY_setpwent:
7809             FUN0(OP_SPWENT);
7810
7811         case KEY_setgrent:
7812             FUN0(OP_SGRENT);
7813
7814         case KEY_seekdir:
7815             LOP(OP_SEEKDIR,XTERM);
7816
7817         case KEY_setsockopt:
7818             LOP(OP_SSOCKOPT,XTERM);
7819
7820         case KEY_shift:
7821             UNIDOR(OP_SHIFT);
7822
7823         case KEY_shmctl:
7824             LOP(OP_SHMCTL,XTERM);
7825
7826         case KEY_shmget:
7827             LOP(OP_SHMGET,XTERM);
7828
7829         case KEY_shmread:
7830             LOP(OP_SHMREAD,XTERM);
7831
7832         case KEY_shmwrite:
7833             LOP(OP_SHMWRITE,XTERM);
7834
7835         case KEY_shutdown:
7836             LOP(OP_SHUTDOWN,XTERM);
7837
7838         case KEY_sin:
7839             UNI(OP_SIN);
7840
7841         case KEY_sleep:
7842             UNI(OP_SLEEP);
7843
7844         case KEY_socket:
7845             LOP(OP_SOCKET,XTERM);
7846
7847         case KEY_socketpair:
7848             LOP(OP_SOCKPAIR,XTERM);
7849
7850         case KEY_sort:
7851             checkcomma(s,PL_tokenbuf,"subroutine name");
7852             s = SKIPSPACE1(s);
7853             if (*s == ';' || *s == ')')         /* probably a close */
7854                 Perl_croak(aTHX_ "sort is now a reserved word");
7855             PL_expect = XTERM;
7856             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7857             LOP(OP_SORT,XREF);
7858
7859         case KEY_split:
7860             LOP(OP_SPLIT,XTERM);
7861
7862         case KEY_sprintf:
7863             LOP(OP_SPRINTF,XTERM);
7864
7865         case KEY_splice:
7866             LOP(OP_SPLICE,XTERM);
7867
7868         case KEY_sqrt:
7869             UNI(OP_SQRT);
7870
7871         case KEY_srand:
7872             UNI(OP_SRAND);
7873
7874         case KEY_stat:
7875             UNI(OP_STAT);
7876
7877         case KEY_study:
7878             UNI(OP_STUDY);
7879
7880         case KEY_substr:
7881             LOP(OP_SUBSTR,XTERM);
7882
7883         case KEY_format:
7884         case KEY_sub:
7885           really_sub:
7886             {
7887                 char tmpbuf[sizeof PL_tokenbuf];
7888                 SSize_t tboffset = 0;
7889                 expectation attrful;
7890                 bool have_name, have_proto;
7891                 const int key = tmp;
7892
7893 #ifdef PERL_MAD
7894                 SV *tmpwhite = 0;
7895
7896                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7897                 SV *subtoken = newSVpvn(tstart, s - tstart);
7898                 PL_thistoken = 0;
7899
7900                 d = s;
7901                 s = SKIPSPACE2(s,tmpwhite);
7902 #else
7903                 s = skipspace(s);
7904 #endif
7905
7906                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7907                     (*s == ':' && s[1] == ':'))
7908                 {
7909 #ifdef PERL_MAD
7910                     SV *nametoke = NULL;
7911 #endif
7912
7913                     PL_expect = XBLOCK;
7914                     attrful = XATTRBLOCK;
7915                     /* remember buffer pos'n for later force_word */
7916                     tboffset = s - PL_oldbufptr;
7917                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7918 #ifdef PERL_MAD
7919                     if (PL_madskills)
7920                         nametoke = newSVpvn(s, d - s);
7921 #endif
7922                     if (memchr(tmpbuf, ':', len))
7923                         sv_setpvn(PL_subname, tmpbuf, len);
7924                     else {
7925                         sv_setsv(PL_subname,PL_curstname);
7926                         sv_catpvs(PL_subname,"::");
7927                         sv_catpvn(PL_subname,tmpbuf,len);
7928                     }
7929                     have_name = TRUE;
7930
7931 #ifdef PERL_MAD
7932
7933                     start_force(0);
7934                     CURMAD('X', nametoke);
7935                     CURMAD('_', tmpwhite);
7936                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7937                                       FALSE, TRUE, TRUE);
7938
7939                     s = SKIPSPACE2(d,tmpwhite);
7940 #else
7941                     s = skipspace(d);
7942 #endif
7943                 }
7944                 else {
7945                     if (key == KEY_my)
7946                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7947                     PL_expect = XTERMBLOCK;
7948                     attrful = XATTRTERM;
7949                     sv_setpvs(PL_subname,"?");
7950                     have_name = FALSE;
7951                 }
7952
7953                 if (key == KEY_format) {
7954                     if (*s == '=')
7955                         PL_lex_formbrack = PL_lex_brackets + 1;
7956 #ifdef PERL_MAD
7957                     PL_thistoken = subtoken;
7958                     s = d;
7959 #else
7960                     if (have_name)
7961                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7962                                           FALSE, TRUE, TRUE);
7963 #endif
7964                     OPERATOR(FORMAT);
7965                 }
7966
7967                 /* Look for a prototype */
7968                 if (*s == '(') {
7969                     char *p;
7970                     bool bad_proto = FALSE;
7971                     bool in_brackets = FALSE;
7972                     char greedy_proto = ' ';
7973                     bool proto_after_greedy_proto = FALSE;
7974                     bool must_be_last = FALSE;
7975                     bool underscore = FALSE;
7976                     bool seen_underscore = FALSE;
7977                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7978
7979                     s = scan_str(s,!!PL_madskills,FALSE);
7980                     if (!s)
7981                         Perl_croak(aTHX_ "Prototype not terminated");
7982                     /* strip spaces and check for bad characters */
7983                     d = SvPVX(PL_lex_stuff);
7984                     tmp = 0;
7985                     for (p = d; *p; ++p) {
7986                         if (!isSPACE(*p)) {
7987                             d[tmp++] = *p;
7988
7989                             if (warnillegalproto) {
7990                                 if (must_be_last)
7991                                     proto_after_greedy_proto = TRUE;
7992                                 if (!strchr("$@%*;[]&\\_+", *p)) {
7993                                     bad_proto = TRUE;
7994                                 }
7995                                 else {
7996                                     if ( underscore ) {
7997                                         if ( *p != ';' )
7998                                             bad_proto = TRUE;
7999                                         underscore = FALSE;
8000                                     }
8001                                     if ( *p == '[' ) {
8002                                         in_brackets = TRUE;
8003                                     }
8004                                     else if ( *p == ']' ) {
8005                                         in_brackets = FALSE;
8006                                     }
8007                                     else if ( (*p == '@' || *p == '%') &&
8008                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
8009                                          !in_brackets ) {
8010                                         must_be_last = TRUE;
8011                                         greedy_proto = *p;
8012                                     }
8013                                     else if ( *p == '_' ) {
8014                                         underscore = seen_underscore = TRUE;
8015                                     }
8016                                 }
8017                             }
8018                         }
8019                     }
8020                     d[tmp] = '\0';
8021                     if (proto_after_greedy_proto)
8022                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8023                                     "Prototype after '%c' for %"SVf" : %s",
8024                                     greedy_proto, SVfARG(PL_subname), d);
8025                     if (bad_proto)
8026                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8027                                     "Illegal character %sin prototype for %"SVf" : %s",
8028                                     seen_underscore ? "after '_' " : "",
8029                                     SVfARG(PL_subname), d);
8030                     SvCUR_set(PL_lex_stuff, tmp);
8031                     have_proto = TRUE;
8032
8033 #ifdef PERL_MAD
8034                     start_force(0);
8035                     CURMAD('q', PL_thisopen);
8036                     CURMAD('_', tmpwhite);
8037                     CURMAD('=', PL_thisstuff);
8038                     CURMAD('Q', PL_thisclose);
8039                     NEXTVAL_NEXTTOKE.opval =
8040                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8041                     PL_lex_stuff = NULL;
8042                     force_next(THING);
8043
8044                     s = SKIPSPACE2(s,tmpwhite);
8045 #else
8046                     s = skipspace(s);
8047 #endif
8048                 }
8049                 else
8050                     have_proto = FALSE;
8051
8052                 if (*s == ':' && s[1] != ':')
8053                     PL_expect = attrful;
8054                 else if (*s != '{' && key == KEY_sub) {
8055                     if (!have_name)
8056                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8057                     else if (*s != ';' && *s != '}')
8058                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8059                 }
8060
8061 #ifdef PERL_MAD
8062                 start_force(0);
8063                 if (tmpwhite) {
8064                     if (PL_madskills)
8065                         curmad('^', newSVpvs(""));
8066                     CURMAD('_', tmpwhite);
8067                 }
8068                 force_next(0);
8069
8070                 PL_thistoken = subtoken;
8071 #else
8072                 if (have_proto) {
8073                     NEXTVAL_NEXTTOKE.opval =
8074                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8075                     PL_lex_stuff = NULL;
8076                     force_next(THING);
8077                 }
8078 #endif
8079                 if (!have_name) {
8080                     if (PL_curstash)
8081                         sv_setpvs(PL_subname, "__ANON__");
8082                     else
8083                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8084                     TOKEN(ANONSUB);
8085                 }
8086 #ifndef PERL_MAD
8087                 (void) force_word(PL_oldbufptr + tboffset, WORD,
8088                                   FALSE, TRUE, TRUE);
8089 #endif
8090                 if (key == KEY_my)
8091                     TOKEN(MYSUB);
8092                 TOKEN(SUB);
8093             }
8094
8095         case KEY_system:
8096             LOP(OP_SYSTEM,XREF);
8097
8098         case KEY_symlink:
8099             LOP(OP_SYMLINK,XTERM);
8100
8101         case KEY_syscall:
8102             LOP(OP_SYSCALL,XTERM);
8103
8104         case KEY_sysopen:
8105             LOP(OP_SYSOPEN,XTERM);
8106
8107         case KEY_sysseek:
8108             LOP(OP_SYSSEEK,XTERM);
8109
8110         case KEY_sysread:
8111             LOP(OP_SYSREAD,XTERM);
8112
8113         case KEY_syswrite:
8114             LOP(OP_SYSWRITE,XTERM);
8115
8116         case KEY_tr:
8117             s = scan_trans(s);
8118             TERM(sublex_start());
8119
8120         case KEY_tell:
8121             UNI(OP_TELL);
8122
8123         case KEY_telldir:
8124             UNI(OP_TELLDIR);
8125
8126         case KEY_tie:
8127             LOP(OP_TIE,XTERM);
8128
8129         case KEY_tied:
8130             UNI(OP_TIED);
8131
8132         case KEY_time:
8133             FUN0(OP_TIME);
8134
8135         case KEY_times:
8136             FUN0(OP_TMS);
8137
8138         case KEY_truncate:
8139             LOP(OP_TRUNCATE,XTERM);
8140
8141         case KEY_uc:
8142             UNI(OP_UC);
8143
8144         case KEY_ucfirst:
8145             UNI(OP_UCFIRST);
8146
8147         case KEY_untie:
8148             UNI(OP_UNTIE);
8149
8150         case KEY_until:
8151             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8152                 return REPORT(0);
8153             pl_yylval.ival = CopLINE(PL_curcop);
8154             OPERATOR(UNTIL);
8155
8156         case KEY_unless:
8157             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8158                 return REPORT(0);
8159             pl_yylval.ival = CopLINE(PL_curcop);
8160             OPERATOR(UNLESS);
8161
8162         case KEY_unlink:
8163             LOP(OP_UNLINK,XTERM);
8164
8165         case KEY_undef:
8166             UNIDOR(OP_UNDEF);
8167
8168         case KEY_unpack:
8169             LOP(OP_UNPACK,XTERM);
8170
8171         case KEY_utime:
8172             LOP(OP_UTIME,XTERM);
8173
8174         case KEY_umask:
8175             UNIDOR(OP_UMASK);
8176
8177         case KEY_unshift:
8178             LOP(OP_UNSHIFT,XTERM);
8179
8180         case KEY_use:
8181             s = tokenize_use(1, s);
8182             OPERATOR(USE);
8183
8184         case KEY_values:
8185             UNI(OP_VALUES);
8186
8187         case KEY_vec:
8188             LOP(OP_VEC,XTERM);
8189
8190         case KEY_when:
8191             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8192                 return REPORT(0);
8193             pl_yylval.ival = CopLINE(PL_curcop);
8194             OPERATOR(WHEN);
8195
8196         case KEY_while:
8197             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8198                 return REPORT(0);
8199             pl_yylval.ival = CopLINE(PL_curcop);
8200             OPERATOR(WHILE);
8201
8202         case KEY_warn:
8203             PL_hints |= HINT_BLOCK_SCOPE;
8204             LOP(OP_WARN,XTERM);
8205
8206         case KEY_wait:
8207             FUN0(OP_WAIT);
8208
8209         case KEY_waitpid:
8210             LOP(OP_WAITPID,XTERM);
8211
8212         case KEY_wantarray:
8213             FUN0(OP_WANTARRAY);
8214
8215         case KEY_write:
8216 #ifdef EBCDIC
8217         {
8218             char ctl_l[2];
8219             ctl_l[0] = toCTRL('L');
8220             ctl_l[1] = '\0';
8221             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8222         }
8223 #else
8224             /* Make sure $^L is defined */
8225             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8226 #endif
8227             UNI(OP_ENTERWRITE);
8228
8229         case KEY_x:
8230             if (PL_expect == XOPERATOR) {
8231                 if (*s == '=' && !PL_lex_allbrackets &&
8232                         PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8233                     return REPORT(0);
8234                 Mop(OP_REPEAT);
8235             }
8236             check_uni();
8237             goto just_a_word;
8238
8239         case KEY_xor:
8240             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8241                 return REPORT(0);
8242             pl_yylval.ival = OP_XOR;
8243             OPERATOR(OROP);
8244
8245         case KEY_y:
8246             s = scan_trans(s);
8247             TERM(sublex_start());
8248         }
8249     }}
8250 }
8251 #ifdef __SC__
8252 #pragma segment Main
8253 #endif
8254
8255 static int
8256 S_pending_ident(pTHX)
8257 {
8258     dVAR;
8259     register char *d;
8260     PADOFFSET tmp = 0;
8261     /* pit holds the identifier we read and pending_ident is reset */
8262     char pit = PL_pending_ident;
8263     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8264     /* All routes through this function want to know if there is a colon.  */
8265     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8266     PL_pending_ident = 0;
8267
8268     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8269     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8270           "### Pending identifier '%s'\n", PL_tokenbuf); });
8271
8272     /* if we're in a my(), we can't allow dynamics here.
8273        $foo'bar has already been turned into $foo::bar, so
8274        just check for colons.
8275
8276        if it's a legal name, the OP is a PADANY.
8277     */
8278     if (PL_in_my) {
8279         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8280             if (has_colon)
8281                 yyerror(Perl_form(aTHX_ "No package name allowed for "
8282                                   "variable %s in \"our\"",
8283                                   PL_tokenbuf));
8284             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8285         }
8286         else {
8287             if (has_colon)
8288                 yyerror(Perl_form(aTHX_ PL_no_myglob,
8289                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8290
8291             pl_yylval.opval = newOP(OP_PADANY, 0);
8292             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8293             return PRIVATEREF;
8294         }
8295     }
8296
8297     /*
8298        build the ops for accesses to a my() variable.
8299
8300        Deny my($a) or my($b) in a sort block, *if* $a or $b is
8301        then used in a comparison.  This catches most, but not
8302        all cases.  For instance, it catches
8303            sort { my($a); $a <=> $b }
8304        but not
8305            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8306        (although why you'd do that is anyone's guess).
8307     */
8308
8309     if (!has_colon) {
8310         if (!PL_in_my)
8311             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8312         if (tmp != NOT_IN_PAD) {
8313             /* might be an "our" variable" */
8314             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8315                 /* build ops for a bareword */
8316                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8317                 HEK * const stashname = HvNAME_HEK(stash);
8318                 SV *  const sym = newSVhek(stashname);
8319                 sv_catpvs(sym, "::");
8320                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8321                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8322                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8323                 gv_fetchsv(sym,
8324                     (PL_in_eval
8325                         ? (GV_ADDMULTI | GV_ADDINEVAL)
8326                         : GV_ADDMULTI
8327                     ),
8328                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8329                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8330                      : SVt_PVHV));
8331                 return WORD;
8332             }
8333
8334             /* if it's a sort block and they're naming $a or $b */
8335             if (PL_last_lop_op == OP_SORT &&
8336                 PL_tokenbuf[0] == '$' &&
8337                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8338                 && !PL_tokenbuf[2])
8339             {
8340                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8341                      d < PL_bufend && *d != '\n';
8342                      d++)
8343                 {
8344                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8345                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8346                               PL_tokenbuf);
8347                     }
8348                 }
8349             }
8350
8351             pl_yylval.opval = newOP(OP_PADANY, 0);
8352             pl_yylval.opval->op_targ = tmp;
8353             return PRIVATEREF;
8354         }
8355     }
8356
8357     /*
8358        Whine if they've said @foo in a doublequoted string,
8359        and @foo isn't a variable we can find in the symbol
8360        table.
8361     */
8362     if (ckWARN(WARN_AMBIGUOUS) &&
8363         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8364         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8365                                          SVt_PVAV);
8366         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8367                 /* DO NOT warn for @- and @+ */
8368                 && !( PL_tokenbuf[2] == '\0' &&
8369                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8370            )
8371         {
8372             /* Downgraded from fatal to warning 20000522 mjd */
8373             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8374                         "Possible unintended interpolation of %s in string",
8375                         PL_tokenbuf);
8376         }
8377     }
8378
8379     /* build ops for a bareword */
8380     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8381                                                       tokenbuf_len - 1));
8382     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8383     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8384                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8385                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8386                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8387                       : SVt_PVHV));
8388     return WORD;
8389 }
8390
8391 /*
8392  *  The following code was generated by perl_keyword.pl.
8393  */
8394
8395 I32
8396 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8397 {
8398     dVAR;
8399
8400     PERL_ARGS_ASSERT_KEYWORD;
8401
8402   switch (len)
8403   {
8404     case 1: /* 5 tokens of length 1 */
8405       switch (name[0])
8406       {
8407         case 'm':
8408           {                                       /* m          */
8409             return KEY_m;
8410           }
8411
8412         case 'q':
8413           {                                       /* q          */
8414             return KEY_q;
8415           }
8416
8417         case 's':
8418           {                                       /* s          */
8419             return KEY_s;
8420           }
8421
8422         case 'x':
8423           {                                       /* x          */
8424             return -KEY_x;
8425           }
8426
8427         case 'y':
8428           {                                       /* y          */
8429             return KEY_y;
8430           }
8431
8432         default:
8433           goto unknown;
8434       }
8435
8436     case 2: /* 18 tokens of length 2 */
8437       switch (name[0])
8438       {
8439         case 'd':
8440           if (name[1] == 'o')
8441           {                                       /* do         */
8442             return KEY_do;
8443           }
8444
8445           goto unknown;
8446
8447         case 'e':
8448           if (name[1] == 'q')
8449           {                                       /* eq         */
8450             return -KEY_eq;
8451           }
8452
8453           goto unknown;
8454
8455         case 'g':
8456           switch (name[1])
8457           {
8458             case 'e':
8459               {                                   /* ge         */
8460                 return -KEY_ge;
8461               }
8462
8463             case 't':
8464               {                                   /* gt         */
8465                 return -KEY_gt;
8466               }
8467
8468             default:
8469               goto unknown;
8470           }
8471
8472         case 'i':
8473           if (name[1] == 'f')
8474           {                                       /* if         */
8475             return KEY_if;
8476           }
8477
8478           goto unknown;
8479
8480         case 'l':
8481           switch (name[1])
8482           {
8483             case 'c':
8484               {                                   /* lc         */
8485                 return -KEY_lc;
8486               }
8487
8488             case 'e':
8489               {                                   /* le         */
8490                 return -KEY_le;
8491               }
8492
8493             case 't':
8494               {                                   /* lt         */
8495                 return -KEY_lt;
8496               }
8497
8498             default:
8499               goto unknown;
8500           }
8501
8502         case 'm':
8503           if (name[1] == 'y')
8504           {                                       /* my         */
8505             return KEY_my;
8506           }
8507
8508           goto unknown;
8509
8510         case 'n':
8511           switch (name[1])
8512           {
8513             case 'e':
8514               {                                   /* ne         */
8515                 return -KEY_ne;
8516               }
8517
8518             case 'o':
8519               {                                   /* no         */
8520                 return KEY_no;
8521               }
8522
8523             default:
8524               goto unknown;
8525           }
8526
8527         case 'o':
8528           if (name[1] == 'r')
8529           {                                       /* or         */
8530             return -KEY_or;
8531           }
8532
8533           goto unknown;
8534
8535         case 'q':
8536           switch (name[1])
8537           {
8538             case 'q':
8539               {                                   /* qq         */
8540                 return KEY_qq;
8541               }
8542
8543             case 'r':
8544               {                                   /* qr         */
8545                 return KEY_qr;
8546               }
8547
8548             case 'w':
8549               {                                   /* qw         */
8550                 return KEY_qw;
8551               }
8552
8553             case 'x':
8554               {                                   /* qx         */
8555                 return KEY_qx;
8556               }
8557
8558             default:
8559               goto unknown;
8560           }
8561
8562         case 't':
8563           if (name[1] == 'r')
8564           {                                       /* tr         */
8565             return KEY_tr;
8566           }
8567
8568           goto unknown;
8569
8570         case 'u':
8571           if (name[1] == 'c')
8572           {                                       /* uc         */
8573             return -KEY_uc;
8574           }
8575
8576           goto unknown;
8577
8578         default:
8579           goto unknown;
8580       }
8581
8582     case 3: /* 29 tokens of length 3 */
8583       switch (name[0])
8584       {
8585         case 'E':
8586           if (name[1] == 'N' &&
8587               name[2] == 'D')
8588           {                                       /* END        */
8589             return KEY_END;
8590           }
8591
8592           goto unknown;
8593
8594         case 'a':
8595           switch (name[1])
8596           {
8597             case 'b':
8598               if (name[2] == 's')
8599               {                                   /* abs        */
8600                 return -KEY_abs;
8601               }
8602
8603               goto unknown;
8604
8605             case 'n':
8606               if (name[2] == 'd')
8607               {                                   /* and        */
8608                 return -KEY_and;
8609               }
8610
8611               goto unknown;
8612
8613             default:
8614               goto unknown;
8615           }
8616
8617         case 'c':
8618           switch (name[1])
8619           {
8620             case 'h':
8621               if (name[2] == 'r')
8622               {                                   /* chr        */
8623                 return -KEY_chr;
8624               }
8625
8626               goto unknown;
8627
8628             case 'm':
8629               if (name[2] == 'p')
8630               {                                   /* cmp        */
8631                 return -KEY_cmp;
8632               }
8633
8634               goto unknown;
8635
8636             case 'o':
8637               if (name[2] == 's')
8638               {                                   /* cos        */
8639                 return -KEY_cos;
8640               }
8641
8642               goto unknown;
8643
8644             default:
8645               goto unknown;
8646           }
8647
8648         case 'd':
8649           if (name[1] == 'i' &&
8650               name[2] == 'e')
8651           {                                       /* die        */
8652             return -KEY_die;
8653           }
8654
8655           goto unknown;
8656
8657         case 'e':
8658           switch (name[1])
8659           {
8660             case 'o':
8661               if (name[2] == 'f')
8662               {                                   /* eof        */
8663                 return -KEY_eof;
8664               }
8665
8666               goto unknown;
8667
8668             case 'x':
8669               if (name[2] == 'p')
8670               {                                   /* exp        */
8671                 return -KEY_exp;
8672               }
8673
8674               goto unknown;
8675
8676             default:
8677               goto unknown;
8678           }
8679
8680         case 'f':
8681           if (name[1] == 'o' &&
8682               name[2] == 'r')
8683           {                                       /* for        */
8684             return KEY_for;
8685           }
8686
8687           goto unknown;
8688
8689         case 'h':
8690           if (name[1] == 'e' &&
8691               name[2] == 'x')
8692           {                                       /* hex        */
8693             return -KEY_hex;
8694           }
8695
8696           goto unknown;
8697
8698         case 'i':
8699           if (name[1] == 'n' &&
8700               name[2] == 't')
8701           {                                       /* int        */
8702             return -KEY_int;
8703           }
8704
8705           goto unknown;
8706
8707         case 'l':
8708           if (name[1] == 'o' &&
8709               name[2] == 'g')
8710           {                                       /* log        */
8711             return -KEY_log;
8712           }
8713
8714           goto unknown;
8715
8716         case 'm':
8717           if (name[1] == 'a' &&
8718               name[2] == 'p')
8719           {                                       /* map        */
8720             return KEY_map;
8721           }
8722
8723           goto unknown;
8724
8725         case 'n':
8726           if (name[1] == 'o' &&
8727               name[2] == 't')
8728           {                                       /* not        */
8729             return -KEY_not;
8730           }
8731
8732           goto unknown;
8733
8734         case 'o':
8735           switch (name[1])
8736           {
8737             case 'c':
8738               if (name[2] == 't')
8739               {                                   /* oct        */
8740                 return -KEY_oct;
8741               }
8742
8743               goto unknown;
8744
8745             case 'r':
8746               if (name[2] == 'd')
8747               {                                   /* ord        */
8748                 return -KEY_ord;
8749               }
8750
8751               goto unknown;
8752
8753             case 'u':
8754               if (name[2] == 'r')
8755               {                                   /* our        */
8756                 return KEY_our;
8757               }
8758
8759               goto unknown;
8760
8761             default:
8762               goto unknown;
8763           }
8764
8765         case 'p':
8766           if (name[1] == 'o')
8767           {
8768             switch (name[2])
8769             {
8770               case 'p':
8771                 {                                 /* pop        */
8772                   return -KEY_pop;
8773                 }
8774
8775               case 's':
8776                 {                                 /* pos        */
8777                   return KEY_pos;
8778                 }
8779
8780               default:
8781                 goto unknown;
8782             }
8783           }
8784
8785           goto unknown;
8786
8787         case 'r':
8788           if (name[1] == 'e' &&
8789               name[2] == 'f')
8790           {                                       /* ref        */
8791             return -KEY_ref;
8792           }
8793
8794           goto unknown;
8795
8796         case 's':
8797           switch (name[1])
8798           {
8799             case 'a':
8800               if (name[2] == 'y')
8801               {                                   /* say        */
8802                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8803               }
8804
8805               goto unknown;
8806
8807             case 'i':
8808               if (name[2] == 'n')
8809               {                                   /* sin        */
8810                 return -KEY_sin;
8811               }
8812
8813               goto unknown;
8814
8815             case 'u':
8816               if (name[2] == 'b')
8817               {                                   /* sub        */
8818                 return KEY_sub;
8819               }
8820
8821               goto unknown;
8822
8823             default:
8824               goto unknown;
8825           }
8826
8827         case 't':
8828           if (name[1] == 'i' &&
8829               name[2] == 'e')
8830           {                                       /* tie        */
8831             return -KEY_tie;
8832           }
8833
8834           goto unknown;
8835
8836         case 'u':
8837           if (name[1] == 's' &&
8838               name[2] == 'e')
8839           {                                       /* use        */
8840             return KEY_use;
8841           }
8842
8843           goto unknown;
8844
8845         case 'v':
8846           if (name[1] == 'e' &&
8847               name[2] == 'c')
8848           {                                       /* vec        */
8849             return -KEY_vec;
8850           }
8851
8852           goto unknown;
8853
8854         case 'x':
8855           if (name[1] == 'o' &&
8856               name[2] == 'r')
8857           {                                       /* xor        */
8858             return -KEY_xor;
8859           }
8860
8861           goto unknown;
8862
8863         default:
8864           goto unknown;
8865       }
8866
8867     case 4: /* 41 tokens of length 4 */
8868       switch (name[0])
8869       {
8870         case 'C':
8871           if (name[1] == 'O' &&
8872               name[2] == 'R' &&
8873               name[3] == 'E')
8874           {                                       /* CORE       */
8875             return -KEY_CORE;
8876           }
8877
8878           goto unknown;
8879
8880         case 'I':
8881           if (name[1] == 'N' &&
8882               name[2] == 'I' &&
8883               name[3] == 'T')
8884           {                                       /* INIT       */
8885             return KEY_INIT;
8886           }
8887
8888           goto unknown;
8889
8890         case 'b':
8891           if (name[1] == 'i' &&
8892               name[2] == 'n' &&
8893               name[3] == 'd')
8894           {                                       /* bind       */
8895             return -KEY_bind;
8896           }
8897
8898           goto unknown;
8899
8900         case 'c':
8901           if (name[1] == 'h' &&
8902               name[2] == 'o' &&
8903               name[3] == 'p')
8904           {                                       /* chop       */
8905             return -KEY_chop;
8906           }
8907
8908           goto unknown;
8909
8910         case 'd':
8911           if (name[1] == 'u' &&
8912               name[2] == 'm' &&
8913               name[3] == 'p')
8914           {                                       /* dump       */
8915             return -KEY_dump;
8916           }
8917
8918           goto unknown;
8919
8920         case 'e':
8921           switch (name[1])
8922           {
8923             case 'a':
8924               if (name[2] == 'c' &&
8925                   name[3] == 'h')
8926               {                                   /* each       */
8927                 return -KEY_each;
8928               }
8929
8930               goto unknown;
8931
8932             case 'l':
8933               if (name[2] == 's' &&
8934                   name[3] == 'e')
8935               {                                   /* else       */
8936                 return KEY_else;
8937               }
8938
8939               goto unknown;
8940
8941             case 'v':
8942               if (name[2] == 'a' &&
8943                   name[3] == 'l')
8944               {                                   /* eval       */
8945                 return KEY_eval;
8946               }
8947
8948               goto unknown;
8949
8950             case 'x':
8951               switch (name[2])
8952               {
8953                 case 'e':
8954                   if (name[3] == 'c')
8955                   {                               /* exec       */
8956                     return -KEY_exec;
8957                   }
8958
8959                   goto unknown;
8960
8961                 case 'i':
8962                   if (name[3] == 't')
8963                   {                               /* exit       */
8964                     return -KEY_exit;
8965                   }
8966
8967                   goto unknown;
8968
8969                 default:
8970                   goto unknown;
8971               }
8972
8973             default:
8974               goto unknown;
8975           }
8976
8977         case 'f':
8978           if (name[1] == 'o' &&
8979               name[2] == 'r' &&
8980               name[3] == 'k')
8981           {                                       /* fork       */
8982             return -KEY_fork;
8983           }
8984
8985           goto unknown;
8986
8987         case 'g':
8988           switch (name[1])
8989           {
8990             case 'e':
8991               if (name[2] == 't' &&
8992                   name[3] == 'c')
8993               {                                   /* getc       */
8994                 return -KEY_getc;
8995               }
8996
8997               goto unknown;
8998
8999             case 'l':
9000               if (name[2] == 'o' &&
9001                   name[3] == 'b')
9002               {                                   /* glob       */
9003                 return KEY_glob;
9004               }
9005
9006               goto unknown;
9007
9008             case 'o':
9009               if (name[2] == 't' &&
9010                   name[3] == 'o')
9011               {                                   /* goto       */
9012                 return KEY_goto;
9013               }
9014
9015               goto unknown;
9016
9017             case 'r':
9018               if (name[2] == 'e' &&
9019                   name[3] == 'p')
9020               {                                   /* grep       */
9021                 return KEY_grep;
9022               }
9023
9024               goto unknown;
9025
9026             default:
9027               goto unknown;
9028           }
9029
9030         case 'j':
9031           if (name[1] == 'o' &&
9032               name[2] == 'i' &&
9033               name[3] == 'n')
9034           {                                       /* join       */
9035             return -KEY_join;
9036           }
9037
9038           goto unknown;
9039
9040         case 'k':
9041           switch (name[1])
9042           {
9043             case 'e':
9044               if (name[2] == 'y' &&
9045                   name[3] == 's')
9046               {                                   /* keys       */
9047                 return -KEY_keys;
9048               }
9049
9050               goto unknown;
9051
9052             case 'i':
9053               if (name[2] == 'l' &&
9054                   name[3] == 'l')
9055               {                                   /* kill       */
9056                 return -KEY_kill;
9057               }
9058
9059               goto unknown;
9060
9061             default:
9062               goto unknown;
9063           }
9064
9065         case 'l':
9066           switch (name[1])
9067           {
9068             case 'a':
9069               if (name[2] == 's' &&
9070                   name[3] == 't')
9071               {                                   /* last       */
9072                 return KEY_last;
9073               }
9074
9075               goto unknown;
9076
9077             case 'i':
9078               if (name[2] == 'n' &&
9079                   name[3] == 'k')
9080               {                                   /* link       */
9081                 return -KEY_link;
9082               }
9083
9084               goto unknown;
9085
9086             case 'o':
9087               if (name[2] == 'c' &&
9088                   name[3] == 'k')
9089               {                                   /* lock       */
9090                 return -KEY_lock;
9091               }
9092
9093               goto unknown;
9094
9095             default:
9096               goto unknown;
9097           }
9098
9099         case 'n':
9100           if (name[1] == 'e' &&
9101               name[2] == 'x' &&
9102               name[3] == 't')
9103           {                                       /* next       */
9104             return KEY_next;
9105           }
9106
9107           goto unknown;
9108
9109         case 'o':
9110           if (name[1] == 'p' &&
9111               name[2] == 'e' &&
9112               name[3] == 'n')
9113           {                                       /* open       */
9114             return -KEY_open;
9115           }
9116
9117           goto unknown;
9118
9119         case 'p':
9120           switch (name[1])
9121           {
9122             case 'a':
9123               if (name[2] == 'c' &&
9124                   name[3] == 'k')
9125               {                                   /* pack       */
9126                 return -KEY_pack;
9127               }
9128
9129               goto unknown;
9130
9131             case 'i':
9132               if (name[2] == 'p' &&
9133                   name[3] == 'e')
9134               {                                   /* pipe       */
9135                 return -KEY_pipe;
9136               }
9137
9138               goto unknown;
9139
9140             case 'u':
9141               if (name[2] == 's' &&
9142                   name[3] == 'h')
9143               {                                   /* push       */
9144                 return -KEY_push;
9145               }
9146
9147               goto unknown;
9148
9149             default:
9150               goto unknown;
9151           }
9152
9153         case 'r':
9154           switch (name[1])
9155           {
9156             case 'a':
9157               if (name[2] == 'n' &&
9158                   name[3] == 'd')
9159               {                                   /* rand       */
9160                 return -KEY_rand;
9161               }
9162
9163               goto unknown;
9164
9165             case 'e':
9166               switch (name[2])
9167               {
9168                 case 'a':
9169                   if (name[3] == 'd')
9170                   {                               /* read       */
9171                     return -KEY_read;
9172                   }
9173
9174                   goto unknown;
9175
9176                 case 'c':
9177                   if (name[3] == 'v')
9178                   {                               /* recv       */
9179                     return -KEY_recv;
9180                   }
9181
9182                   goto unknown;
9183
9184                 case 'd':
9185                   if (name[3] == 'o')
9186                   {                               /* redo       */
9187                     return KEY_redo;
9188                   }
9189
9190                   goto unknown;
9191
9192                 default:
9193                   goto unknown;
9194               }
9195
9196             default:
9197               goto unknown;
9198           }
9199
9200         case 's':
9201           switch (name[1])
9202           {
9203             case 'e':
9204               switch (name[2])
9205               {
9206                 case 'e':
9207                   if (name[3] == 'k')
9208                   {                               /* seek       */
9209                     return -KEY_seek;
9210                   }
9211
9212                   goto unknown;
9213
9214                 case 'n':
9215                   if (name[3] == 'd')
9216                   {                               /* send       */
9217                     return -KEY_send;
9218                   }
9219
9220                   goto unknown;
9221
9222                 default:
9223                   goto unknown;
9224               }
9225
9226             case 'o':
9227               if (name[2] == 'r' &&
9228                   name[3] == 't')
9229               {                                   /* sort       */
9230                 return KEY_sort;
9231               }
9232
9233               goto unknown;
9234
9235             case 'q':
9236               if (name[2] == 'r' &&
9237                   name[3] == 't')
9238               {                                   /* sqrt       */
9239                 return -KEY_sqrt;
9240               }
9241
9242               goto unknown;
9243
9244             case 't':
9245               if (name[2] == 'a' &&
9246                   name[3] == 't')
9247               {                                   /* stat       */
9248                 return -KEY_stat;
9249               }
9250
9251               goto unknown;
9252
9253             default:
9254               goto unknown;
9255           }
9256
9257         case 't':
9258           switch (name[1])
9259           {
9260             case 'e':
9261               if (name[2] == 'l' &&
9262                   name[3] == 'l')
9263               {                                   /* tell       */
9264                 return -KEY_tell;
9265               }
9266
9267               goto unknown;
9268
9269             case 'i':
9270               switch (name[2])
9271               {
9272                 case 'e':
9273                   if (name[3] == 'd')
9274                   {                               /* tied       */
9275                     return -KEY_tied;
9276                   }
9277
9278                   goto unknown;
9279
9280                 case 'm':
9281                   if (name[3] == 'e')
9282                   {                               /* time       */
9283                     return -KEY_time;
9284                   }
9285
9286                   goto unknown;
9287
9288                 default:
9289                   goto unknown;
9290               }
9291
9292             default:
9293               goto unknown;
9294           }
9295
9296         case 'w':
9297           switch (name[1])
9298           {
9299             case 'a':
9300               switch (name[2])
9301               {
9302                 case 'i':
9303                   if (name[3] == 't')
9304                   {                               /* wait       */
9305                     return -KEY_wait;
9306                   }
9307
9308                   goto unknown;
9309
9310                 case 'r':
9311                   if (name[3] == 'n')
9312                   {                               /* warn       */
9313                     return -KEY_warn;
9314                   }
9315
9316                   goto unknown;
9317
9318                 default:
9319                   goto unknown;
9320               }
9321
9322             case 'h':
9323               if (name[2] == 'e' &&
9324                   name[3] == 'n')
9325               {                                   /* when       */
9326                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9327               }
9328
9329               goto unknown;
9330
9331             default:
9332               goto unknown;
9333           }
9334
9335         default:
9336           goto unknown;
9337       }
9338
9339     case 5: /* 39 tokens of length 5 */
9340       switch (name[0])
9341       {
9342         case 'B':
9343           if (name[1] == 'E' &&
9344               name[2] == 'G' &&
9345               name[3] == 'I' &&
9346               name[4] == 'N')
9347           {                                       /* BEGIN      */
9348             return KEY_BEGIN;
9349           }
9350
9351           goto unknown;
9352
9353         case 'C':
9354           if (name[1] == 'H' &&
9355               name[2] == 'E' &&
9356               name[3] == 'C' &&
9357               name[4] == 'K')
9358           {                                       /* CHECK      */
9359             return KEY_CHECK;
9360           }
9361
9362           goto unknown;
9363
9364         case 'a':
9365           switch (name[1])
9366           {
9367             case 'l':
9368               if (name[2] == 'a' &&
9369                   name[3] == 'r' &&
9370                   name[4] == 'm')
9371               {                                   /* alarm      */
9372                 return -KEY_alarm;
9373               }
9374
9375               goto unknown;
9376
9377             case 't':
9378               if (name[2] == 'a' &&
9379                   name[3] == 'n' &&
9380                   name[4] == '2')
9381               {                                   /* atan2      */
9382                 return -KEY_atan2;
9383               }
9384
9385               goto unknown;
9386
9387             default:
9388               goto unknown;
9389           }
9390
9391         case 'b':
9392           switch (name[1])
9393           {
9394             case 'l':
9395               if (name[2] == 'e' &&
9396                   name[3] == 's' &&
9397                   name[4] == 's')
9398               {                                   /* bless      */
9399                 return -KEY_bless;
9400               }
9401
9402               goto unknown;
9403
9404             case 'r':
9405               if (name[2] == 'e' &&
9406                   name[3] == 'a' &&
9407                   name[4] == 'k')
9408               {                                   /* break      */
9409                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9410               }
9411
9412               goto unknown;
9413
9414             default:
9415               goto unknown;
9416           }
9417
9418         case 'c':
9419           switch (name[1])
9420           {
9421             case 'h':
9422               switch (name[2])
9423               {
9424                 case 'd':
9425                   if (name[3] == 'i' &&
9426                       name[4] == 'r')
9427                   {                               /* chdir      */
9428                     return -KEY_chdir;
9429                   }
9430
9431                   goto unknown;
9432
9433                 case 'm':
9434                   if (name[3] == 'o' &&
9435                       name[4] == 'd')
9436                   {                               /* chmod      */
9437                     return -KEY_chmod;
9438                   }
9439
9440                   goto unknown;
9441
9442                 case 'o':
9443                   switch (name[3])
9444                   {
9445                     case 'm':
9446                       if (name[4] == 'p')
9447                       {                           /* chomp      */
9448                         return -KEY_chomp;
9449                       }
9450
9451                       goto unknown;
9452
9453                     case 'w':
9454                       if (name[4] == 'n')
9455                       {                           /* chown      */
9456                         return -KEY_chown;
9457                       }
9458
9459                       goto unknown;
9460
9461                     default:
9462                       goto unknown;
9463                   }
9464
9465                 default:
9466                   goto unknown;
9467               }
9468
9469             case 'l':
9470               if (name[2] == 'o' &&
9471                   name[3] == 's' &&
9472                   name[4] == 'e')
9473               {                                   /* close      */
9474                 return -KEY_close;
9475               }
9476
9477               goto unknown;
9478
9479             case 'r':
9480               if (name[2] == 'y' &&
9481                   name[3] == 'p' &&
9482                   name[4] == 't')
9483               {                                   /* crypt      */
9484                 return -KEY_crypt;
9485               }
9486
9487               goto unknown;
9488
9489             default:
9490               goto unknown;
9491           }
9492
9493         case 'e':
9494           if (name[1] == 'l' &&
9495               name[2] == 's' &&
9496               name[3] == 'i' &&
9497               name[4] == 'f')
9498           {                                       /* elsif      */
9499             return KEY_elsif;
9500           }
9501
9502           goto unknown;
9503
9504         case 'f':
9505           switch (name[1])
9506           {
9507             case 'c':
9508               if (name[2] == 'n' &&
9509                   name[3] == 't' &&
9510                   name[4] == 'l')
9511               {                                   /* fcntl      */
9512                 return -KEY_fcntl;
9513               }
9514
9515               goto unknown;
9516
9517             case 'l':
9518               if (name[2] == 'o' &&
9519                   name[3] == 'c' &&
9520                   name[4] == 'k')
9521               {                                   /* flock      */
9522                 return -KEY_flock;
9523               }
9524
9525               goto unknown;
9526
9527             default:
9528               goto unknown;
9529           }
9530
9531         case 'g':
9532           if (name[1] == 'i' &&
9533               name[2] == 'v' &&
9534               name[3] == 'e' &&
9535               name[4] == 'n')
9536           {                                       /* given      */
9537             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9538           }
9539
9540           goto unknown;
9541
9542         case 'i':
9543           switch (name[1])
9544           {
9545             case 'n':
9546               if (name[2] == 'd' &&
9547                   name[3] == 'e' &&
9548                   name[4] == 'x')
9549               {                                   /* index      */
9550                 return -KEY_index;
9551               }
9552
9553               goto unknown;
9554
9555             case 'o':
9556               if (name[2] == 'c' &&
9557                   name[3] == 't' &&
9558                   name[4] == 'l')
9559               {                                   /* ioctl      */
9560                 return -KEY_ioctl;
9561               }
9562
9563               goto unknown;
9564
9565             default:
9566               goto unknown;
9567           }
9568
9569         case 'l':
9570           switch (name[1])
9571           {
9572             case 'o':
9573               if (name[2] == 'c' &&
9574                   name[3] == 'a' &&
9575                   name[4] == 'l')
9576               {                                   /* local      */
9577                 return KEY_local;
9578               }
9579
9580               goto unknown;
9581
9582             case 's':
9583               if (name[2] == 't' &&
9584                   name[3] == 'a' &&
9585                   name[4] == 't')
9586               {                                   /* lstat      */
9587                 return -KEY_lstat;
9588               }
9589
9590               goto unknown;
9591
9592             default:
9593               goto unknown;
9594           }
9595
9596         case 'm':
9597           if (name[1] == 'k' &&
9598               name[2] == 'd' &&
9599               name[3] == 'i' &&
9600               name[4] == 'r')
9601           {                                       /* mkdir      */
9602             return -KEY_mkdir;
9603           }
9604
9605           goto unknown;
9606
9607         case 'p':
9608           if (name[1] == 'r' &&
9609               name[2] == 'i' &&
9610               name[3] == 'n' &&
9611               name[4] == 't')
9612           {                                       /* print      */
9613             return KEY_print;
9614           }
9615
9616           goto unknown;
9617
9618         case 'r':
9619           switch (name[1])
9620           {
9621             case 'e':
9622               if (name[2] == 's' &&
9623                   name[3] == 'e' &&
9624                   name[4] == 't')
9625               {                                   /* reset      */
9626                 return -KEY_reset;
9627               }
9628
9629               goto unknown;
9630
9631             case 'm':
9632               if (name[2] == 'd' &&
9633                   name[3] == 'i' &&
9634                   name[4] == 'r')
9635               {                                   /* rmdir      */
9636                 return -KEY_rmdir;
9637               }
9638
9639               goto unknown;
9640
9641             default:
9642               goto unknown;
9643           }
9644
9645         case 's':
9646           switch (name[1])
9647           {
9648             case 'e':
9649               if (name[2] == 'm' &&
9650                   name[3] == 'o' &&
9651                   name[4] == 'p')
9652               {                                   /* semop      */
9653                 return -KEY_semop;
9654               }
9655
9656               goto unknown;
9657
9658             case 'h':
9659               if (name[2] == 'i' &&
9660                   name[3] == 'f' &&
9661                   name[4] == 't')
9662               {                                   /* shift      */
9663                 return -KEY_shift;
9664               }
9665
9666               goto unknown;
9667
9668             case 'l':
9669               if (name[2] == 'e' &&
9670                   name[3] == 'e' &&
9671                   name[4] == 'p')
9672               {                                   /* sleep      */
9673                 return -KEY_sleep;
9674               }
9675
9676               goto unknown;
9677
9678             case 'p':
9679               if (name[2] == 'l' &&
9680                   name[3] == 'i' &&
9681                   name[4] == 't')
9682               {                                   /* split      */
9683                 return KEY_split;
9684               }
9685
9686               goto unknown;
9687
9688             case 'r':
9689               if (name[2] == 'a' &&
9690                   name[3] == 'n' &&
9691                   name[4] == 'd')
9692               {                                   /* srand      */
9693                 return -KEY_srand;
9694               }
9695
9696               goto unknown;
9697
9698             case 't':
9699               switch (name[2])
9700               {
9701                 case 'a':
9702                   if (name[3] == 't' &&
9703                       name[4] == 'e')
9704                   {                               /* state      */
9705                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9706                   }
9707
9708                   goto unknown;
9709
9710                 case 'u':
9711                   if (name[3] == 'd' &&
9712                       name[4] == 'y')
9713                   {                               /* study      */
9714                     return KEY_study;
9715                   }
9716
9717                   goto unknown;
9718
9719                 default:
9720                   goto unknown;
9721               }
9722
9723             default:
9724               goto unknown;
9725           }
9726
9727         case 't':
9728           if (name[1] == 'i' &&
9729               name[2] == 'm' &&
9730               name[3] == 'e' &&
9731               name[4] == 's')
9732           {                                       /* times      */
9733             return -KEY_times;
9734           }
9735
9736           goto unknown;
9737
9738         case 'u':
9739           switch (name[1])
9740           {
9741             case 'm':
9742               if (name[2] == 'a' &&
9743                   name[3] == 's' &&
9744                   name[4] == 'k')
9745               {                                   /* umask      */
9746                 return -KEY_umask;
9747               }
9748
9749               goto unknown;
9750
9751             case 'n':
9752               switch (name[2])
9753               {
9754                 case 'd':
9755                   if (name[3] == 'e' &&
9756                       name[4] == 'f')
9757                   {                               /* undef      */
9758                     return KEY_undef;
9759                   }
9760
9761                   goto unknown;
9762
9763                 case 't':
9764                   if (name[3] == 'i')
9765                   {
9766                     switch (name[4])
9767                     {
9768                       case 'e':
9769                         {                         /* untie      */
9770                           return -KEY_untie;
9771                         }
9772
9773                       case 'l':
9774                         {                         /* until      */
9775                           return KEY_until;
9776                         }
9777
9778                       default:
9779                         goto unknown;
9780                     }
9781                   }
9782
9783                   goto unknown;
9784
9785                 default:
9786                   goto unknown;
9787               }
9788
9789             case 't':
9790               if (name[2] == 'i' &&
9791                   name[3] == 'm' &&
9792                   name[4] == 'e')
9793               {                                   /* utime      */
9794                 return -KEY_utime;
9795               }
9796
9797               goto unknown;
9798
9799             default:
9800               goto unknown;
9801           }
9802
9803         case 'w':
9804           switch (name[1])
9805           {
9806             case 'h':
9807               if (name[2] == 'i' &&
9808                   name[3] == 'l' &&
9809                   name[4] == 'e')
9810               {                                   /* while      */
9811                 return KEY_while;
9812               }
9813
9814               goto unknown;
9815
9816             case 'r':
9817               if (name[2] == 'i' &&
9818                   name[3] == 't' &&
9819                   name[4] == 'e')
9820               {                                   /* write      */
9821                 return -KEY_write;
9822               }
9823
9824               goto unknown;
9825
9826             default:
9827               goto unknown;
9828           }
9829
9830         default:
9831           goto unknown;
9832       }
9833
9834     case 6: /* 33 tokens of length 6 */
9835       switch (name[0])
9836       {
9837         case 'a':
9838           if (name[1] == 'c' &&
9839               name[2] == 'c' &&
9840               name[3] == 'e' &&
9841               name[4] == 'p' &&
9842               name[5] == 't')
9843           {                                       /* accept     */
9844             return -KEY_accept;
9845           }
9846
9847           goto unknown;
9848
9849         case 'c':
9850           switch (name[1])
9851           {
9852             case 'a':
9853               if (name[2] == 'l' &&
9854                   name[3] == 'l' &&
9855                   name[4] == 'e' &&
9856                   name[5] == 'r')
9857               {                                   /* caller     */
9858                 return -KEY_caller;
9859               }
9860
9861               goto unknown;
9862
9863             case 'h':
9864               if (name[2] == 'r' &&
9865                   name[3] == 'o' &&
9866                   name[4] == 'o' &&
9867                   name[5] == 't')
9868               {                                   /* chroot     */
9869                 return -KEY_chroot;
9870               }
9871
9872               goto unknown;
9873
9874             default:
9875               goto unknown;
9876           }
9877
9878         case 'd':
9879           if (name[1] == 'e' &&
9880               name[2] == 'l' &&
9881               name[3] == 'e' &&
9882               name[4] == 't' &&
9883               name[5] == 'e')
9884           {                                       /* delete     */
9885             return KEY_delete;
9886           }
9887
9888           goto unknown;
9889
9890         case 'e':
9891           switch (name[1])
9892           {
9893             case 'l':
9894               if (name[2] == 's' &&
9895                   name[3] == 'e' &&
9896                   name[4] == 'i' &&
9897                   name[5] == 'f')
9898               {                                   /* elseif     */
9899                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9900               }
9901
9902               goto unknown;
9903
9904             case 'x':
9905               if (name[2] == 'i' &&
9906                   name[3] == 's' &&
9907                   name[4] == 't' &&
9908                   name[5] == 's')
9909               {                                   /* exists     */
9910                 return KEY_exists;
9911               }
9912
9913               goto unknown;
9914
9915             default:
9916               goto unknown;
9917           }
9918
9919         case 'f':
9920           switch (name[1])
9921           {
9922             case 'i':
9923               if (name[2] == 'l' &&
9924                   name[3] == 'e' &&
9925                   name[4] == 'n' &&
9926                   name[5] == 'o')
9927               {                                   /* fileno     */
9928                 return -KEY_fileno;
9929               }
9930
9931               goto unknown;
9932
9933             case 'o':
9934               if (name[2] == 'r' &&
9935                   name[3] == 'm' &&
9936                   name[4] == 'a' &&
9937                   name[5] == 't')
9938               {                                   /* format     */
9939                 return KEY_format;
9940               }
9941
9942               goto unknown;
9943
9944             default:
9945               goto unknown;
9946           }
9947
9948         case 'g':
9949           if (name[1] == 'm' &&
9950               name[2] == 't' &&
9951               name[3] == 'i' &&
9952               name[4] == 'm' &&
9953               name[5] == 'e')
9954           {                                       /* gmtime     */
9955             return -KEY_gmtime;
9956           }
9957
9958           goto unknown;
9959
9960         case 'l':
9961           switch (name[1])
9962           {
9963             case 'e':
9964               if (name[2] == 'n' &&
9965                   name[3] == 'g' &&
9966                   name[4] == 't' &&
9967                   name[5] == 'h')
9968               {                                   /* length     */
9969                 return -KEY_length;
9970               }
9971
9972               goto unknown;
9973
9974             case 'i':
9975               if (name[2] == 's' &&
9976                   name[3] == 't' &&
9977                   name[4] == 'e' &&
9978                   name[5] == 'n')
9979               {                                   /* listen     */
9980                 return -KEY_listen;
9981               }
9982
9983               goto unknown;
9984
9985             default:
9986               goto unknown;
9987           }
9988
9989         case 'm':
9990           if (name[1] == 's' &&
9991               name[2] == 'g')
9992           {
9993             switch (name[3])
9994             {
9995               case 'c':
9996                 if (name[4] == 't' &&
9997                     name[5] == 'l')
9998                 {                                 /* msgctl     */
9999                   return -KEY_msgctl;
10000                 }
10001
10002                 goto unknown;
10003
10004               case 'g':
10005                 if (name[4] == 'e' &&
10006                     name[5] == 't')
10007                 {                                 /* msgget     */
10008                   return -KEY_msgget;
10009                 }
10010
10011                 goto unknown;
10012
10013               case 'r':
10014                 if (name[4] == 'c' &&
10015                     name[5] == 'v')
10016                 {                                 /* msgrcv     */
10017                   return -KEY_msgrcv;
10018                 }
10019
10020                 goto unknown;
10021
10022               case 's':
10023                 if (name[4] == 'n' &&
10024                     name[5] == 'd')
10025                 {                                 /* msgsnd     */
10026                   return -KEY_msgsnd;
10027                 }
10028
10029                 goto unknown;
10030
10031               default:
10032                 goto unknown;
10033             }
10034           }
10035
10036           goto unknown;
10037
10038         case 'p':
10039           if (name[1] == 'r' &&
10040               name[2] == 'i' &&
10041               name[3] == 'n' &&
10042               name[4] == 't' &&
10043               name[5] == 'f')
10044           {                                       /* printf     */
10045             return KEY_printf;
10046           }
10047
10048           goto unknown;
10049
10050         case 'r':
10051           switch (name[1])
10052           {
10053             case 'e':
10054               switch (name[2])
10055               {
10056                 case 'n':
10057                   if (name[3] == 'a' &&
10058                       name[4] == 'm' &&
10059                       name[5] == 'e')
10060                   {                               /* rename     */
10061                     return -KEY_rename;
10062                   }
10063
10064                   goto unknown;
10065
10066                 case 't':
10067                   if (name[3] == 'u' &&
10068                       name[4] == 'r' &&
10069                       name[5] == 'n')
10070                   {                               /* return     */
10071                     return KEY_return;
10072                   }
10073
10074                   goto unknown;
10075
10076                 default:
10077                   goto unknown;
10078               }
10079
10080             case 'i':
10081               if (name[2] == 'n' &&
10082                   name[3] == 'd' &&
10083                   name[4] == 'e' &&
10084                   name[5] == 'x')
10085               {                                   /* rindex     */
10086                 return -KEY_rindex;
10087               }
10088
10089               goto unknown;
10090
10091             default:
10092               goto unknown;
10093           }
10094
10095         case 's':
10096           switch (name[1])
10097           {
10098             case 'c':
10099               if (name[2] == 'a' &&
10100                   name[3] == 'l' &&
10101                   name[4] == 'a' &&
10102                   name[5] == 'r')
10103               {                                   /* scalar     */
10104                 return KEY_scalar;
10105               }
10106
10107               goto unknown;
10108
10109             case 'e':
10110               switch (name[2])
10111               {
10112                 case 'l':
10113                   if (name[3] == 'e' &&
10114                       name[4] == 'c' &&
10115                       name[5] == 't')
10116                   {                               /* select     */
10117                     return -KEY_select;
10118                   }
10119
10120                   goto unknown;
10121
10122                 case 'm':
10123                   switch (name[3])
10124                   {
10125                     case 'c':
10126                       if (name[4] == 't' &&
10127                           name[5] == 'l')
10128                       {                           /* semctl     */
10129                         return -KEY_semctl;
10130                       }
10131
10132                       goto unknown;
10133
10134                     case 'g':
10135                       if (name[4] == 'e' &&
10136                           name[5] == 't')
10137                       {                           /* semget     */
10138                         return -KEY_semget;
10139                       }
10140
10141                       goto unknown;
10142
10143                     default:
10144                       goto unknown;
10145                   }
10146
10147                 default:
10148                   goto unknown;
10149               }
10150
10151             case 'h':
10152               if (name[2] == 'm')
10153               {
10154                 switch (name[3])
10155                 {
10156                   case 'c':
10157                     if (name[4] == 't' &&
10158                         name[5] == 'l')
10159                     {                             /* shmctl     */
10160                       return -KEY_shmctl;
10161                     }
10162
10163                     goto unknown;
10164
10165                   case 'g':
10166                     if (name[4] == 'e' &&
10167                         name[5] == 't')
10168                     {                             /* shmget     */
10169                       return -KEY_shmget;
10170                     }
10171
10172                     goto unknown;
10173
10174                   default:
10175                     goto unknown;
10176                 }
10177               }
10178
10179               goto unknown;
10180
10181             case 'o':
10182               if (name[2] == 'c' &&
10183                   name[3] == 'k' &&
10184                   name[4] == 'e' &&
10185                   name[5] == 't')
10186               {                                   /* socket     */
10187                 return -KEY_socket;
10188               }
10189
10190               goto unknown;
10191
10192             case 'p':
10193               if (name[2] == 'l' &&
10194                   name[3] == 'i' &&
10195                   name[4] == 'c' &&
10196                   name[5] == 'e')
10197               {                                   /* splice     */
10198                 return -KEY_splice;
10199               }
10200
10201               goto unknown;
10202
10203             case 'u':
10204               if (name[2] == 'b' &&
10205                   name[3] == 's' &&
10206                   name[4] == 't' &&
10207                   name[5] == 'r')
10208               {                                   /* substr     */
10209                 return -KEY_substr;
10210               }
10211
10212               goto unknown;
10213
10214             case 'y':
10215               if (name[2] == 's' &&
10216                   name[3] == 't' &&
10217                   name[4] == 'e' &&
10218                   name[5] == 'm')
10219               {                                   /* system     */
10220                 return -KEY_system;
10221               }
10222
10223               goto unknown;
10224
10225             default:
10226               goto unknown;
10227           }
10228
10229         case 'u':
10230           if (name[1] == 'n')
10231           {
10232             switch (name[2])
10233             {
10234               case 'l':
10235                 switch (name[3])
10236                 {
10237                   case 'e':
10238                     if (name[4] == 's' &&
10239                         name[5] == 's')
10240                     {                             /* unless     */
10241                       return KEY_unless;
10242                     }
10243
10244                     goto unknown;
10245
10246                   case 'i':
10247                     if (name[4] == 'n' &&
10248                         name[5] == 'k')
10249                     {                             /* unlink     */
10250                       return -KEY_unlink;
10251                     }
10252
10253                     goto unknown;
10254
10255                   default:
10256                     goto unknown;
10257                 }
10258
10259               case 'p':
10260                 if (name[3] == 'a' &&
10261                     name[4] == 'c' &&
10262                     name[5] == 'k')
10263                 {                                 /* unpack     */
10264                   return -KEY_unpack;
10265                 }
10266
10267                 goto unknown;
10268
10269               default:
10270                 goto unknown;
10271             }
10272           }
10273
10274           goto unknown;
10275
10276         case 'v':
10277           if (name[1] == 'a' &&
10278               name[2] == 'l' &&
10279               name[3] == 'u' &&
10280               name[4] == 'e' &&
10281               name[5] == 's')
10282           {                                       /* values     */
10283             return -KEY_values;
10284           }
10285
10286           goto unknown;
10287
10288         default:
10289           goto unknown;
10290       }
10291
10292     case 7: /* 29 tokens of length 7 */
10293       switch (name[0])
10294       {
10295         case 'D':
10296           if (name[1] == 'E' &&
10297               name[2] == 'S' &&
10298               name[3] == 'T' &&
10299               name[4] == 'R' &&
10300               name[5] == 'O' &&
10301               name[6] == 'Y')
10302           {                                       /* DESTROY    */
10303             return KEY_DESTROY;
10304           }
10305
10306           goto unknown;
10307
10308         case '_':
10309           if (name[1] == '_' &&
10310               name[2] == 'E' &&
10311               name[3] == 'N' &&
10312               name[4] == 'D' &&
10313               name[5] == '_' &&
10314               name[6] == '_')
10315           {                                       /* __END__    */
10316             return KEY___END__;
10317           }
10318
10319           goto unknown;
10320
10321         case 'b':
10322           if (name[1] == 'i' &&
10323               name[2] == 'n' &&
10324               name[3] == 'm' &&
10325               name[4] == 'o' &&
10326               name[5] == 'd' &&
10327               name[6] == 'e')
10328           {                                       /* binmode    */
10329             return -KEY_binmode;
10330           }
10331
10332           goto unknown;
10333
10334         case 'c':
10335           if (name[1] == 'o' &&
10336               name[2] == 'n' &&
10337               name[3] == 'n' &&
10338               name[4] == 'e' &&
10339               name[5] == 'c' &&
10340               name[6] == 't')
10341           {                                       /* connect    */
10342             return -KEY_connect;
10343           }
10344
10345           goto unknown;
10346
10347         case 'd':
10348           switch (name[1])
10349           {
10350             case 'b':
10351               if (name[2] == 'm' &&
10352                   name[3] == 'o' &&
10353                   name[4] == 'p' &&
10354                   name[5] == 'e' &&
10355                   name[6] == 'n')
10356               {                                   /* dbmopen    */
10357                 return -KEY_dbmopen;
10358               }
10359
10360               goto unknown;
10361
10362             case 'e':
10363               if (name[2] == 'f')
10364               {
10365                 switch (name[3])
10366                 {
10367                   case 'a':
10368                     if (name[4] == 'u' &&
10369                         name[5] == 'l' &&
10370                         name[6] == 't')
10371                     {                             /* default    */
10372                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10373                     }
10374
10375                     goto unknown;
10376
10377                   case 'i':
10378                     if (name[4] == 'n' &&
10379                         name[5] == 'e' &&
10380                         name[6] == 'd')
10381                     {                             /* defined    */
10382                       return KEY_defined;
10383                     }
10384
10385                     goto unknown;
10386
10387                   default:
10388                     goto unknown;
10389                 }
10390               }
10391
10392               goto unknown;
10393
10394             default:
10395               goto unknown;
10396           }
10397
10398         case 'f':
10399           if (name[1] == 'o' &&
10400               name[2] == 'r' &&
10401               name[3] == 'e' &&
10402               name[4] == 'a' &&
10403               name[5] == 'c' &&
10404               name[6] == 'h')
10405           {                                       /* foreach    */
10406             return KEY_foreach;
10407           }
10408
10409           goto unknown;
10410
10411         case 'g':
10412           if (name[1] == 'e' &&
10413               name[2] == 't' &&
10414               name[3] == 'p')
10415           {
10416             switch (name[4])
10417             {
10418               case 'g':
10419                 if (name[5] == 'r' &&
10420                     name[6] == 'p')
10421                 {                                 /* getpgrp    */
10422                   return -KEY_getpgrp;
10423                 }
10424
10425                 goto unknown;
10426
10427               case 'p':
10428                 if (name[5] == 'i' &&
10429                     name[6] == 'd')
10430                 {                                 /* getppid    */
10431                   return -KEY_getppid;
10432                 }
10433
10434                 goto unknown;
10435
10436               default:
10437                 goto unknown;
10438             }
10439           }
10440
10441           goto unknown;
10442
10443         case 'l':
10444           if (name[1] == 'c' &&
10445               name[2] == 'f' &&
10446               name[3] == 'i' &&
10447               name[4] == 'r' &&
10448               name[5] == 's' &&
10449               name[6] == 't')
10450           {                                       /* lcfirst    */
10451             return -KEY_lcfirst;
10452           }
10453
10454           goto unknown;
10455
10456         case 'o':
10457           if (name[1] == 'p' &&
10458               name[2] == 'e' &&
10459               name[3] == 'n' &&
10460               name[4] == 'd' &&
10461               name[5] == 'i' &&
10462               name[6] == 'r')
10463           {                                       /* opendir    */
10464             return -KEY_opendir;
10465           }
10466
10467           goto unknown;
10468
10469         case 'p':
10470           if (name[1] == 'a' &&
10471               name[2] == 'c' &&
10472               name[3] == 'k' &&
10473               name[4] == 'a' &&
10474               name[5] == 'g' &&
10475               name[6] == 'e')
10476           {                                       /* package    */
10477             return KEY_package;
10478           }
10479
10480           goto unknown;
10481
10482         case 'r':
10483           if (name[1] == 'e')
10484           {
10485             switch (name[2])
10486             {
10487               case 'a':
10488                 if (name[3] == 'd' &&
10489                     name[4] == 'd' &&
10490                     name[5] == 'i' &&
10491                     name[6] == 'r')
10492                 {                                 /* readdir    */
10493                   return -KEY_readdir;
10494                 }
10495
10496                 goto unknown;
10497
10498               case 'q':
10499                 if (name[3] == 'u' &&
10500                     name[4] == 'i' &&
10501                     name[5] == 'r' &&
10502                     name[6] == 'e')
10503                 {                                 /* require    */
10504                   return KEY_require;
10505                 }
10506
10507                 goto unknown;
10508
10509               case 'v':
10510                 if (name[3] == 'e' &&
10511                     name[4] == 'r' &&
10512                     name[5] == 's' &&
10513                     name[6] == 'e')
10514                 {                                 /* reverse    */
10515                   return -KEY_reverse;
10516                 }
10517
10518                 goto unknown;
10519
10520               default:
10521                 goto unknown;
10522             }
10523           }
10524
10525           goto unknown;
10526
10527         case 's':
10528           switch (name[1])
10529           {
10530             case 'e':
10531               switch (name[2])
10532               {
10533                 case 'e':
10534                   if (name[3] == 'k' &&
10535                       name[4] == 'd' &&
10536                       name[5] == 'i' &&
10537                       name[6] == 'r')
10538                   {                               /* seekdir    */
10539                     return -KEY_seekdir;
10540                   }
10541
10542                   goto unknown;
10543
10544                 case 't':
10545                   if (name[3] == 'p' &&
10546                       name[4] == 'g' &&
10547                       name[5] == 'r' &&
10548                       name[6] == 'p')
10549                   {                               /* setpgrp    */
10550                     return -KEY_setpgrp;
10551                   }
10552
10553                   goto unknown;
10554
10555                 default:
10556                   goto unknown;
10557               }
10558
10559             case 'h':
10560               if (name[2] == 'm' &&
10561                   name[3] == 'r' &&
10562                   name[4] == 'e' &&
10563                   name[5] == 'a' &&
10564                   name[6] == 'd')
10565               {                                   /* shmread    */
10566                 return -KEY_shmread;
10567               }
10568
10569               goto unknown;
10570
10571             case 'p':
10572               if (name[2] == 'r' &&
10573                   name[3] == 'i' &&
10574                   name[4] == 'n' &&
10575                   name[5] == 't' &&
10576                   name[6] == 'f')
10577               {                                   /* sprintf    */
10578                 return -KEY_sprintf;
10579               }
10580
10581               goto unknown;
10582
10583             case 'y':
10584               switch (name[2])
10585               {
10586                 case 'm':
10587                   if (name[3] == 'l' &&
10588                       name[4] == 'i' &&
10589                       name[5] == 'n' &&
10590                       name[6] == 'k')
10591                   {                               /* symlink    */
10592                     return -KEY_symlink;
10593                   }
10594
10595                   goto unknown;
10596
10597                 case 's':
10598                   switch (name[3])
10599                   {
10600                     case 'c':
10601                       if (name[4] == 'a' &&
10602                           name[5] == 'l' &&
10603                           name[6] == 'l')
10604                       {                           /* syscall    */
10605                         return -KEY_syscall;
10606                       }
10607
10608                       goto unknown;
10609
10610                     case 'o':
10611                       if (name[4] == 'p' &&
10612                           name[5] == 'e' &&
10613                           name[6] == 'n')
10614                       {                           /* sysopen    */
10615                         return -KEY_sysopen;
10616                       }
10617
10618                       goto unknown;
10619
10620                     case 'r':
10621                       if (name[4] == 'e' &&
10622                           name[5] == 'a' &&
10623                           name[6] == 'd')
10624                       {                           /* sysread    */
10625                         return -KEY_sysread;
10626                       }
10627
10628                       goto unknown;
10629
10630                     case 's':
10631                       if (name[4] == 'e' &&
10632                           name[5] == 'e' &&
10633                           name[6] == 'k')
10634                       {                           /* sysseek    */
10635                         return -KEY_sysseek;
10636                       }
10637
10638                       goto unknown;
10639
10640                     default:
10641                       goto unknown;
10642                   }
10643
10644                 default:
10645                   goto unknown;
10646               }
10647
10648             default:
10649               goto unknown;
10650           }
10651
10652         case 't':
10653           if (name[1] == 'e' &&
10654               name[2] == 'l' &&
10655               name[3] == 'l' &&
10656               name[4] == 'd' &&
10657               name[5] == 'i' &&
10658               name[6] == 'r')
10659           {                                       /* telldir    */
10660             return -KEY_telldir;
10661           }
10662
10663           goto unknown;
10664
10665         case 'u':
10666           switch (name[1])
10667           {
10668             case 'c':
10669               if (name[2] == 'f' &&
10670                   name[3] == 'i' &&
10671                   name[4] == 'r' &&
10672                   name[5] == 's' &&
10673                   name[6] == 't')
10674               {                                   /* ucfirst    */
10675                 return -KEY_ucfirst;
10676               }
10677
10678               goto unknown;
10679
10680             case 'n':
10681               if (name[2] == 's' &&
10682                   name[3] == 'h' &&
10683                   name[4] == 'i' &&
10684                   name[5] == 'f' &&
10685                   name[6] == 't')
10686               {                                   /* unshift    */
10687                 return -KEY_unshift;
10688               }
10689
10690               goto unknown;
10691
10692             default:
10693               goto unknown;
10694           }
10695
10696         case 'w':
10697           if (name[1] == 'a' &&
10698               name[2] == 'i' &&
10699               name[3] == 't' &&
10700               name[4] == 'p' &&
10701               name[5] == 'i' &&
10702               name[6] == 'd')
10703           {                                       /* waitpid    */
10704             return -KEY_waitpid;
10705           }
10706
10707           goto unknown;
10708
10709         default:
10710           goto unknown;
10711       }
10712
10713     case 8: /* 26 tokens of length 8 */
10714       switch (name[0])
10715       {
10716         case 'A':
10717           if (name[1] == 'U' &&
10718               name[2] == 'T' &&
10719               name[3] == 'O' &&
10720               name[4] == 'L' &&
10721               name[5] == 'O' &&
10722               name[6] == 'A' &&
10723               name[7] == 'D')
10724           {                                       /* AUTOLOAD   */
10725             return KEY_AUTOLOAD;
10726           }
10727
10728           goto unknown;
10729
10730         case '_':
10731           if (name[1] == '_')
10732           {
10733             switch (name[2])
10734             {
10735               case 'D':
10736                 if (name[3] == 'A' &&
10737                     name[4] == 'T' &&
10738                     name[5] == 'A' &&
10739                     name[6] == '_' &&
10740                     name[7] == '_')
10741                 {                                 /* __DATA__   */
10742                   return KEY___DATA__;
10743                 }
10744
10745                 goto unknown;
10746
10747               case 'F':
10748                 if (name[3] == 'I' &&
10749                     name[4] == 'L' &&
10750                     name[5] == 'E' &&
10751                     name[6] == '_' &&
10752                     name[7] == '_')
10753                 {                                 /* __FILE__   */
10754                   return -KEY___FILE__;
10755                 }
10756
10757                 goto unknown;
10758
10759               case 'L':
10760                 if (name[3] == 'I' &&
10761                     name[4] == 'N' &&
10762                     name[5] == 'E' &&
10763                     name[6] == '_' &&
10764                     name[7] == '_')
10765                 {                                 /* __LINE__   */
10766                   return -KEY___LINE__;
10767                 }
10768
10769                 goto unknown;
10770
10771               default:
10772                 goto unknown;
10773             }
10774           }
10775
10776           goto unknown;
10777
10778         case 'c':
10779           switch (name[1])
10780           {
10781             case 'l':
10782               if (name[2] == 'o' &&
10783                   name[3] == 's' &&
10784                   name[4] == 'e' &&
10785                   name[5] == 'd' &&
10786                   name[6] == 'i' &&
10787                   name[7] == 'r')
10788               {                                   /* closedir   */
10789                 return -KEY_closedir;
10790               }
10791
10792               goto unknown;
10793
10794             case 'o':
10795               if (name[2] == 'n' &&
10796                   name[3] == 't' &&
10797                   name[4] == 'i' &&
10798                   name[5] == 'n' &&
10799                   name[6] == 'u' &&
10800                   name[7] == 'e')
10801               {                                   /* continue   */
10802                 return -KEY_continue;
10803               }
10804
10805               goto unknown;
10806
10807             default:
10808               goto unknown;
10809           }
10810
10811         case 'd':
10812           if (name[1] == 'b' &&
10813               name[2] == 'm' &&
10814               name[3] == 'c' &&
10815               name[4] == 'l' &&
10816               name[5] == 'o' &&
10817               name[6] == 's' &&
10818               name[7] == 'e')
10819           {                                       /* dbmclose   */
10820             return -KEY_dbmclose;
10821           }
10822
10823           goto unknown;
10824
10825         case 'e':
10826           if (name[1] == 'n' &&
10827               name[2] == 'd')
10828           {
10829             switch (name[3])
10830             {
10831               case 'g':
10832                 if (name[4] == 'r' &&
10833                     name[5] == 'e' &&
10834                     name[6] == 'n' &&
10835                     name[7] == 't')
10836                 {                                 /* endgrent   */
10837                   return -KEY_endgrent;
10838                 }
10839
10840                 goto unknown;
10841
10842               case 'p':
10843                 if (name[4] == 'w' &&
10844                     name[5] == 'e' &&
10845                     name[6] == 'n' &&
10846                     name[7] == 't')
10847                 {                                 /* endpwent   */
10848                   return -KEY_endpwent;
10849                 }
10850
10851                 goto unknown;
10852
10853               default:
10854                 goto unknown;
10855             }
10856           }
10857
10858           goto unknown;
10859
10860         case 'f':
10861           if (name[1] == 'o' &&
10862               name[2] == 'r' &&
10863               name[3] == 'm' &&
10864               name[4] == 'l' &&
10865               name[5] == 'i' &&
10866               name[6] == 'n' &&
10867               name[7] == 'e')
10868           {                                       /* formline   */
10869             return -KEY_formline;
10870           }
10871
10872           goto unknown;
10873
10874         case 'g':
10875           if (name[1] == 'e' &&
10876               name[2] == 't')
10877           {
10878             switch (name[3])
10879             {
10880               case 'g':
10881                 if (name[4] == 'r')
10882                 {
10883                   switch (name[5])
10884                   {
10885                     case 'e':
10886                       if (name[6] == 'n' &&
10887                           name[7] == 't')
10888                       {                           /* getgrent   */
10889                         return -KEY_getgrent;
10890                       }
10891
10892                       goto unknown;
10893
10894                     case 'g':
10895                       if (name[6] == 'i' &&
10896                           name[7] == 'd')
10897                       {                           /* getgrgid   */
10898                         return -KEY_getgrgid;
10899                       }
10900
10901                       goto unknown;
10902
10903                     case 'n':
10904                       if (name[6] == 'a' &&
10905                           name[7] == 'm')
10906                       {                           /* getgrnam   */
10907                         return -KEY_getgrnam;
10908                       }
10909
10910                       goto unknown;
10911
10912                     default:
10913                       goto unknown;
10914                   }
10915                 }
10916
10917                 goto unknown;
10918
10919               case 'l':
10920                 if (name[4] == 'o' &&
10921                     name[5] == 'g' &&
10922                     name[6] == 'i' &&
10923                     name[7] == 'n')
10924                 {                                 /* getlogin   */
10925                   return -KEY_getlogin;
10926                 }
10927
10928                 goto unknown;
10929
10930               case 'p':
10931                 if (name[4] == 'w')
10932                 {
10933                   switch (name[5])
10934                   {
10935                     case 'e':
10936                       if (name[6] == 'n' &&
10937                           name[7] == 't')
10938                       {                           /* getpwent   */
10939                         return -KEY_getpwent;
10940                       }
10941
10942                       goto unknown;
10943
10944                     case 'n':
10945                       if (name[6] == 'a' &&
10946                           name[7] == 'm')
10947                       {                           /* getpwnam   */
10948                         return -KEY_getpwnam;
10949                       }
10950
10951                       goto unknown;
10952
10953                     case 'u':
10954                       if (name[6] == 'i' &&
10955                           name[7] == 'd')
10956                       {                           /* getpwuid   */
10957                         return -KEY_getpwuid;
10958                       }
10959
10960                       goto unknown;
10961
10962                     default:
10963                       goto unknown;
10964                   }
10965                 }
10966
10967                 goto unknown;
10968
10969               default:
10970                 goto unknown;
10971             }
10972           }
10973
10974           goto unknown;
10975
10976         case 'r':
10977           if (name[1] == 'e' &&
10978               name[2] == 'a' &&
10979               name[3] == 'd')
10980           {
10981             switch (name[4])
10982             {
10983               case 'l':
10984                 if (name[5] == 'i' &&
10985                     name[6] == 'n')
10986                 {
10987                   switch (name[7])
10988                   {
10989                     case 'e':
10990                       {                           /* readline   */
10991                         return -KEY_readline;
10992                       }
10993
10994                     case 'k':
10995                       {                           /* readlink   */
10996                         return -KEY_readlink;
10997                       }
10998
10999                     default:
11000                       goto unknown;
11001                   }
11002                 }
11003
11004                 goto unknown;
11005
11006               case 'p':
11007                 if (name[5] == 'i' &&
11008                     name[6] == 'p' &&
11009                     name[7] == 'e')
11010                 {                                 /* readpipe   */
11011                   return -KEY_readpipe;
11012                 }
11013
11014                 goto unknown;
11015
11016               default:
11017                 goto unknown;
11018             }
11019           }
11020
11021           goto unknown;
11022
11023         case 's':
11024           switch (name[1])
11025           {
11026             case 'e':
11027               if (name[2] == 't')
11028               {
11029                 switch (name[3])
11030                 {
11031                   case 'g':
11032                     if (name[4] == 'r' &&
11033                         name[5] == 'e' &&
11034                         name[6] == 'n' &&
11035                         name[7] == 't')
11036                     {                             /* setgrent   */
11037                       return -KEY_setgrent;
11038                     }
11039
11040                     goto unknown;
11041
11042                   case 'p':
11043                     if (name[4] == 'w' &&
11044                         name[5] == 'e' &&
11045                         name[6] == 'n' &&
11046                         name[7] == 't')
11047                     {                             /* setpwent   */
11048                       return -KEY_setpwent;
11049                     }
11050
11051                     goto unknown;
11052
11053                   default:
11054                     goto unknown;
11055                 }
11056               }
11057
11058               goto unknown;
11059
11060             case 'h':
11061               switch (name[2])
11062               {
11063                 case 'm':
11064                   if (name[3] == 'w' &&
11065                       name[4] == 'r' &&
11066                       name[5] == 'i' &&
11067                       name[6] == 't' &&
11068                       name[7] == 'e')
11069                   {                               /* shmwrite   */
11070                     return -KEY_shmwrite;
11071                   }
11072
11073                   goto unknown;
11074
11075                 case 'u':
11076                   if (name[3] == 't' &&
11077                       name[4] == 'd' &&
11078                       name[5] == 'o' &&
11079                       name[6] == 'w' &&
11080                       name[7] == 'n')
11081                   {                               /* shutdown   */
11082                     return -KEY_shutdown;
11083                   }
11084
11085                   goto unknown;
11086
11087                 default:
11088                   goto unknown;
11089               }
11090
11091             case 'y':
11092               if (name[2] == 's' &&
11093                   name[3] == 'w' &&
11094                   name[4] == 'r' &&
11095                   name[5] == 'i' &&
11096                   name[6] == 't' &&
11097                   name[7] == 'e')
11098               {                                   /* syswrite   */
11099                 return -KEY_syswrite;
11100               }
11101
11102               goto unknown;
11103
11104             default:
11105               goto unknown;
11106           }
11107
11108         case 't':
11109           if (name[1] == 'r' &&
11110               name[2] == 'u' &&
11111               name[3] == 'n' &&
11112               name[4] == 'c' &&
11113               name[5] == 'a' &&
11114               name[6] == 't' &&
11115               name[7] == 'e')
11116           {                                       /* truncate   */
11117             return -KEY_truncate;
11118           }
11119
11120           goto unknown;
11121
11122         default:
11123           goto unknown;
11124       }
11125
11126     case 9: /* 9 tokens of length 9 */
11127       switch (name[0])
11128       {
11129         case 'U':
11130           if (name[1] == 'N' &&
11131               name[2] == 'I' &&
11132               name[3] == 'T' &&
11133               name[4] == 'C' &&
11134               name[5] == 'H' &&
11135               name[6] == 'E' &&
11136               name[7] == 'C' &&
11137               name[8] == 'K')
11138           {                                       /* UNITCHECK  */
11139             return KEY_UNITCHECK;
11140           }
11141
11142           goto unknown;
11143
11144         case 'e':
11145           if (name[1] == 'n' &&
11146               name[2] == 'd' &&
11147               name[3] == 'n' &&
11148               name[4] == 'e' &&
11149               name[5] == 't' &&
11150               name[6] == 'e' &&
11151               name[7] == 'n' &&
11152               name[8] == 't')
11153           {                                       /* endnetent  */
11154             return -KEY_endnetent;
11155           }
11156
11157           goto unknown;
11158
11159         case 'g':
11160           if (name[1] == 'e' &&
11161               name[2] == 't' &&
11162               name[3] == 'n' &&
11163               name[4] == 'e' &&
11164               name[5] == 't' &&
11165               name[6] == 'e' &&
11166               name[7] == 'n' &&
11167               name[8] == 't')
11168           {                                       /* getnetent  */
11169             return -KEY_getnetent;
11170           }
11171
11172           goto unknown;
11173
11174         case 'l':
11175           if (name[1] == 'o' &&
11176               name[2] == 'c' &&
11177               name[3] == 'a' &&
11178               name[4] == 'l' &&
11179               name[5] == 't' &&
11180               name[6] == 'i' &&
11181               name[7] == 'm' &&
11182               name[8] == 'e')
11183           {                                       /* localtime  */
11184             return -KEY_localtime;
11185           }
11186
11187           goto unknown;
11188
11189         case 'p':
11190           if (name[1] == 'r' &&
11191               name[2] == 'o' &&
11192               name[3] == 't' &&
11193               name[4] == 'o' &&
11194               name[5] == 't' &&
11195               name[6] == 'y' &&
11196               name[7] == 'p' &&
11197               name[8] == 'e')
11198           {                                       /* prototype  */
11199             return KEY_prototype;
11200           }
11201
11202           goto unknown;
11203
11204         case 'q':
11205           if (name[1] == 'u' &&
11206               name[2] == 'o' &&
11207               name[3] == 't' &&
11208               name[4] == 'e' &&
11209               name[5] == 'm' &&
11210               name[6] == 'e' &&
11211               name[7] == 't' &&
11212               name[8] == 'a')
11213           {                                       /* quotemeta  */
11214             return -KEY_quotemeta;
11215           }
11216
11217           goto unknown;
11218
11219         case 'r':
11220           if (name[1] == 'e' &&
11221               name[2] == 'w' &&
11222               name[3] == 'i' &&
11223               name[4] == 'n' &&
11224               name[5] == 'd' &&
11225               name[6] == 'd' &&
11226               name[7] == 'i' &&
11227               name[8] == 'r')
11228           {                                       /* rewinddir  */
11229             return -KEY_rewinddir;
11230           }
11231
11232           goto unknown;
11233
11234         case 's':
11235           if (name[1] == 'e' &&
11236               name[2] == 't' &&
11237               name[3] == 'n' &&
11238               name[4] == 'e' &&
11239               name[5] == 't' &&
11240               name[6] == 'e' &&
11241               name[7] == 'n' &&
11242               name[8] == 't')
11243           {                                       /* setnetent  */
11244             return -KEY_setnetent;
11245           }
11246
11247           goto unknown;
11248
11249         case 'w':
11250           if (name[1] == 'a' &&
11251               name[2] == 'n' &&
11252               name[3] == 't' &&
11253               name[4] == 'a' &&
11254               name[5] == 'r' &&
11255               name[6] == 'r' &&
11256               name[7] == 'a' &&
11257               name[8] == 'y')
11258           {                                       /* wantarray  */
11259             return -KEY_wantarray;
11260           }
11261
11262           goto unknown;
11263
11264         default:
11265           goto unknown;
11266       }
11267
11268     case 10: /* 9 tokens of length 10 */
11269       switch (name[0])
11270       {
11271         case 'e':
11272           if (name[1] == 'n' &&
11273               name[2] == 'd')
11274           {
11275             switch (name[3])
11276             {
11277               case 'h':
11278                 if (name[4] == 'o' &&
11279                     name[5] == 's' &&
11280                     name[6] == 't' &&
11281                     name[7] == 'e' &&
11282                     name[8] == 'n' &&
11283                     name[9] == 't')
11284                 {                                 /* endhostent */
11285                   return -KEY_endhostent;
11286                 }
11287
11288                 goto unknown;
11289
11290               case 's':
11291                 if (name[4] == 'e' &&
11292                     name[5] == 'r' &&
11293                     name[6] == 'v' &&
11294                     name[7] == 'e' &&
11295                     name[8] == 'n' &&
11296                     name[9] == 't')
11297                 {                                 /* endservent */
11298                   return -KEY_endservent;
11299                 }
11300
11301                 goto unknown;
11302
11303               default:
11304                 goto unknown;
11305             }
11306           }
11307
11308           goto unknown;
11309
11310         case 'g':
11311           if (name[1] == 'e' &&
11312               name[2] == 't')
11313           {
11314             switch (name[3])
11315             {
11316               case 'h':
11317                 if (name[4] == 'o' &&
11318                     name[5] == 's' &&
11319                     name[6] == 't' &&
11320                     name[7] == 'e' &&
11321                     name[8] == 'n' &&
11322                     name[9] == 't')
11323                 {                                 /* gethostent */
11324                   return -KEY_gethostent;
11325                 }
11326
11327                 goto unknown;
11328
11329               case 's':
11330                 switch (name[4])
11331                 {
11332                   case 'e':
11333                     if (name[5] == 'r' &&
11334                         name[6] == 'v' &&
11335                         name[7] == 'e' &&
11336                         name[8] == 'n' &&
11337                         name[9] == 't')
11338                     {                             /* getservent */
11339                       return -KEY_getservent;
11340                     }
11341
11342                     goto unknown;
11343
11344                   case 'o':
11345                     if (name[5] == 'c' &&
11346                         name[6] == 'k' &&
11347                         name[7] == 'o' &&
11348                         name[8] == 'p' &&
11349                         name[9] == 't')
11350                     {                             /* getsockopt */
11351                       return -KEY_getsockopt;
11352                     }
11353
11354                     goto unknown;
11355
11356                   default:
11357                     goto unknown;
11358                 }
11359
11360               default:
11361                 goto unknown;
11362             }
11363           }
11364
11365           goto unknown;
11366
11367         case 's':
11368           switch (name[1])
11369           {
11370             case 'e':
11371               if (name[2] == 't')
11372               {
11373                 switch (name[3])
11374                 {
11375                   case 'h':
11376                     if (name[4] == 'o' &&
11377                         name[5] == 's' &&
11378                         name[6] == 't' &&
11379                         name[7] == 'e' &&
11380                         name[8] == 'n' &&
11381                         name[9] == 't')
11382                     {                             /* sethostent */
11383                       return -KEY_sethostent;
11384                     }
11385
11386                     goto unknown;
11387
11388                   case 's':
11389                     switch (name[4])
11390                     {
11391                       case 'e':
11392                         if (name[5] == 'r' &&
11393                             name[6] == 'v' &&
11394                             name[7] == 'e' &&
11395                             name[8] == 'n' &&
11396                             name[9] == 't')
11397                         {                         /* setservent */
11398                           return -KEY_setservent;
11399                         }
11400
11401                         goto unknown;
11402
11403                       case 'o':
11404                         if (name[5] == 'c' &&
11405                             name[6] == 'k' &&
11406                             name[7] == 'o' &&
11407                             name[8] == 'p' &&
11408                             name[9] == 't')
11409                         {                         /* setsockopt */
11410                           return -KEY_setsockopt;
11411                         }
11412
11413                         goto unknown;
11414
11415                       default:
11416                         goto unknown;
11417                     }
11418
11419                   default:
11420                     goto unknown;
11421                 }
11422               }
11423
11424               goto unknown;
11425
11426             case 'o':
11427               if (name[2] == 'c' &&
11428                   name[3] == 'k' &&
11429                   name[4] == 'e' &&
11430                   name[5] == 't' &&
11431                   name[6] == 'p' &&
11432                   name[7] == 'a' &&
11433                   name[8] == 'i' &&
11434                   name[9] == 'r')
11435               {                                   /* socketpair */
11436                 return -KEY_socketpair;
11437               }
11438
11439               goto unknown;
11440
11441             default:
11442               goto unknown;
11443           }
11444
11445         default:
11446           goto unknown;
11447       }
11448
11449     case 11: /* 8 tokens of length 11 */
11450       switch (name[0])
11451       {
11452         case '_':
11453           if (name[1] == '_' &&
11454               name[2] == 'P' &&
11455               name[3] == 'A' &&
11456               name[4] == 'C' &&
11457               name[5] == 'K' &&
11458               name[6] == 'A' &&
11459               name[7] == 'G' &&
11460               name[8] == 'E' &&
11461               name[9] == '_' &&
11462               name[10] == '_')
11463           {                                       /* __PACKAGE__ */
11464             return -KEY___PACKAGE__;
11465           }
11466
11467           goto unknown;
11468
11469         case 'e':
11470           if (name[1] == 'n' &&
11471               name[2] == 'd' &&
11472               name[3] == 'p' &&
11473               name[4] == 'r' &&
11474               name[5] == 'o' &&
11475               name[6] == 't' &&
11476               name[7] == 'o' &&
11477               name[8] == 'e' &&
11478               name[9] == 'n' &&
11479               name[10] == 't')
11480           {                                       /* endprotoent */
11481             return -KEY_endprotoent;
11482           }
11483
11484           goto unknown;
11485
11486         case 'g':
11487           if (name[1] == 'e' &&
11488               name[2] == 't')
11489           {
11490             switch (name[3])
11491             {
11492               case 'p':
11493                 switch (name[4])
11494                 {
11495                   case 'e':
11496                     if (name[5] == 'e' &&
11497                         name[6] == 'r' &&
11498                         name[7] == 'n' &&
11499                         name[8] == 'a' &&
11500                         name[9] == 'm' &&
11501                         name[10] == 'e')
11502                     {                             /* getpeername */
11503                       return -KEY_getpeername;
11504                     }
11505
11506                     goto unknown;
11507
11508                   case 'r':
11509                     switch (name[5])
11510                     {
11511                       case 'i':
11512                         if (name[6] == 'o' &&
11513                             name[7] == 'r' &&
11514                             name[8] == 'i' &&
11515                             name[9] == 't' &&
11516                             name[10] == 'y')
11517                         {                         /* getpriority */
11518                           return -KEY_getpriority;
11519                         }
11520
11521                         goto unknown;
11522
11523                       case 'o':
11524                         if (name[6] == 't' &&
11525                             name[7] == 'o' &&
11526                             name[8] == 'e' &&
11527                             name[9] == 'n' &&
11528                             name[10] == 't')
11529                         {                         /* getprotoent */
11530                           return -KEY_getprotoent;
11531                         }
11532
11533                         goto unknown;
11534
11535                       default:
11536                         goto unknown;
11537                     }
11538
11539                   default:
11540                     goto unknown;
11541                 }
11542
11543               case 's':
11544                 if (name[4] == 'o' &&
11545                     name[5] == 'c' &&
11546                     name[6] == 'k' &&
11547                     name[7] == 'n' &&
11548                     name[8] == 'a' &&
11549                     name[9] == 'm' &&
11550                     name[10] == 'e')
11551                 {                                 /* getsockname */
11552                   return -KEY_getsockname;
11553                 }
11554
11555                 goto unknown;
11556
11557               default:
11558                 goto unknown;
11559             }
11560           }
11561
11562           goto unknown;
11563
11564         case 's':
11565           if (name[1] == 'e' &&
11566               name[2] == 't' &&
11567               name[3] == 'p' &&
11568               name[4] == 'r')
11569           {
11570             switch (name[5])
11571             {
11572               case 'i':
11573                 if (name[6] == 'o' &&
11574                     name[7] == 'r' &&
11575                     name[8] == 'i' &&
11576                     name[9] == 't' &&
11577                     name[10] == 'y')
11578                 {                                 /* setpriority */
11579                   return -KEY_setpriority;
11580                 }
11581
11582                 goto unknown;
11583
11584               case 'o':
11585                 if (name[6] == 't' &&
11586                     name[7] == 'o' &&
11587                     name[8] == 'e' &&
11588                     name[9] == 'n' &&
11589                     name[10] == 't')
11590                 {                                 /* setprotoent */
11591                   return -KEY_setprotoent;
11592                 }
11593
11594                 goto unknown;
11595
11596               default:
11597                 goto unknown;
11598             }
11599           }
11600
11601           goto unknown;
11602
11603         default:
11604           goto unknown;
11605       }
11606
11607     case 12: /* 2 tokens of length 12 */
11608       if (name[0] == 'g' &&
11609           name[1] == 'e' &&
11610           name[2] == 't' &&
11611           name[3] == 'n' &&
11612           name[4] == 'e' &&
11613           name[5] == 't' &&
11614           name[6] == 'b' &&
11615           name[7] == 'y')
11616       {
11617         switch (name[8])
11618         {
11619           case 'a':
11620             if (name[9] == 'd' &&
11621                 name[10] == 'd' &&
11622                 name[11] == 'r')
11623             {                                     /* getnetbyaddr */
11624               return -KEY_getnetbyaddr;
11625             }
11626
11627             goto unknown;
11628
11629           case 'n':
11630             if (name[9] == 'a' &&
11631                 name[10] == 'm' &&
11632                 name[11] == 'e')
11633             {                                     /* getnetbyname */
11634               return -KEY_getnetbyname;
11635             }
11636
11637             goto unknown;
11638
11639           default:
11640             goto unknown;
11641         }
11642       }
11643
11644       goto unknown;
11645
11646     case 13: /* 4 tokens of length 13 */
11647       if (name[0] == 'g' &&
11648           name[1] == 'e' &&
11649           name[2] == 't')
11650       {
11651         switch (name[3])
11652         {
11653           case 'h':
11654             if (name[4] == 'o' &&
11655                 name[5] == 's' &&
11656                 name[6] == 't' &&
11657                 name[7] == 'b' &&
11658                 name[8] == 'y')
11659             {
11660               switch (name[9])
11661               {
11662                 case 'a':
11663                   if (name[10] == 'd' &&
11664                       name[11] == 'd' &&
11665                       name[12] == 'r')
11666                   {                               /* gethostbyaddr */
11667                     return -KEY_gethostbyaddr;
11668                   }
11669
11670                   goto unknown;
11671
11672                 case 'n':
11673                   if (name[10] == 'a' &&
11674                       name[11] == 'm' &&
11675                       name[12] == 'e')
11676                   {                               /* gethostbyname */
11677                     return -KEY_gethostbyname;
11678                   }
11679
11680                   goto unknown;
11681
11682                 default:
11683                   goto unknown;
11684               }
11685             }
11686
11687             goto unknown;
11688
11689           case 's':
11690             if (name[4] == 'e' &&
11691                 name[5] == 'r' &&
11692                 name[6] == 'v' &&
11693                 name[7] == 'b' &&
11694                 name[8] == 'y')
11695             {
11696               switch (name[9])
11697               {
11698                 case 'n':
11699                   if (name[10] == 'a' &&
11700                       name[11] == 'm' &&
11701                       name[12] == 'e')
11702                   {                               /* getservbyname */
11703                     return -KEY_getservbyname;
11704                   }
11705
11706                   goto unknown;
11707
11708                 case 'p':
11709                   if (name[10] == 'o' &&
11710                       name[11] == 'r' &&
11711                       name[12] == 't')
11712                   {                               /* getservbyport */
11713                     return -KEY_getservbyport;
11714                   }
11715
11716                   goto unknown;
11717
11718                 default:
11719                   goto unknown;
11720               }
11721             }
11722
11723             goto unknown;
11724
11725           default:
11726             goto unknown;
11727         }
11728       }
11729
11730       goto unknown;
11731
11732     case 14: /* 1 tokens of length 14 */
11733       if (name[0] == 'g' &&
11734           name[1] == 'e' &&
11735           name[2] == 't' &&
11736           name[3] == 'p' &&
11737           name[4] == 'r' &&
11738           name[5] == 'o' &&
11739           name[6] == 't' &&
11740           name[7] == 'o' &&
11741           name[8] == 'b' &&
11742           name[9] == 'y' &&
11743           name[10] == 'n' &&
11744           name[11] == 'a' &&
11745           name[12] == 'm' &&
11746           name[13] == 'e')
11747       {                                           /* getprotobyname */
11748         return -KEY_getprotobyname;
11749       }
11750
11751       goto unknown;
11752
11753     case 16: /* 1 tokens of length 16 */
11754       if (name[0] == 'g' &&
11755           name[1] == 'e' &&
11756           name[2] == 't' &&
11757           name[3] == 'p' &&
11758           name[4] == 'r' &&
11759           name[5] == 'o' &&
11760           name[6] == 't' &&
11761           name[7] == 'o' &&
11762           name[8] == 'b' &&
11763           name[9] == 'y' &&
11764           name[10] == 'n' &&
11765           name[11] == 'u' &&
11766           name[12] == 'm' &&
11767           name[13] == 'b' &&
11768           name[14] == 'e' &&
11769           name[15] == 'r')
11770       {                                           /* getprotobynumber */
11771         return -KEY_getprotobynumber;
11772       }
11773
11774       goto unknown;
11775
11776     default:
11777       goto unknown;
11778   }
11779
11780 unknown:
11781   return 0;
11782 }
11783
11784 STATIC void
11785 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11786 {
11787     dVAR;
11788
11789     PERL_ARGS_ASSERT_CHECKCOMMA;
11790
11791     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11792         if (ckWARN(WARN_SYNTAX)) {
11793             int level = 1;
11794             const char *w;
11795             for (w = s+2; *w && level; w++) {
11796                 if (*w == '(')
11797                     ++level;
11798                 else if (*w == ')')
11799                     --level;
11800             }
11801             while (isSPACE(*w))
11802                 ++w;
11803             /* the list of chars below is for end of statements or
11804              * block / parens, boolean operators (&&, ||, //) and branch
11805              * constructs (or, and, if, until, unless, while, err, for).
11806              * Not a very solid hack... */
11807             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11808                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11809                             "%s (...) interpreted as function",name);
11810         }
11811     }
11812     while (s < PL_bufend && isSPACE(*s))
11813         s++;
11814     if (*s == '(')
11815         s++;
11816     while (s < PL_bufend && isSPACE(*s))
11817         s++;
11818     if (isIDFIRST_lazy_if(s,UTF)) {
11819         const char * const w = s++;
11820         while (isALNUM_lazy_if(s,UTF))
11821             s++;
11822         while (s < PL_bufend && isSPACE(*s))
11823             s++;
11824         if (*s == ',') {
11825             GV* gv;
11826             if (keyword(w, s - w, 0))
11827                 return;
11828
11829             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11830             if (gv && GvCVu(gv))
11831                 return;
11832             Perl_croak(aTHX_ "No comma allowed after %s", what);
11833         }
11834     }
11835 }
11836
11837 /* Either returns sv, or mortalizes sv and returns a new SV*.
11838    Best used as sv=new_constant(..., sv, ...).
11839    If s, pv are NULL, calls subroutine with one argument,
11840    and type is used with error messages only. */
11841
11842 STATIC SV *
11843 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11844                SV *sv, SV *pv, const char *type, STRLEN typelen)
11845 {
11846     dVAR; dSP;
11847     HV * const table = GvHV(PL_hintgv);          /* ^H */
11848     SV *res;
11849     SV **cvp;
11850     SV *cv, *typesv;
11851     const char *why1 = "", *why2 = "", *why3 = "";
11852
11853     PERL_ARGS_ASSERT_NEW_CONSTANT;
11854
11855     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11856         SV *msg;
11857
11858         why2 = (const char *)
11859             (strEQ(key,"charnames")
11860              ? "(possibly a missing \"use charnames ...\")"
11861              : "");
11862         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11863                             (type ? type: "undef"), why2);
11864
11865         /* This is convoluted and evil ("goto considered harmful")
11866          * but I do not understand the intricacies of all the different
11867          * failure modes of %^H in here.  The goal here is to make
11868          * the most probable error message user-friendly. --jhi */
11869
11870         goto msgdone;
11871
11872     report:
11873         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11874                             (type ? type: "undef"), why1, why2, why3);
11875     msgdone:
11876         yyerror(SvPVX_const(msg));
11877         SvREFCNT_dec(msg);
11878         return sv;
11879     }
11880
11881     /* charnames doesn't work well if there have been errors found */
11882     if (PL_error_count > 0 && strEQ(key,"charnames"))
11883         return &PL_sv_undef;
11884
11885     cvp = hv_fetch(table, key, keylen, FALSE);
11886     if (!cvp || !SvOK(*cvp)) {
11887         why1 = "$^H{";
11888         why2 = key;
11889         why3 = "} is not defined";
11890         goto report;
11891     }
11892     sv_2mortal(sv);                     /* Parent created it permanently */
11893     cv = *cvp;
11894     if (!pv && s)
11895         pv = newSVpvn_flags(s, len, SVs_TEMP);
11896     if (type && pv)
11897         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11898     else
11899         typesv = &PL_sv_undef;
11900
11901     PUSHSTACKi(PERLSI_OVERLOAD);
11902     ENTER ;
11903     SAVETMPS;
11904
11905     PUSHMARK(SP) ;
11906     EXTEND(sp, 3);
11907     if (pv)
11908         PUSHs(pv);
11909     PUSHs(sv);
11910     if (pv)
11911         PUSHs(typesv);
11912     PUTBACK;
11913     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11914
11915     SPAGAIN ;
11916
11917     /* Check the eval first */
11918     if (!PL_in_eval && SvTRUE(ERRSV)) {
11919         sv_catpvs(ERRSV, "Propagated");
11920         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11921         (void)POPs;
11922         res = SvREFCNT_inc_simple(sv);
11923     }
11924     else {
11925         res = POPs;
11926         SvREFCNT_inc_simple_void(res);
11927     }
11928
11929     PUTBACK ;
11930     FREETMPS ;
11931     LEAVE ;
11932     POPSTACK;
11933
11934     if (!SvOK(res)) {
11935         why1 = "Call to &{$^H{";
11936         why2 = key;
11937         why3 = "}} did not return a defined value";
11938         sv = res;
11939         goto report;
11940     }
11941
11942     return res;
11943 }
11944
11945 /* Returns a NUL terminated string, with the length of the string written to
11946    *slp
11947    */
11948 STATIC char *
11949 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11950 {
11951     dVAR;
11952     register char *d = dest;
11953     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11954
11955     PERL_ARGS_ASSERT_SCAN_WORD;
11956
11957     for (;;) {
11958         if (d >= e)
11959             Perl_croak(aTHX_ ident_too_long);
11960         if (isALNUM(*s))        /* UTF handled below */
11961             *d++ = *s++;
11962         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11963             *d++ = ':';
11964             *d++ = ':';
11965             s++;
11966         }
11967         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11968             *d++ = *s++;
11969             *d++ = *s++;
11970         }
11971         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11972             char *t = s + UTF8SKIP(s);
11973             size_t len;
11974             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11975                 t += UTF8SKIP(t);
11976             len = t - s;
11977             if (d + len > e)
11978                 Perl_croak(aTHX_ ident_too_long);
11979             Copy(s, d, len, char);
11980             d += len;
11981             s = t;
11982         }
11983         else {
11984             *d = '\0';
11985             *slp = d - dest;
11986             return s;
11987         }
11988     }
11989 }
11990
11991 STATIC char *
11992 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11993 {
11994     dVAR;
11995     char *bracket = NULL;
11996     char funny = *s++;
11997     register char *d = dest;
11998     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11999
12000     PERL_ARGS_ASSERT_SCAN_IDENT;
12001
12002     if (isSPACE(*s))
12003         s = PEEKSPACE(s);
12004     if (isDIGIT(*s)) {
12005         while (isDIGIT(*s)) {
12006             if (d >= e)
12007                 Perl_croak(aTHX_ ident_too_long);
12008             *d++ = *s++;
12009         }
12010     }
12011     else {
12012         for (;;) {
12013             if (d >= e)
12014                 Perl_croak(aTHX_ ident_too_long);
12015             if (isALNUM(*s))    /* UTF handled below */
12016                 *d++ = *s++;
12017             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
12018                 *d++ = ':';
12019                 *d++ = ':';
12020                 s++;
12021             }
12022             else if (*s == ':' && s[1] == ':') {
12023                 *d++ = *s++;
12024                 *d++ = *s++;
12025             }
12026             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
12027                 char *t = s + UTF8SKIP(s);
12028                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
12029                     t += UTF8SKIP(t);
12030                 if (d + (t - s) > e)
12031                     Perl_croak(aTHX_ ident_too_long);
12032                 Copy(s, d, t - s, char);
12033                 d += t - s;
12034                 s = t;
12035             }
12036             else
12037                 break;
12038         }
12039     }
12040     *d = '\0';
12041     d = dest;
12042     if (*d) {
12043         if (PL_lex_state != LEX_NORMAL)
12044             PL_lex_state = LEX_INTERPENDMAYBE;
12045         return s;
12046     }
12047     if (*s == '$' && s[1] &&
12048         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
12049     {
12050         return s;
12051     }
12052     if (*s == '{') {
12053         bracket = s;
12054         s++;
12055     }
12056     else if (ck_uni)
12057         check_uni();
12058     if (s < send)
12059         *d = *s++;
12060     d[1] = '\0';
12061     if (*d == '^' && *s && isCONTROLVAR(*s)) {
12062         *d = toCTRL(*s);
12063         s++;
12064     }
12065     if (bracket) {
12066         if (isSPACE(s[-1])) {
12067             while (s < send) {
12068                 const char ch = *s++;
12069                 if (!SPACE_OR_TAB(ch)) {
12070                     *d = ch;
12071                     break;
12072                 }
12073             }
12074         }
12075         if (isIDFIRST_lazy_if(d,UTF)) {
12076             d++;
12077             if (UTF) {
12078                 char *end = s;
12079                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
12080                     end += UTF8SKIP(end);
12081                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
12082                         end += UTF8SKIP(end);
12083                 }
12084                 Copy(s, d, end - s, char);
12085                 d += end - s;
12086                 s = end;
12087             }
12088             else {
12089                 while ((isALNUM(*s) || *s == ':') && d < e)
12090                     *d++ = *s++;
12091                 if (d >= e)
12092                     Perl_croak(aTHX_ ident_too_long);
12093             }
12094             *d = '\0';
12095             while (s < send && SPACE_OR_TAB(*s))
12096                 s++;
12097             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
12098                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
12099                     const char * const brack =
12100                         (const char *)
12101                         ((*s == '[') ? "[...]" : "{...}");
12102                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
12103                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
12104                         funny, dest, brack, funny, dest, brack);
12105                 }
12106                 bracket++;
12107                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
12108                 PL_lex_allbrackets++;
12109                 return s;
12110             }
12111         }
12112         /* Handle extended ${^Foo} variables
12113          * 1999-02-27 mjd-perl-patch@plover.com */
12114         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
12115                  && isALNUM(*s))
12116         {
12117             d++;
12118             while (isALNUM(*s) && d < e) {
12119                 *d++ = *s++;
12120             }
12121             if (d >= e)
12122                 Perl_croak(aTHX_ ident_too_long);
12123             *d = '\0';
12124         }
12125         if (*s == '}') {
12126             s++;
12127             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
12128                 PL_lex_state = LEX_INTERPEND;
12129                 PL_expect = XREF;
12130             }
12131             if (PL_lex_state == LEX_NORMAL) {
12132                 if (ckWARN(WARN_AMBIGUOUS) &&
12133                     (keyword(dest, d - dest, 0)
12134                      || get_cvn_flags(dest, d - dest, 0)))
12135                 {
12136                     if (funny == '#')
12137                         funny = '@';
12138                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
12139                         "Ambiguous use of %c{%s} resolved to %c%s",
12140                         funny, dest, funny, dest);
12141                 }
12142             }
12143         }
12144         else {
12145             s = bracket;                /* let the parser handle it */
12146             *dest = '\0';
12147         }
12148     }
12149     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
12150         PL_lex_state = LEX_INTERPEND;
12151     return s;
12152 }
12153
12154 static U32
12155 S_pmflag(U32 pmfl, const char ch) {
12156     switch (ch) {
12157         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
12158     case GLOBAL_PAT_MOD:      pmfl |= PMf_GLOBAL; break;
12159     case CONTINUE_PAT_MOD:    pmfl |= PMf_CONTINUE; break;
12160     case ONCE_PAT_MOD:        pmfl |= PMf_KEEP; break;
12161     case KEEPCOPY_PAT_MOD:    pmfl |= RXf_PMf_KEEPCOPY; break;
12162     case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
12163     }
12164     return pmfl;
12165 }
12166
12167 STATIC char *
12168 S_scan_pat(pTHX_ char *start, I32 type)
12169 {
12170     dVAR;
12171     PMOP *pm;
12172     char *s = scan_str(start,!!PL_madskills,FALSE);
12173     const char * const valid_flags =
12174         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
12175 #ifdef PERL_MAD
12176     char *modstart;
12177 #endif
12178
12179     PERL_ARGS_ASSERT_SCAN_PAT;
12180
12181     if (!s) {
12182         const char * const delimiter = skipspace(start);
12183         Perl_croak(aTHX_
12184                    (const char *)
12185                    (*delimiter == '?'
12186                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
12187                     : "Search pattern not terminated" ));
12188     }
12189
12190     pm = (PMOP*)newPMOP(type, 0);
12191     if (PL_multi_open == '?') {
12192         /* This is the only point in the code that sets PMf_ONCE:  */
12193         pm->op_pmflags |= PMf_ONCE;
12194
12195         /* Hence it's safe to do this bit of PMOP book-keeping here, which
12196            allows us to restrict the list needed by reset to just the ??
12197            matches.  */
12198         assert(type != OP_TRANS);
12199         if (PL_curstash) {
12200             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
12201             U32 elements;
12202             if (!mg) {
12203                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
12204                                  0);
12205             }
12206             elements = mg->mg_len / sizeof(PMOP**);
12207             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
12208             ((PMOP**)mg->mg_ptr) [elements++] = pm;
12209             mg->mg_len = elements * sizeof(PMOP**);
12210             PmopSTASH_set(pm,PL_curstash);
12211         }
12212     }
12213 #ifdef PERL_MAD
12214     modstart = s;
12215 #endif
12216     while (*s && strchr(valid_flags, *s))
12217         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12218
12219     if (isALNUM(*s)) {
12220         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12221             "Having no space between pattern and following word is deprecated");
12222
12223     }
12224 #ifdef PERL_MAD
12225     if (PL_madskills && modstart != s) {
12226         SV* tmptoken = newSVpvn(modstart, s - modstart);
12227         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
12228     }
12229 #endif
12230     /* issue a warning if /c is specified,but /g is not */
12231     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
12232     {
12233         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
12234                        "Use of /c modifier is meaningless without /g" );
12235     }
12236
12237     PL_lex_op = (OP*)pm;
12238     pl_yylval.ival = OP_MATCH;
12239     return s;
12240 }
12241
12242 STATIC char *
12243 S_scan_subst(pTHX_ char *start)
12244 {
12245     dVAR;
12246     register char *s;
12247     register PMOP *pm;
12248     I32 first_start;
12249     I32 es = 0;
12250 #ifdef PERL_MAD
12251     char *modstart;
12252 #endif
12253
12254     PERL_ARGS_ASSERT_SCAN_SUBST;
12255
12256     pl_yylval.ival = OP_NULL;
12257
12258     s = scan_str(start,!!PL_madskills,FALSE);
12259
12260     if (!s)
12261         Perl_croak(aTHX_ "Substitution pattern not terminated");
12262
12263     if (s[-1] == PL_multi_open)
12264         s--;
12265 #ifdef PERL_MAD
12266     if (PL_madskills) {
12267         CURMAD('q', PL_thisopen);
12268         CURMAD('_', PL_thiswhite);
12269         CURMAD('E', PL_thisstuff);
12270         CURMAD('Q', PL_thisclose);
12271         PL_realtokenstart = s - SvPVX(PL_linestr);
12272     }
12273 #endif
12274
12275     first_start = PL_multi_start;
12276     s = scan_str(s,!!PL_madskills,FALSE);
12277     if (!s) {
12278         if (PL_lex_stuff) {
12279             SvREFCNT_dec(PL_lex_stuff);
12280             PL_lex_stuff = NULL;
12281         }
12282         Perl_croak(aTHX_ "Substitution replacement not terminated");
12283     }
12284     PL_multi_start = first_start;       /* so whole substitution is taken together */
12285
12286     pm = (PMOP*)newPMOP(OP_SUBST, 0);
12287
12288 #ifdef PERL_MAD
12289     if (PL_madskills) {
12290         CURMAD('z', PL_thisopen);
12291         CURMAD('R', PL_thisstuff);
12292         CURMAD('Z', PL_thisclose);
12293     }
12294     modstart = s;
12295 #endif
12296
12297     while (*s) {
12298         if (*s == EXEC_PAT_MOD) {
12299             s++;
12300             es++;
12301         }
12302         else if (strchr(S_PAT_MODS, *s))
12303             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12304         else {
12305             if (isALNUM(*s)) {
12306                 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12307                     "Having no space between pattern and following word is deprecated");
12308
12309             }
12310             break;
12311         }
12312     }
12313
12314 #ifdef PERL_MAD
12315     if (PL_madskills) {
12316         if (modstart != s)
12317             curmad('m', newSVpvn(modstart, s - modstart));
12318         append_madprops(PL_thismad, (OP*)pm, 0);
12319         PL_thismad = 0;
12320     }
12321 #endif
12322     if ((pm->op_pmflags & PMf_CONTINUE)) {
12323         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
12324     }
12325
12326     if (es) {
12327         SV * const repl = newSVpvs("");
12328
12329         PL_sublex_info.super_bufptr = s;
12330         PL_sublex_info.super_bufend = PL_bufend;
12331         PL_multi_end = 0;
12332         pm->op_pmflags |= PMf_EVAL;
12333         while (es-- > 0) {
12334             if (es)
12335                 sv_catpvs(repl, "eval ");
12336             else
12337                 sv_catpvs(repl, "do ");
12338         }
12339         sv_catpvs(repl, "{");
12340         sv_catsv(repl, PL_lex_repl);
12341         if (strchr(SvPVX(PL_lex_repl), '#'))
12342             sv_catpvs(repl, "\n");
12343         sv_catpvs(repl, "}");
12344         SvEVALED_on(repl);
12345         SvREFCNT_dec(PL_lex_repl);
12346         PL_lex_repl = repl;
12347     }
12348
12349     PL_lex_op = (OP*)pm;
12350     pl_yylval.ival = OP_SUBST;
12351     return s;
12352 }
12353
12354 STATIC char *
12355 S_scan_trans(pTHX_ char *start)
12356 {
12357     dVAR;
12358     register char* s;
12359     OP *o;
12360     short *tbl;
12361     U8 squash;
12362     U8 del;
12363     U8 complement;
12364     bool nondestruct = 0;
12365 #ifdef PERL_MAD
12366     char *modstart;
12367 #endif
12368
12369     PERL_ARGS_ASSERT_SCAN_TRANS;
12370
12371     pl_yylval.ival = OP_NULL;
12372
12373     s = scan_str(start,!!PL_madskills,FALSE);
12374     if (!s)
12375         Perl_croak(aTHX_ "Transliteration pattern not terminated");
12376
12377     if (s[-1] == PL_multi_open)
12378         s--;
12379 #ifdef PERL_MAD
12380     if (PL_madskills) {
12381         CURMAD('q', PL_thisopen);
12382         CURMAD('_', PL_thiswhite);
12383         CURMAD('E', PL_thisstuff);
12384         CURMAD('Q', PL_thisclose);
12385         PL_realtokenstart = s - SvPVX(PL_linestr);
12386     }
12387 #endif
12388
12389     s = scan_str(s,!!PL_madskills,FALSE);
12390     if (!s) {
12391         if (PL_lex_stuff) {
12392             SvREFCNT_dec(PL_lex_stuff);
12393             PL_lex_stuff = NULL;
12394         }
12395         Perl_croak(aTHX_ "Transliteration replacement not terminated");
12396     }
12397     if (PL_madskills) {
12398         CURMAD('z', PL_thisopen);
12399         CURMAD('R', PL_thisstuff);
12400         CURMAD('Z', PL_thisclose);
12401     }
12402
12403     complement = del = squash = 0;
12404 #ifdef PERL_MAD
12405     modstart = s;
12406 #endif
12407     while (1) {
12408         switch (*s) {
12409         case 'c':
12410             complement = OPpTRANS_COMPLEMENT;
12411             break;
12412         case 'd':
12413             del = OPpTRANS_DELETE;
12414             break;
12415         case 's':
12416             squash = OPpTRANS_SQUASH;
12417             break;
12418         case 'r':
12419             nondestruct = 1;
12420             break;
12421         default:
12422             goto no_more;
12423         }
12424         s++;
12425     }
12426   no_more:
12427
12428     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12429     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
12430     o->op_private &= ~OPpTRANS_ALL;
12431     o->op_private |= del|squash|complement|
12432       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12433       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
12434
12435     PL_lex_op = o;
12436     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
12437
12438 #ifdef PERL_MAD
12439     if (PL_madskills) {
12440         if (modstart != s)
12441             curmad('m', newSVpvn(modstart, s - modstart));
12442         append_madprops(PL_thismad, o, 0);
12443         PL_thismad = 0;
12444     }
12445 #endif
12446
12447     return s;
12448 }
12449
12450 STATIC char *
12451 S_scan_heredoc(pTHX_ register char *s)
12452 {
12453     dVAR;
12454     SV *herewas;
12455     I32 op_type = OP_SCALAR;
12456     I32 len;
12457     SV *tmpstr;
12458     char term;
12459     const char *found_newline;
12460     register char *d;
12461     register char *e;
12462     char *peek;
12463     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12464 #ifdef PERL_MAD
12465     I32 stuffstart = s - SvPVX(PL_linestr);
12466     char *tstart;
12467
12468     PL_realtokenstart = -1;
12469 #endif
12470
12471     PERL_ARGS_ASSERT_SCAN_HEREDOC;
12472
12473     s += 2;
12474     d = PL_tokenbuf;
12475     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12476     if (!outer)
12477         *d++ = '\n';
12478     peek = s;
12479     while (SPACE_OR_TAB(*peek))
12480         peek++;
12481     if (*peek == '`' || *peek == '\'' || *peek =='"') {
12482         s = peek;
12483         term = *s++;
12484         s = delimcpy(d, e, s, PL_bufend, term, &len);
12485         d += len;
12486         if (s < PL_bufend)
12487             s++;
12488     }
12489     else {
12490         if (*s == '\\')
12491             s++, term = '\'';
12492         else
12493             term = '"';
12494         if (!isALNUM_lazy_if(s,UTF))
12495             deprecate("bare << to mean <<\"\"");
12496         for (; isALNUM_lazy_if(s,UTF); s++) {
12497             if (d < e)
12498                 *d++ = *s;
12499         }
12500     }
12501     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12502         Perl_croak(aTHX_ "Delimiter for here document is too long");
12503     *d++ = '\n';
12504     *d = '\0';
12505     len = d - PL_tokenbuf;
12506
12507 #ifdef PERL_MAD
12508     if (PL_madskills) {
12509         tstart = PL_tokenbuf + !outer;
12510         PL_thisclose = newSVpvn(tstart, len - !outer);
12511         tstart = SvPVX(PL_linestr) + stuffstart;
12512         PL_thisopen = newSVpvn(tstart, s - tstart);
12513         stuffstart = s - SvPVX(PL_linestr);
12514     }
12515 #endif
12516 #ifndef PERL_STRICT_CR
12517     d = strchr(s, '\r');
12518     if (d) {
12519         char * const olds = s;
12520         s = d;
12521         while (s < PL_bufend) {
12522             if (*s == '\r') {
12523                 *d++ = '\n';
12524                 if (*++s == '\n')
12525                     s++;
12526             }
12527             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
12528                 *d++ = *s++;
12529                 s++;
12530             }
12531             else
12532                 *d++ = *s++;
12533         }
12534         *d = '\0';
12535         PL_bufend = d;
12536         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12537         s = olds;
12538     }
12539 #endif
12540 #ifdef PERL_MAD
12541     found_newline = 0;
12542 #endif
12543     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12544         herewas = newSVpvn(s,PL_bufend-s);
12545     }
12546     else {
12547 #ifdef PERL_MAD
12548         herewas = newSVpvn(s-1,found_newline-s+1);
12549 #else
12550         s--;
12551         herewas = newSVpvn(s,found_newline-s);
12552 #endif
12553     }
12554 #ifdef PERL_MAD
12555     if (PL_madskills) {
12556         tstart = SvPVX(PL_linestr) + stuffstart;
12557         if (PL_thisstuff)
12558             sv_catpvn(PL_thisstuff, tstart, s - tstart);
12559         else
12560             PL_thisstuff = newSVpvn(tstart, s - tstart);
12561     }
12562 #endif
12563     s += SvCUR(herewas);
12564
12565 #ifdef PERL_MAD
12566     stuffstart = s - SvPVX(PL_linestr);
12567
12568     if (found_newline)
12569         s--;
12570 #endif
12571
12572     tmpstr = newSV_type(SVt_PVIV);
12573     SvGROW(tmpstr, 80);
12574     if (term == '\'') {
12575         op_type = OP_CONST;
12576         SvIV_set(tmpstr, -1);
12577     }
12578     else if (term == '`') {
12579         op_type = OP_BACKTICK;
12580         SvIV_set(tmpstr, '\\');
12581     }
12582
12583     CLINE;
12584     PL_multi_start = CopLINE(PL_curcop);
12585     PL_multi_open = PL_multi_close = '<';
12586     term = *PL_tokenbuf;
12587     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12588         char * const bufptr = PL_sublex_info.super_bufptr;
12589         char * const bufend = PL_sublex_info.super_bufend;
12590         char * const olds = s - SvCUR(herewas);
12591         s = strchr(bufptr, '\n');
12592         if (!s)
12593             s = bufend;
12594         d = s;
12595         while (s < bufend &&
12596           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12597             if (*s++ == '\n')
12598                 CopLINE_inc(PL_curcop);
12599         }
12600         if (s >= bufend) {
12601             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12602             missingterm(PL_tokenbuf);
12603         }
12604         sv_setpvn(herewas,bufptr,d-bufptr+1);
12605         sv_setpvn(tmpstr,d+1,s-d);
12606         s += len - 1;
12607         sv_catpvn(herewas,s,bufend-s);
12608         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12609
12610         s = olds;
12611         goto retval;
12612     }
12613     else if (!outer) {
12614         d = s;
12615         while (s < PL_bufend &&
12616           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12617             if (*s++ == '\n')
12618                 CopLINE_inc(PL_curcop);
12619         }
12620         if (s >= PL_bufend) {
12621             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12622             missingterm(PL_tokenbuf);
12623         }
12624         sv_setpvn(tmpstr,d+1,s-d);
12625 #ifdef PERL_MAD
12626         if (PL_madskills) {
12627             if (PL_thisstuff)
12628                 sv_catpvn(PL_thisstuff, d + 1, s - d);
12629             else
12630                 PL_thisstuff = newSVpvn(d + 1, s - d);
12631             stuffstart = s - SvPVX(PL_linestr);
12632         }
12633 #endif
12634         s += len - 1;
12635         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12636
12637         sv_catpvn(herewas,s,PL_bufend-s);
12638         sv_setsv(PL_linestr,herewas);
12639         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12640         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12641         PL_last_lop = PL_last_uni = NULL;
12642     }
12643     else
12644         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12645     while (s >= PL_bufend) {    /* multiple line string? */
12646 #ifdef PERL_MAD
12647         if (PL_madskills) {
12648             tstart = SvPVX(PL_linestr) + stuffstart;
12649             if (PL_thisstuff)
12650                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12651             else
12652                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12653         }
12654 #endif
12655         PL_bufptr = s;
12656         CopLINE_inc(PL_curcop);
12657         if (!outer || !lex_next_chunk(0)) {
12658             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12659             missingterm(PL_tokenbuf);
12660         }
12661         CopLINE_dec(PL_curcop);
12662         s = PL_bufptr;
12663 #ifdef PERL_MAD
12664         stuffstart = s - SvPVX(PL_linestr);
12665 #endif
12666         CopLINE_inc(PL_curcop);
12667         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12668         PL_last_lop = PL_last_uni = NULL;
12669 #ifndef PERL_STRICT_CR
12670         if (PL_bufend - PL_linestart >= 2) {
12671             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12672                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12673             {
12674                 PL_bufend[-2] = '\n';
12675                 PL_bufend--;
12676                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12677             }
12678             else if (PL_bufend[-1] == '\r')
12679                 PL_bufend[-1] = '\n';
12680         }
12681         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12682             PL_bufend[-1] = '\n';
12683 #endif
12684         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12685             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12686             *(SvPVX(PL_linestr) + off ) = ' ';
12687             sv_catsv(PL_linestr,herewas);
12688             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12689             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12690         }
12691         else {
12692             s = PL_bufend;
12693             sv_catsv(tmpstr,PL_linestr);
12694         }
12695     }
12696     s++;
12697 retval:
12698     PL_multi_end = CopLINE(PL_curcop);
12699     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12700         SvPV_shrink_to_cur(tmpstr);
12701     }
12702     SvREFCNT_dec(herewas);
12703     if (!IN_BYTES) {
12704         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12705             SvUTF8_on(tmpstr);
12706         else if (PL_encoding)
12707             sv_recode_to_utf8(tmpstr, PL_encoding);
12708     }
12709     PL_lex_stuff = tmpstr;
12710     pl_yylval.ival = op_type;
12711     return s;
12712 }
12713
12714 /* scan_inputsymbol
12715    takes: current position in input buffer
12716    returns: new position in input buffer
12717    side-effects: pl_yylval and lex_op are set.
12718
12719    This code handles:
12720
12721    <>           read from ARGV
12722    <FH>         read from filehandle
12723    <pkg::FH>    read from package qualified filehandle
12724    <pkg'FH>     read from package qualified filehandle
12725    <$fh>        read from filehandle in $fh
12726    <*.h>        filename glob
12727
12728 */
12729
12730 STATIC char *
12731 S_scan_inputsymbol(pTHX_ char *start)
12732 {
12733     dVAR;
12734     register char *s = start;           /* current position in buffer */
12735     char *end;
12736     I32 len;
12737     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12738     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12739
12740     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12741
12742     end = strchr(s, '\n');
12743     if (!end)
12744         end = PL_bufend;
12745     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12746
12747     /* die if we didn't have space for the contents of the <>,
12748        or if it didn't end, or if we see a newline
12749     */
12750
12751     if (len >= (I32)sizeof PL_tokenbuf)
12752         Perl_croak(aTHX_ "Excessively long <> operator");
12753     if (s >= end)
12754         Perl_croak(aTHX_ "Unterminated <> operator");
12755
12756     s++;
12757
12758     /* check for <$fh>
12759        Remember, only scalar variables are interpreted as filehandles by
12760        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12761        treated as a glob() call.
12762        This code makes use of the fact that except for the $ at the front,
12763        a scalar variable and a filehandle look the same.
12764     */
12765     if (*d == '$' && d[1]) d++;
12766
12767     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12768     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12769         d++;
12770
12771     /* If we've tried to read what we allow filehandles to look like, and
12772        there's still text left, then it must be a glob() and not a getline.
12773        Use scan_str to pull out the stuff between the <> and treat it
12774        as nothing more than a string.
12775     */
12776
12777     if (d - PL_tokenbuf != len) {
12778         pl_yylval.ival = OP_GLOB;
12779         s = scan_str(start,!!PL_madskills,FALSE);
12780         if (!s)
12781            Perl_croak(aTHX_ "Glob not terminated");
12782         return s;
12783     }
12784     else {
12785         bool readline_overriden = FALSE;
12786         GV *gv_readline;
12787         GV **gvp;
12788         /* we're in a filehandle read situation */
12789         d = PL_tokenbuf;
12790
12791         /* turn <> into <ARGV> */
12792         if (!len)
12793             Copy("ARGV",d,5,char);
12794
12795         /* Check whether readline() is overriden */
12796         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12797         if ((gv_readline
12798                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12799                 ||
12800                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12801                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12802                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12803             readline_overriden = TRUE;
12804
12805         /* if <$fh>, create the ops to turn the variable into a
12806            filehandle
12807         */
12808         if (*d == '$') {
12809             /* try to find it in the pad for this block, otherwise find
12810                add symbol table ops
12811             */
12812             const PADOFFSET tmp = pad_findmy(d, len, 0);
12813             if (tmp != NOT_IN_PAD) {
12814                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12815                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12816                     HEK * const stashname = HvNAME_HEK(stash);
12817                     SV * const sym = sv_2mortal(newSVhek(stashname));
12818                     sv_catpvs(sym, "::");
12819                     sv_catpv(sym, d+1);
12820                     d = SvPVX(sym);
12821                     goto intro_sym;
12822                 }
12823                 else {
12824                     OP * const o = newOP(OP_PADSV, 0);
12825                     o->op_targ = tmp;
12826                     PL_lex_op = readline_overriden
12827                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12828                                 op_append_elem(OP_LIST, o,
12829                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12830                         : (OP*)newUNOP(OP_READLINE, 0, o);
12831                 }
12832             }
12833             else {
12834                 GV *gv;
12835                 ++d;
12836 intro_sym:
12837                 gv = gv_fetchpv(d,
12838                                 (PL_in_eval
12839                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12840                                  : GV_ADDMULTI),
12841                                 SVt_PV);
12842                 PL_lex_op = readline_overriden
12843                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12844                             op_append_elem(OP_LIST,
12845                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12846                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12847                     : (OP*)newUNOP(OP_READLINE, 0,
12848                             newUNOP(OP_RV2SV, 0,
12849                                 newGVOP(OP_GV, 0, gv)));
12850             }
12851             if (!readline_overriden)
12852                 PL_lex_op->op_flags |= OPf_SPECIAL;
12853             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12854             pl_yylval.ival = OP_NULL;
12855         }
12856
12857         /* If it's none of the above, it must be a literal filehandle
12858            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12859         else {
12860             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12861             PL_lex_op = readline_overriden
12862                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12863                         op_append_elem(OP_LIST,
12864                             newGVOP(OP_GV, 0, gv),
12865                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12866                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12867             pl_yylval.ival = OP_NULL;
12868         }
12869     }
12870
12871     return s;
12872 }
12873
12874
12875 /* scan_str
12876    takes: start position in buffer
12877           keep_quoted preserve \ on the embedded delimiter(s)
12878           keep_delims preserve the delimiters around the string
12879    returns: position to continue reading from buffer
12880    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12881         updates the read buffer.
12882
12883    This subroutine pulls a string out of the input.  It is called for:
12884         q               single quotes           q(literal text)
12885         '               single quotes           'literal text'
12886         qq              double quotes           qq(interpolate $here please)
12887         "               double quotes           "interpolate $here please"
12888         qx              backticks               qx(/bin/ls -l)
12889         `               backticks               `/bin/ls -l`
12890         qw              quote words             @EXPORT_OK = qw( func() $spam )
12891         m//             regexp match            m/this/
12892         s///            regexp substitute       s/this/that/
12893         tr///           string transliterate    tr/this/that/
12894         y///            string transliterate    y/this/that/
12895         ($*@)           sub prototypes          sub foo ($)
12896         (stuff)         sub attr parameters     sub foo : attr(stuff)
12897         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12898
12899    In most of these cases (all but <>, patterns and transliterate)
12900    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12901    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12902    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12903    calls scan_str().
12904
12905    It skips whitespace before the string starts, and treats the first
12906    character as the delimiter.  If the delimiter is one of ([{< then
12907    the corresponding "close" character )]}> is used as the closing
12908    delimiter.  It allows quoting of delimiters, and if the string has
12909    balanced delimiters ([{<>}]) it allows nesting.
12910
12911    On success, the SV with the resulting string is put into lex_stuff or,
12912    if that is already non-NULL, into lex_repl. The second case occurs only
12913    when parsing the RHS of the special constructs s/// and tr/// (y///).
12914    For convenience, the terminating delimiter character is stuffed into
12915    SvIVX of the SV.
12916 */
12917
12918 STATIC char *
12919 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12920 {
12921     dVAR;
12922     SV *sv;                             /* scalar value: string */
12923     const char *tmps;                   /* temp string, used for delimiter matching */
12924     register char *s = start;           /* current position in the buffer */
12925     register char term;                 /* terminating character */
12926     register char *to;                  /* current position in the sv's data */
12927     I32 brackets = 1;                   /* bracket nesting level */
12928     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12929     I32 termcode;                       /* terminating char. code */
12930     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12931     STRLEN termlen;                     /* length of terminating string */
12932     int last_off = 0;                   /* last position for nesting bracket */
12933 #ifdef PERL_MAD
12934     int stuffstart;
12935     char *tstart;
12936 #endif
12937
12938     PERL_ARGS_ASSERT_SCAN_STR;
12939
12940     /* skip space before the delimiter */
12941     if (isSPACE(*s)) {
12942         s = PEEKSPACE(s);
12943     }
12944
12945 #ifdef PERL_MAD
12946     if (PL_realtokenstart >= 0) {
12947         stuffstart = PL_realtokenstart;
12948         PL_realtokenstart = -1;
12949     }
12950     else
12951         stuffstart = start - SvPVX(PL_linestr);
12952 #endif
12953     /* mark where we are, in case we need to report errors */
12954     CLINE;
12955
12956     /* after skipping whitespace, the next character is the terminator */
12957     term = *s;
12958     if (!UTF) {
12959         termcode = termstr[0] = term;
12960         termlen = 1;
12961     }
12962     else {
12963         termcode = utf8_to_uvchr((U8*)s, &termlen);
12964         Copy(s, termstr, termlen, U8);
12965         if (!UTF8_IS_INVARIANT(term))
12966             has_utf8 = TRUE;
12967     }
12968
12969     /* mark where we are */
12970     PL_multi_start = CopLINE(PL_curcop);
12971     PL_multi_open = term;
12972
12973     /* find corresponding closing delimiter */
12974     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12975         termcode = termstr[0] = term = tmps[5];
12976
12977     PL_multi_close = term;
12978
12979     /* create a new SV to hold the contents.  79 is the SV's initial length.
12980        What a random number. */
12981     sv = newSV_type(SVt_PVIV);
12982     SvGROW(sv, 80);
12983     SvIV_set(sv, termcode);
12984     (void)SvPOK_only(sv);               /* validate pointer */
12985
12986     /* move past delimiter and try to read a complete string */
12987     if (keep_delims)
12988         sv_catpvn(sv, s, termlen);
12989     s += termlen;
12990 #ifdef PERL_MAD
12991     tstart = SvPVX(PL_linestr) + stuffstart;
12992     if (!PL_thisopen && !keep_delims) {
12993         PL_thisopen = newSVpvn(tstart, s - tstart);
12994         stuffstart = s - SvPVX(PL_linestr);
12995     }
12996 #endif
12997     for (;;) {
12998         if (PL_encoding && !UTF) {
12999             bool cont = TRUE;
13000
13001             while (cont) {
13002                 int offset = s - SvPVX_const(PL_linestr);
13003                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
13004                                            &offset, (char*)termstr, termlen);
13005                 const char * const ns = SvPVX_const(PL_linestr) + offset;
13006                 char * const svlast = SvEND(sv) - 1;
13007
13008                 for (; s < ns; s++) {
13009                     if (*s == '\n' && !PL_rsfp)
13010                         CopLINE_inc(PL_curcop);
13011                 }
13012                 if (!found)
13013                     goto read_more_line;
13014                 else {
13015                     /* handle quoted delimiters */
13016                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
13017                         const char *t;
13018                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
13019                             t--;
13020                         if ((svlast-1 - t) % 2) {
13021                             if (!keep_quoted) {
13022                                 *(svlast-1) = term;
13023                                 *svlast = '\0';
13024                                 SvCUR_set(sv, SvCUR(sv) - 1);
13025                             }
13026                             continue;
13027                         }
13028                     }
13029                     if (PL_multi_open == PL_multi_close) {
13030                         cont = FALSE;
13031                     }
13032                     else {
13033                         const char *t;
13034                         char *w;
13035                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
13036                             /* At here, all closes are "was quoted" one,
13037                                so we don't check PL_multi_close. */
13038                             if (*t == '\\') {
13039                                 if (!keep_quoted && *(t+1) == PL_multi_open)
13040                                     t++;
13041                                 else
13042                                     *w++ = *t++;
13043                             }
13044                             else if (*t == PL_multi_open)
13045                                 brackets++;
13046
13047                             *w = *t;
13048                         }
13049                         if (w < t) {
13050                             *w++ = term;
13051                             *w = '\0';
13052                             SvCUR_set(sv, w - SvPVX_const(sv));
13053                         }
13054                         last_off = w - SvPVX(sv);
13055                         if (--brackets <= 0)
13056                             cont = FALSE;
13057                     }
13058                 }
13059             }
13060             if (!keep_delims) {
13061                 SvCUR_set(sv, SvCUR(sv) - 1);
13062                 *SvEND(sv) = '\0';
13063             }
13064             break;
13065         }
13066
13067         /* extend sv if need be */
13068         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
13069         /* set 'to' to the next character in the sv's string */
13070         to = SvPVX(sv)+SvCUR(sv);
13071
13072         /* if open delimiter is the close delimiter read unbridle */
13073         if (PL_multi_open == PL_multi_close) {
13074             for (; s < PL_bufend; s++,to++) {
13075                 /* embedded newlines increment the current line number */
13076                 if (*s == '\n' && !PL_rsfp)
13077                     CopLINE_inc(PL_curcop);
13078                 /* handle quoted delimiters */
13079                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
13080                     if (!keep_quoted && s[1] == term)
13081                         s++;
13082                 /* any other quotes are simply copied straight through */
13083                     else
13084                         *to++ = *s++;
13085                 }
13086                 /* terminate when run out of buffer (the for() condition), or
13087                    have found the terminator */
13088                 else if (*s == term) {
13089                     if (termlen == 1)
13090                         break;
13091                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
13092                         break;
13093                 }
13094                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
13095                     has_utf8 = TRUE;
13096                 *to = *s;
13097             }
13098         }
13099
13100         /* if the terminator isn't the same as the start character (e.g.,
13101            matched brackets), we have to allow more in the quoting, and
13102            be prepared for nested brackets.
13103         */
13104         else {
13105             /* read until we run out of string, or we find the terminator */
13106             for (; s < PL_bufend; s++,to++) {
13107                 /* embedded newlines increment the line count */
13108                 if (*s == '\n' && !PL_rsfp)
13109                     CopLINE_inc(PL_curcop);
13110                 /* backslashes can escape the open or closing characters */
13111                 if (*s == '\\' && s+1 < PL_bufend) {
13112                     if (!keep_quoted &&
13113                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
13114                         s++;
13115                     else
13116                         *to++ = *s++;
13117                 }
13118                 /* allow nested opens and closes */
13119                 else if (*s == PL_multi_close && --brackets <= 0)
13120                     break;
13121                 else if (*s == PL_multi_open)
13122                     brackets++;
13123                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
13124                     has_utf8 = TRUE;
13125                 *to = *s;
13126             }
13127         }
13128         /* terminate the copied string and update the sv's end-of-string */
13129         *to = '\0';
13130         SvCUR_set(sv, to - SvPVX_const(sv));
13131
13132         /*
13133          * this next chunk reads more into the buffer if we're not done yet
13134          */
13135
13136         if (s < PL_bufend)
13137             break;              /* handle case where we are done yet :-) */
13138
13139 #ifndef PERL_STRICT_CR
13140         if (to - SvPVX_const(sv) >= 2) {
13141             if ((to[-2] == '\r' && to[-1] == '\n') ||
13142                 (to[-2] == '\n' && to[-1] == '\r'))
13143             {
13144                 to[-2] = '\n';
13145                 to--;
13146                 SvCUR_set(sv, to - SvPVX_const(sv));
13147             }
13148             else if (to[-1] == '\r')
13149                 to[-1] = '\n';
13150         }
13151         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
13152             to[-1] = '\n';
13153 #endif
13154
13155      read_more_line:
13156         /* if we're out of file, or a read fails, bail and reset the current
13157            line marker so we can report where the unterminated string began
13158         */
13159 #ifdef PERL_MAD
13160         if (PL_madskills) {
13161             char * const tstart = SvPVX(PL_linestr) + stuffstart;
13162             if (PL_thisstuff)
13163                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
13164             else
13165                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
13166         }
13167 #endif
13168         CopLINE_inc(PL_curcop);
13169         PL_bufptr = PL_bufend;
13170         if (!lex_next_chunk(0)) {
13171             sv_free(sv);
13172             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
13173             return NULL;
13174         }
13175         s = PL_bufptr;
13176 #ifdef PERL_MAD
13177         stuffstart = 0;
13178 #endif
13179     }
13180
13181     /* at this point, we have successfully read the delimited string */
13182
13183     if (!PL_encoding || UTF) {
13184 #ifdef PERL_MAD
13185         if (PL_madskills) {
13186             char * const tstart = SvPVX(PL_linestr) + stuffstart;
13187             const int len = s - tstart;
13188             if (PL_thisstuff)
13189                 sv_catpvn(PL_thisstuff, tstart, len);
13190             else
13191                 PL_thisstuff = newSVpvn(tstart, len);
13192             if (!PL_thisclose && !keep_delims)
13193                 PL_thisclose = newSVpvn(s,termlen);
13194         }
13195 #endif
13196
13197         if (keep_delims)
13198             sv_catpvn(sv, s, termlen);
13199         s += termlen;
13200     }
13201 #ifdef PERL_MAD
13202     else {
13203         if (PL_madskills) {
13204             char * const tstart = SvPVX(PL_linestr) + stuffstart;
13205             const int len = s - tstart - termlen;
13206             if (PL_thisstuff)
13207                 sv_catpvn(PL_thisstuff, tstart, len);
13208             else
13209                 PL_thisstuff = newSVpvn(tstart, len);
13210             if (!PL_thisclose && !keep_delims)
13211                 PL_thisclose = newSVpvn(s - termlen,termlen);
13212         }
13213     }
13214 #endif
13215     if (has_utf8 || PL_encoding)
13216         SvUTF8_on(sv);
13217
13218     PL_multi_end = CopLINE(PL_curcop);
13219
13220     /* if we allocated too much space, give some back */
13221     if (SvCUR(sv) + 5 < SvLEN(sv)) {
13222         SvLEN_set(sv, SvCUR(sv) + 1);
13223         SvPV_renew(sv, SvLEN(sv));
13224     }
13225
13226     /* decide whether this is the first or second quoted string we've read
13227        for this op
13228     */
13229
13230     if (PL_lex_stuff)
13231         PL_lex_repl = sv;
13232     else
13233         PL_lex_stuff = sv;
13234     return s;
13235 }
13236
13237 /*
13238   scan_num
13239   takes: pointer to position in buffer
13240   returns: pointer to new position in buffer
13241   side-effects: builds ops for the constant in pl_yylval.op
13242
13243   Read a number in any of the formats that Perl accepts:
13244
13245   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
13246   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
13247   0b[01](_?[01])*
13248   0[0-7](_?[0-7])*
13249   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
13250
13251   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
13252   thing it reads.
13253
13254   If it reads a number without a decimal point or an exponent, it will
13255   try converting the number to an integer and see if it can do so
13256   without loss of precision.
13257 */
13258
13259 char *
13260 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
13261 {
13262     dVAR;
13263     register const char *s = start;     /* current position in buffer */
13264     register char *d;                   /* destination in temp buffer */
13265     register char *e;                   /* end of temp buffer */
13266     NV nv;                              /* number read, as a double */
13267     SV *sv = NULL;                      /* place to put the converted number */
13268     bool floatit;                       /* boolean: int or float? */
13269     const char *lastub = NULL;          /* position of last underbar */
13270     static char const number_too_long[] = "Number too long";
13271
13272     PERL_ARGS_ASSERT_SCAN_NUM;
13273
13274     /* We use the first character to decide what type of number this is */
13275
13276     switch (*s) {
13277     default:
13278       Perl_croak(aTHX_ "panic: scan_num");
13279
13280     /* if it starts with a 0, it could be an octal number, a decimal in
13281        0.13 disguise, or a hexadecimal number, or a binary number. */
13282     case '0':
13283         {
13284           /* variables:
13285              u          holds the "number so far"
13286              shift      the power of 2 of the base
13287                         (hex == 4, octal == 3, binary == 1)
13288              overflowed was the number more than we can hold?
13289
13290              Shift is used when we add a digit.  It also serves as an "are
13291              we in octal/hex/binary?" indicator to disallow hex characters
13292              when in octal mode.
13293            */
13294             NV n = 0.0;
13295             UV u = 0;
13296             I32 shift;
13297             bool overflowed = FALSE;
13298             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
13299             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13300             static const char* const bases[5] =
13301               { "", "binary", "", "octal", "hexadecimal" };
13302             static const char* const Bases[5] =
13303               { "", "Binary", "", "Octal", "Hexadecimal" };
13304             static const char* const maxima[5] =
13305               { "",
13306                 "0b11111111111111111111111111111111",
13307                 "",
13308                 "037777777777",
13309                 "0xffffffff" };
13310             const char *base, *Base, *max;
13311
13312             /* check for hex */
13313             if (s[1] == 'x' || s[1] == 'X') {
13314                 shift = 4;
13315                 s += 2;
13316                 just_zero = FALSE;
13317             } else if (s[1] == 'b' || s[1] == 'B') {
13318                 shift = 1;
13319                 s += 2;
13320                 just_zero = FALSE;
13321             }
13322             /* check for a decimal in disguise */
13323             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
13324                 goto decimal;
13325             /* so it must be octal */
13326             else {
13327                 shift = 3;
13328                 s++;
13329             }
13330
13331             if (*s == '_') {
13332                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13333                                "Misplaced _ in number");
13334                lastub = s++;
13335             }
13336
13337             base = bases[shift];
13338             Base = Bases[shift];
13339             max  = maxima[shift];
13340
13341             /* read the rest of the number */
13342             for (;;) {
13343                 /* x is used in the overflow test,
13344                    b is the digit we're adding on. */
13345                 UV x, b;
13346
13347                 switch (*s) {
13348
13349                 /* if we don't mention it, we're done */
13350                 default:
13351                     goto out;
13352
13353                 /* _ are ignored -- but warned about if consecutive */
13354                 case '_':
13355                     if (lastub && s == lastub + 1)
13356                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13357                                        "Misplaced _ in number");
13358                     lastub = s++;
13359                     break;
13360
13361                 /* 8 and 9 are not octal */
13362                 case '8': case '9':
13363                     if (shift == 3)
13364                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13365                     /* FALL THROUGH */
13366
13367                 /* octal digits */
13368                 case '2': case '3': case '4':
13369                 case '5': case '6': case '7':
13370                     if (shift == 1)
13371                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13372                     /* FALL THROUGH */
13373
13374                 case '0': case '1':
13375                     b = *s++ & 15;              /* ASCII digit -> value of digit */
13376                     goto digit;
13377
13378                 /* hex digits */
13379                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13380                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13381                     /* make sure they said 0x */
13382                     if (shift != 4)
13383                         goto out;
13384                     b = (*s++ & 7) + 9;
13385
13386                     /* Prepare to put the digit we have onto the end
13387                        of the number so far.  We check for overflows.
13388                     */
13389
13390                   digit:
13391                     just_zero = FALSE;
13392                     if (!overflowed) {
13393                         x = u << shift; /* make room for the digit */
13394
13395                         if ((x >> shift) != u
13396                             && !(PL_hints & HINT_NEW_BINARY)) {
13397                             overflowed = TRUE;
13398                             n = (NV) u;
13399                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13400                                              "Integer overflow in %s number",
13401                                              base);
13402                         } else
13403                             u = x | b;          /* add the digit to the end */
13404                     }
13405                     if (overflowed) {
13406                         n *= nvshift[shift];
13407                         /* If an NV has not enough bits in its
13408                          * mantissa to represent an UV this summing of
13409                          * small low-order numbers is a waste of time
13410                          * (because the NV cannot preserve the
13411                          * low-order bits anyway): we could just
13412                          * remember when did we overflow and in the
13413                          * end just multiply n by the right
13414                          * amount. */
13415                         n += (NV) b;
13416                     }
13417                     break;
13418                 }
13419             }
13420
13421           /* if we get here, we had success: make a scalar value from
13422              the number.
13423           */
13424           out:
13425
13426             /* final misplaced underbar check */
13427             if (s[-1] == '_') {
13428                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13429             }
13430
13431             if (overflowed) {
13432                 if (n > 4294967295.0)
13433                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13434                                    "%s number > %s non-portable",
13435                                    Base, max);
13436                 sv = newSVnv(n);
13437             }
13438             else {
13439 #if UVSIZE > 4
13440                 if (u > 0xffffffff)
13441                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13442                                    "%s number > %s non-portable",
13443                                    Base, max);
13444 #endif
13445                 sv = newSVuv(u);
13446             }
13447             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13448                 sv = new_constant(start, s - start, "integer",
13449                                   sv, NULL, NULL, 0);
13450             else if (PL_hints & HINT_NEW_BINARY)
13451                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13452         }
13453         break;
13454
13455     /*
13456       handle decimal numbers.
13457       we're also sent here when we read a 0 as the first digit
13458     */
13459     case '1': case '2': case '3': case '4': case '5':
13460     case '6': case '7': case '8': case '9': case '.':
13461       decimal:
13462         d = PL_tokenbuf;
13463         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13464         floatit = FALSE;
13465
13466         /* read next group of digits and _ and copy into d */
13467         while (isDIGIT(*s) || *s == '_') {
13468             /* skip underscores, checking for misplaced ones
13469                if -w is on
13470             */
13471             if (*s == '_') {
13472                 if (lastub && s == lastub + 1)
13473                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13474                                    "Misplaced _ in number");
13475                 lastub = s++;
13476             }
13477             else {
13478                 /* check for end of fixed-length buffer */
13479                 if (d >= e)
13480                     Perl_croak(aTHX_ number_too_long);
13481                 /* if we're ok, copy the character */
13482                 *d++ = *s++;
13483             }
13484         }
13485
13486         /* final misplaced underbar check */
13487         if (lastub && s == lastub + 1) {
13488             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13489         }
13490
13491         /* read a decimal portion if there is one.  avoid
13492            3..5 being interpreted as the number 3. followed
13493            by .5
13494         */
13495         if (*s == '.' && s[1] != '.') {
13496             floatit = TRUE;
13497             *d++ = *s++;
13498
13499             if (*s == '_') {
13500                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13501                                "Misplaced _ in number");
13502                 lastub = s;
13503             }
13504
13505             /* copy, ignoring underbars, until we run out of digits.
13506             */
13507             for (; isDIGIT(*s) || *s == '_'; s++) {
13508                 /* fixed length buffer check */
13509                 if (d >= e)
13510                     Perl_croak(aTHX_ number_too_long);
13511                 if (*s == '_') {
13512                    if (lastub && s == lastub + 1)
13513                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13514                                       "Misplaced _ in number");
13515                    lastub = s;
13516                 }
13517                 else
13518                     *d++ = *s;
13519             }
13520             /* fractional part ending in underbar? */
13521             if (s[-1] == '_') {
13522                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13523                                "Misplaced _ in number");
13524             }
13525             if (*s == '.' && isDIGIT(s[1])) {
13526                 /* oops, it's really a v-string, but without the "v" */
13527                 s = start;
13528                 goto vstring;
13529             }
13530         }
13531
13532         /* read exponent part, if present */
13533         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13534             floatit = TRUE;
13535             s++;
13536
13537             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13538             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
13539
13540             /* stray preinitial _ */
13541             if (*s == '_') {
13542                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13543                                "Misplaced _ in number");
13544                 lastub = s++;
13545             }
13546
13547             /* allow positive or negative exponent */
13548             if (*s == '+' || *s == '-')
13549                 *d++ = *s++;
13550
13551             /* stray initial _ */
13552             if (*s == '_') {
13553                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13554                                "Misplaced _ in number");
13555                 lastub = s++;
13556             }
13557
13558             /* read digits of exponent */
13559             while (isDIGIT(*s) || *s == '_') {
13560                 if (isDIGIT(*s)) {
13561                     if (d >= e)
13562                         Perl_croak(aTHX_ number_too_long);
13563                     *d++ = *s++;
13564                 }
13565                 else {
13566                    if (((lastub && s == lastub + 1) ||
13567                         (!isDIGIT(s[1]) && s[1] != '_')))
13568                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13569                                       "Misplaced _ in number");
13570                    lastub = s++;
13571                 }
13572             }
13573         }
13574
13575
13576         /*
13577            We try to do an integer conversion first if no characters
13578            indicating "float" have been found.
13579          */
13580
13581         if (!floatit) {
13582             UV uv;
13583             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13584
13585             if (flags == IS_NUMBER_IN_UV) {
13586               if (uv <= IV_MAX)
13587                 sv = newSViv(uv); /* Prefer IVs over UVs. */
13588               else
13589                 sv = newSVuv(uv);
13590             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13591               if (uv <= (UV) IV_MIN)
13592                 sv = newSViv(-(IV)uv);
13593               else
13594                 floatit = TRUE;
13595             } else
13596               floatit = TRUE;
13597         }
13598         if (floatit) {
13599             /* terminate the string */
13600             *d = '\0';
13601             nv = Atof(PL_tokenbuf);
13602             sv = newSVnv(nv);
13603         }
13604
13605         if ( floatit
13606              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13607             const char *const key = floatit ? "float" : "integer";
13608             const STRLEN keylen = floatit ? 5 : 7;
13609             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13610                                 key, keylen, sv, NULL, NULL, 0);
13611         }
13612         break;
13613
13614     /* if it starts with a v, it could be a v-string */
13615     case 'v':
13616 vstring:
13617                 sv = newSV(5); /* preallocate storage space */
13618                 s = scan_vstring(s, PL_bufend, sv);
13619         break;
13620     }
13621
13622     /* make the op for the constant and return */
13623
13624     if (sv)
13625         lvalp->opval = newSVOP(OP_CONST, 0, sv);
13626     else
13627         lvalp->opval = NULL;
13628
13629     return (char *)s;
13630 }
13631
13632 STATIC char *
13633 S_scan_formline(pTHX_ register char *s)
13634 {
13635     dVAR;
13636     register char *eol;
13637     register char *t;
13638     SV * const stuff = newSVpvs("");
13639     bool needargs = FALSE;
13640     bool eofmt = FALSE;
13641 #ifdef PERL_MAD
13642     char *tokenstart = s;
13643     SV* savewhite = NULL;
13644
13645     if (PL_madskills) {
13646         savewhite = PL_thiswhite;
13647         PL_thiswhite = 0;
13648     }
13649 #endif
13650
13651     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13652
13653     while (!needargs) {
13654         if (*s == '.') {
13655             t = s+1;
13656 #ifdef PERL_STRICT_CR
13657             while (SPACE_OR_TAB(*t))
13658                 t++;
13659 #else
13660             while (SPACE_OR_TAB(*t) || *t == '\r')
13661                 t++;
13662 #endif
13663             if (*t == '\n' || t == PL_bufend) {
13664                 eofmt = TRUE;
13665                 break;
13666             }
13667         }
13668         if (PL_in_eval && !PL_rsfp) {
13669             eol = (char *) memchr(s,'\n',PL_bufend-s);
13670             if (!eol++)
13671                 eol = PL_bufend;
13672         }
13673         else
13674             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13675         if (*s != '#') {
13676             for (t = s; t < eol; t++) {
13677                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13678                     needargs = FALSE;
13679                     goto enough;        /* ~~ must be first line in formline */
13680                 }
13681                 if (*t == '@' || *t == '^')
13682                     needargs = TRUE;
13683             }
13684             if (eol > s) {
13685                 sv_catpvn(stuff, s, eol-s);
13686 #ifndef PERL_STRICT_CR
13687                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13688                     char *end = SvPVX(stuff) + SvCUR(stuff);
13689                     end[-2] = '\n';
13690                     end[-1] = '\0';
13691                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13692                 }
13693 #endif
13694             }
13695             else
13696               break;
13697         }
13698         s = (char*)eol;
13699         if (PL_rsfp) {
13700             bool got_some;
13701 #ifdef PERL_MAD
13702             if (PL_madskills) {
13703                 if (PL_thistoken)
13704                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13705                 else
13706                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13707             }
13708 #endif
13709             PL_bufptr = PL_bufend;
13710             CopLINE_inc(PL_curcop);
13711             got_some = lex_next_chunk(0);
13712             CopLINE_dec(PL_curcop);
13713             s = PL_bufptr;
13714 #ifdef PERL_MAD
13715             tokenstart = PL_bufptr;
13716 #endif
13717             if (!got_some)
13718                 break;
13719         }
13720         incline(s);
13721     }
13722   enough:
13723     if (SvCUR(stuff)) {
13724         PL_expect = XTERM;
13725         if (needargs) {
13726             PL_lex_state = LEX_NORMAL;
13727             start_force(PL_curforce);
13728             NEXTVAL_NEXTTOKE.ival = 0;
13729             force_next(',');
13730         }
13731         else
13732             PL_lex_state = LEX_FORMLINE;
13733         if (!IN_BYTES) {
13734             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13735                 SvUTF8_on(stuff);
13736             else if (PL_encoding)
13737                 sv_recode_to_utf8(stuff, PL_encoding);
13738         }
13739         start_force(PL_curforce);
13740         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13741         force_next(THING);
13742         start_force(PL_curforce);
13743         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13744         force_next(LSTOP);
13745     }
13746     else {
13747         SvREFCNT_dec(stuff);
13748         if (eofmt)
13749             PL_lex_formbrack = 0;
13750         PL_bufptr = s;
13751     }
13752 #ifdef PERL_MAD
13753     if (PL_madskills) {
13754         if (PL_thistoken)
13755             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13756         else
13757             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13758         PL_thiswhite = savewhite;
13759     }
13760 #endif
13761     return s;
13762 }
13763
13764 I32
13765 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13766 {
13767     dVAR;
13768     const I32 oldsavestack_ix = PL_savestack_ix;
13769     CV* const outsidecv = PL_compcv;
13770
13771     if (PL_compcv) {
13772         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13773     }
13774     SAVEI32(PL_subline);
13775     save_item(PL_subname);
13776     SAVESPTR(PL_compcv);
13777
13778     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13779     CvFLAGS(PL_compcv) |= flags;
13780
13781     PL_subline = CopLINE(PL_curcop);
13782     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13783     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13784     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13785
13786     return oldsavestack_ix;
13787 }
13788
13789 #ifdef __SC__
13790 #pragma segment Perl_yylex
13791 #endif
13792 static int
13793 S_yywarn(pTHX_ const char *const s)
13794 {
13795     dVAR;
13796
13797     PERL_ARGS_ASSERT_YYWARN;
13798
13799     PL_in_eval |= EVAL_WARNONLY;
13800     yyerror(s);
13801     PL_in_eval &= ~EVAL_WARNONLY;
13802     return 0;
13803 }
13804
13805 int
13806 Perl_yyerror(pTHX_ const char *const s)
13807 {
13808     dVAR;
13809     const char *where = NULL;
13810     const char *context = NULL;
13811     int contlen = -1;
13812     SV *msg;
13813     int yychar  = PL_parser->yychar;
13814
13815     PERL_ARGS_ASSERT_YYERROR;
13816
13817     if (!yychar || (yychar == ';' && !PL_rsfp))
13818         where = "at EOF";
13819     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13820       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13821       PL_oldbufptr != PL_bufptr) {
13822         /*
13823                 Only for NetWare:
13824                 The code below is removed for NetWare because it abends/crashes on NetWare
13825                 when the script has error such as not having the closing quotes like:
13826                     if ($var eq "value)
13827                 Checking of white spaces is anyway done in NetWare code.
13828         */
13829 #ifndef NETWARE
13830         while (isSPACE(*PL_oldoldbufptr))
13831             PL_oldoldbufptr++;
13832 #endif
13833         context = PL_oldoldbufptr;
13834         contlen = PL_bufptr - PL_oldoldbufptr;
13835     }
13836     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13837       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13838         /*
13839                 Only for NetWare:
13840                 The code below is removed for NetWare because it abends/crashes on NetWare
13841                 when the script has error such as not having the closing quotes like:
13842                     if ($var eq "value)
13843                 Checking of white spaces is anyway done in NetWare code.
13844         */
13845 #ifndef NETWARE
13846         while (isSPACE(*PL_oldbufptr))
13847             PL_oldbufptr++;
13848 #endif
13849         context = PL_oldbufptr;
13850         contlen = PL_bufptr - PL_oldbufptr;
13851     }
13852     else if (yychar > 255)
13853         where = "next token ???";
13854     else if (yychar == -2) { /* YYEMPTY */
13855         if (PL_lex_state == LEX_NORMAL ||
13856            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13857             where = "at end of line";
13858         else if (PL_lex_inpat)
13859             where = "within pattern";
13860         else
13861             where = "within string";
13862     }
13863     else {
13864         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13865         if (yychar < 32)
13866             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13867         else if (isPRINT_LC(yychar)) {
13868             const char string = yychar;
13869             sv_catpvn(where_sv, &string, 1);
13870         }
13871         else
13872             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13873         where = SvPVX_const(where_sv);
13874     }
13875     msg = sv_2mortal(newSVpv(s, 0));
13876     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13877         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13878     if (context)
13879         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13880     else
13881         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13882     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13883         Perl_sv_catpvf(aTHX_ msg,
13884         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13885                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13886         PL_multi_end = 0;
13887     }
13888     if (PL_in_eval & EVAL_WARNONLY) {
13889         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13890     }
13891     else
13892         qerror(msg);
13893     if (PL_error_count >= 10) {
13894         if (PL_in_eval && SvCUR(ERRSV))
13895             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13896                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13897         else
13898             Perl_croak(aTHX_ "%s has too many errors.\n",
13899             OutCopFILE(PL_curcop));
13900     }
13901     PL_in_my = 0;
13902     PL_in_my_stash = NULL;
13903     return 0;
13904 }
13905 #ifdef __SC__
13906 #pragma segment Main
13907 #endif
13908
13909 STATIC char*
13910 S_swallow_bom(pTHX_ U8 *s)
13911 {
13912     dVAR;
13913     const STRLEN slen = SvCUR(PL_linestr);
13914
13915     PERL_ARGS_ASSERT_SWALLOW_BOM;
13916
13917     switch (s[0]) {
13918     case 0xFF:
13919         if (s[1] == 0xFE) {
13920             /* UTF-16 little-endian? (or UTF-32LE?) */
13921             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13922                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13923 #ifndef PERL_NO_UTF16_FILTER
13924             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13925             s += 2;
13926             if (PL_bufend > (char*)s) {
13927                 s = add_utf16_textfilter(s, TRUE);
13928             }
13929 #else
13930             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13931 #endif
13932         }
13933         break;
13934     case 0xFE:
13935         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13936 #ifndef PERL_NO_UTF16_FILTER
13937             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13938             s += 2;
13939             if (PL_bufend > (char *)s) {
13940                 s = add_utf16_textfilter(s, FALSE);
13941             }
13942 #else
13943             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13944 #endif
13945         }
13946         break;
13947     case 0xEF:
13948         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13949             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13950             s += 3;                      /* UTF-8 */
13951         }
13952         break;
13953     case 0:
13954         if (slen > 3) {
13955              if (s[1] == 0) {
13956                   if (s[2] == 0xFE && s[3] == 0xFF) {
13957                        /* UTF-32 big-endian */
13958                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13959                   }
13960              }
13961              else if (s[2] == 0 && s[3] != 0) {
13962                   /* Leading bytes
13963                    * 00 xx 00 xx
13964                    * are a good indicator of UTF-16BE. */
13965 #ifndef PERL_NO_UTF16_FILTER
13966                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13967                   s = add_utf16_textfilter(s, FALSE);
13968 #else
13969                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13970 #endif
13971              }
13972         }
13973 #ifdef EBCDIC
13974     case 0xDD:
13975         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13976             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13977             s += 4;                      /* UTF-8 */
13978         }
13979         break;
13980 #endif
13981
13982     default:
13983          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13984                   /* Leading bytes
13985                    * xx 00 xx 00
13986                    * are a good indicator of UTF-16LE. */
13987 #ifndef PERL_NO_UTF16_FILTER
13988               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13989               s = add_utf16_textfilter(s, TRUE);
13990 #else
13991               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13992 #endif
13993          }
13994     }
13995     return (char*)s;
13996 }
13997
13998
13999 #ifndef PERL_NO_UTF16_FILTER
14000 static I32
14001 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
14002 {
14003     dVAR;
14004     SV *const filter = FILTER_DATA(idx);
14005     /* We re-use this each time round, throwing the contents away before we
14006        return.  */
14007     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
14008     SV *const utf8_buffer = filter;
14009     IV status = IoPAGE(filter);
14010     const bool reverse = cBOOL(IoLINES(filter));
14011     I32 retval;
14012
14013     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
14014
14015     /* As we're automatically added, at the lowest level, and hence only called
14016        from this file, we can be sure that we're not called in block mode. Hence
14017        don't bother writing code to deal with block mode.  */
14018     if (maxlen) {
14019         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
14020     }
14021     if (status < 0) {
14022         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
14023     }
14024     DEBUG_P(PerlIO_printf(Perl_debug_log,
14025                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
14026                           FPTR2DPTR(void *, S_utf16_textfilter),
14027                           reverse ? 'l' : 'b', idx, maxlen, status,
14028                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
14029
14030     while (1) {
14031         STRLEN chars;
14032         STRLEN have;
14033         I32 newlen;
14034         U8 *end;
14035         /* First, look in our buffer of existing UTF-8 data:  */
14036         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
14037
14038         if (nl) {
14039             ++nl;
14040         } else if (status == 0) {
14041             /* EOF */
14042             IoPAGE(filter) = 0;
14043             nl = SvEND(utf8_buffer);
14044         }
14045         if (nl) {
14046             STRLEN got = nl - SvPVX(utf8_buffer);
14047             /* Did we have anything to append?  */
14048             retval = got != 0;
14049             sv_catpvn(sv, SvPVX(utf8_buffer), got);
14050             /* Everything else in this code works just fine if SVp_POK isn't
14051                set.  This, however, needs it, and we need it to work, else
14052                we loop infinitely because the buffer is never consumed.  */
14053             sv_chop(utf8_buffer, nl);
14054             break;
14055         }
14056
14057         /* OK, not a complete line there, so need to read some more UTF-16.
14058            Read an extra octect if the buffer currently has an odd number. */
14059         while (1) {
14060             if (status <= 0)
14061                 break;
14062             if (SvCUR(utf16_buffer) >= 2) {
14063                 /* Location of the high octet of the last complete code point.
14064                    Gosh, UTF-16 is a pain. All the benefits of variable length,
14065                    *coupled* with all the benefits of partial reads and
14066                    endianness.  */
14067                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
14068                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
14069
14070                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
14071                     break;
14072                 }
14073
14074                 /* We have the first half of a surrogate. Read more.  */
14075                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
14076             }
14077
14078             status = FILTER_READ(idx + 1, utf16_buffer,
14079                                  160 + (SvCUR(utf16_buffer) & 1));
14080             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
14081             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
14082             if (status < 0) {
14083                 /* Error */
14084                 IoPAGE(filter) = status;
14085                 return status;
14086             }
14087         }
14088
14089         chars = SvCUR(utf16_buffer) >> 1;
14090         have = SvCUR(utf8_buffer);
14091         SvGROW(utf8_buffer, have + chars * 3 + 1);
14092
14093         if (reverse) {
14094             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
14095                                          (U8*)SvPVX_const(utf8_buffer) + have,
14096                                          chars * 2, &newlen);
14097         } else {
14098             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
14099                                 (U8*)SvPVX_const(utf8_buffer) + have,
14100                                 chars * 2, &newlen);
14101         }
14102         SvCUR_set(utf8_buffer, have + newlen);
14103         *end = '\0';
14104
14105         /* No need to keep this SV "well-formed" with a '\0' after the end, as
14106            it's private to us, and utf16_to_utf8{,reversed} take a
14107            (pointer,length) pair, rather than a NUL-terminated string.  */
14108         if(SvCUR(utf16_buffer) & 1) {
14109             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
14110             SvCUR_set(utf16_buffer, 1);
14111         } else {
14112             SvCUR_set(utf16_buffer, 0);
14113         }
14114     }
14115     DEBUG_P(PerlIO_printf(Perl_debug_log,
14116                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
14117                           status,
14118                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
14119     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
14120     return retval;
14121 }
14122
14123 static U8 *
14124 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
14125 {
14126     SV *filter = filter_add(S_utf16_textfilter, NULL);
14127
14128     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
14129
14130     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
14131     sv_setpvs(filter, "");
14132     IoLINES(filter) = reversed;
14133     IoPAGE(filter) = 1; /* Not EOF */
14134
14135     /* Sadly, we have to return a valid pointer, come what may, so we have to
14136        ignore any error return from this.  */
14137     SvCUR_set(PL_linestr, 0);
14138     if (FILTER_READ(0, PL_linestr, 0)) {
14139         SvUTF8_on(PL_linestr);
14140     } else {
14141         SvUTF8_on(PL_linestr);
14142     }
14143     PL_bufend = SvEND(PL_linestr);
14144     return (U8*)SvPVX(PL_linestr);
14145 }
14146 #endif
14147
14148 /*
14149 Returns a pointer to the next character after the parsed
14150 vstring, as well as updating the passed in sv.
14151
14152 Function must be called like
14153
14154         sv = newSV(5);
14155         s = scan_vstring(s,e,sv);
14156
14157 where s and e are the start and end of the string.
14158 The sv should already be large enough to store the vstring
14159 passed in, for performance reasons.
14160
14161 */
14162
14163 char *
14164 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
14165 {
14166     dVAR;
14167     const char *pos = s;
14168     const char *start = s;
14169
14170     PERL_ARGS_ASSERT_SCAN_VSTRING;
14171
14172     if (*pos == 'v') pos++;  /* get past 'v' */
14173     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
14174         pos++;
14175     if ( *pos != '.') {
14176         /* this may not be a v-string if followed by => */
14177         const char *next = pos;
14178         while (next < e && isSPACE(*next))
14179             ++next;
14180         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
14181             /* return string not v-string */
14182             sv_setpvn(sv,(char *)s,pos-s);
14183             return (char *)pos;
14184         }
14185     }
14186
14187     if (!isALPHA(*pos)) {
14188         U8 tmpbuf[UTF8_MAXBYTES+1];
14189
14190         if (*s == 'v')
14191             s++;  /* get past 'v' */
14192
14193         sv_setpvs(sv, "");
14194
14195         for (;;) {
14196             /* this is atoi() that tolerates underscores */
14197             U8 *tmpend;
14198             UV rev = 0;
14199             const char *end = pos;
14200             UV mult = 1;
14201             while (--end >= s) {
14202                 if (*end != '_') {
14203                     const UV orev = rev;
14204                     rev += (*end - '0') * mult;
14205                     mult *= 10;
14206                     if (orev > rev)
14207                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
14208                                          "Integer overflow in decimal number");
14209                 }
14210             }
14211 #ifdef EBCDIC
14212             if (rev > 0x7FFFFFFF)
14213                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
14214 #endif
14215             /* Append native character for the rev point */
14216             tmpend = uvchr_to_utf8(tmpbuf, rev);
14217             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
14218             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
14219                  SvUTF8_on(sv);
14220             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
14221                  s = ++pos;
14222             else {
14223                  s = pos;
14224                  break;
14225             }
14226             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
14227                  pos++;
14228         }
14229         SvPOK_on(sv);
14230         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
14231         SvRMAGICAL_on(sv);
14232     }
14233     return (char *)s;
14234 }
14235
14236 int
14237 Perl_keyword_plugin_standard(pTHX_
14238         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
14239 {
14240     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
14241     PERL_UNUSED_CONTEXT;
14242     PERL_UNUSED_ARG(keyword_ptr);
14243     PERL_UNUSED_ARG(keyword_len);
14244     PERL_UNUSED_ARG(op_ptr);
14245     return KEYWORD_PLUGIN_DECLINE;
14246 }
14247
14248 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
14249 static void
14250 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
14251 {
14252     SAVEI32(PL_lex_brackets);
14253     if (PL_lex_brackets > 100)
14254         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
14255     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
14256     SAVEI32(PL_lex_allbrackets);
14257     PL_lex_allbrackets = 0;
14258     SAVEI8(PL_lex_fakeeof);
14259     PL_lex_fakeeof = (U8)fakeeof;
14260     if(yyparse(gramtype) && !PL_parser->error_count)
14261         qerror(Perl_mess(aTHX_ "Parse error"));
14262 }
14263
14264 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
14265 static OP *
14266 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
14267 {
14268     OP *o;
14269     ENTER;
14270     SAVEVPTR(PL_eval_root);
14271     PL_eval_root = NULL;
14272     parse_recdescent(gramtype, fakeeof);
14273     o = PL_eval_root;
14274     LEAVE;
14275     return o;
14276 }
14277
14278 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
14279 static OP *
14280 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
14281 {
14282     OP *exprop;
14283     if (flags & ~PARSE_OPTIONAL)
14284         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
14285     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
14286     if (!exprop && !(flags & PARSE_OPTIONAL)) {
14287         if (!PL_parser->error_count)
14288             qerror(Perl_mess(aTHX_ "Parse error"));
14289         exprop = newOP(OP_NULL, 0);
14290     }
14291     return exprop;
14292 }
14293
14294 /*
14295 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
14296
14297 Parse a Perl arithmetic expression.  This may contain operators of precedence
14298 down to the bit shift operators.  The expression must be followed (and thus
14299 terminated) either by a comparison or lower-precedence operator or by
14300 something that would normally terminate an expression such as semicolon.
14301 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
14302 otherwise it is mandatory.  It is up to the caller to ensure that the
14303 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
14304 the source of the code to be parsed and the lexical context for the
14305 expression.
14306
14307 The op tree representing the expression is returned.  If an optional
14308 expression is absent, a null pointer is returned, otherwise the pointer
14309 will be non-null.
14310
14311 If an error occurs in parsing or compilation, in most cases a valid op
14312 tree is returned anyway.  The error is reflected in the parser state,
14313 normally resulting in a single exception at the top level of parsing
14314 which covers all the compilation errors that occurred.  Some compilation
14315 errors, however, will throw an exception immediately.
14316
14317 =cut
14318 */
14319
14320 OP *
14321 Perl_parse_arithexpr(pTHX_ U32 flags)
14322 {
14323     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
14324 }
14325
14326 /*
14327 =for apidoc Amx|OP *|parse_termexpr|U32 flags
14328
14329 Parse a Perl term expression.  This may contain operators of precedence
14330 down to the assignment operators.  The expression must be followed (and thus
14331 terminated) either by a comma or lower-precedence operator or by
14332 something that would normally terminate an expression such as semicolon.
14333 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
14334 otherwise it is mandatory.  It is up to the caller to ensure that the
14335 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
14336 the source of the code to be parsed and the lexical context for the
14337 expression.
14338
14339 The op tree representing the expression is returned.  If an optional
14340 expression is absent, a null pointer is returned, otherwise the pointer
14341 will be non-null.
14342
14343 If an error occurs in parsing or compilation, in most cases a valid op
14344 tree is returned anyway.  The error is reflected in the parser state,
14345 normally resulting in a single exception at the top level of parsing
14346 which covers all the compilation errors that occurred.  Some compilation
14347 errors, however, will throw an exception immediately.
14348
14349 =cut
14350 */
14351
14352 OP *
14353 Perl_parse_termexpr(pTHX_ U32 flags)
14354 {
14355     return parse_expr(LEX_FAKEEOF_COMMA, flags);
14356 }
14357
14358 /*
14359 =for apidoc Amx|OP *|parse_listexpr|U32 flags
14360
14361 Parse a Perl list expression.  This may contain operators of precedence
14362 down to the comma operator.  The expression must be followed (and thus
14363 terminated) either by a low-precedence logic operator such as C<or> or by
14364 something that would normally terminate an expression such as semicolon.
14365 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
14366 otherwise it is mandatory.  It is up to the caller to ensure that the
14367 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
14368 the source of the code to be parsed and the lexical context for the
14369 expression.
14370
14371 The op tree representing the expression is returned.  If an optional
14372 expression is absent, a null pointer is returned, otherwise the pointer
14373 will be non-null.
14374
14375 If an error occurs in parsing or compilation, in most cases a valid op
14376 tree is returned anyway.  The error is reflected in the parser state,
14377 normally resulting in a single exception at the top level of parsing
14378 which covers all the compilation errors that occurred.  Some compilation
14379 errors, however, will throw an exception immediately.
14380
14381 =cut
14382 */
14383
14384 OP *
14385 Perl_parse_listexpr(pTHX_ U32 flags)
14386 {
14387     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
14388 }
14389
14390 /*
14391 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
14392
14393 Parse a single complete Perl expression.  This allows the full
14394 expression grammar, including the lowest-precedence operators such
14395 as C<or>.  The expression must be followed (and thus terminated) by a
14396 token that an expression would normally be terminated by: end-of-file,
14397 closing bracketing punctuation, semicolon, or one of the keywords that
14398 signals a postfix expression-statement modifier.  If I<flags> includes
14399 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
14400 mandatory.  It is up to the caller to ensure that the dynamic parser
14401 state (L</PL_parser> et al) is correctly set to reflect the source of
14402 the code to be parsed and the lexical context for the expression.
14403
14404 The op tree representing the expression is returned.  If an optional
14405 expression is absent, a null pointer is returned, otherwise the pointer
14406 will be non-null.
14407
14408 If an error occurs in parsing or compilation, in most cases a valid op
14409 tree is returned anyway.  The error is reflected in the parser state,
14410 normally resulting in a single exception at the top level of parsing
14411 which covers all the compilation errors that occurred.  Some compilation
14412 errors, however, will throw an exception immediately.
14413
14414 =cut
14415 */
14416
14417 OP *
14418 Perl_parse_fullexpr(pTHX_ U32 flags)
14419 {
14420     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
14421 }
14422
14423 /*
14424 =for apidoc Amx|OP *|parse_block|U32 flags
14425
14426 Parse a single complete Perl code block.  This consists of an opening
14427 brace, a sequence of statements, and a closing brace.  The block
14428 constitutes a lexical scope, so C<my> variables and various compile-time
14429 effects can be contained within it.  It is up to the caller to ensure
14430 that the dynamic parser state (L</PL_parser> et al) is correctly set to
14431 reflect the source of the code to be parsed and the lexical context for
14432 the statement.
14433
14434 The op tree representing the code block is returned.  This is always a
14435 real op, never a null pointer.  It will normally be a C<lineseq> list,
14436 including C<nextstate> or equivalent ops.  No ops to construct any kind
14437 of runtime scope are included by virtue of it being a block.
14438
14439 If an error occurs in parsing or compilation, in most cases a valid op
14440 tree (most likely null) is returned anyway.  The error is reflected in
14441 the parser state, normally resulting in a single exception at the top
14442 level of parsing which covers all the compilation errors that occurred.
14443 Some compilation errors, however, will throw an exception immediately.
14444
14445 The I<flags> parameter is reserved for future use, and must always
14446 be zero.
14447
14448 =cut
14449 */
14450
14451 OP *
14452 Perl_parse_block(pTHX_ U32 flags)
14453 {
14454     if (flags)
14455         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
14456     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
14457 }
14458
14459 /*
14460 =for apidoc Amx|OP *|parse_barestmt|U32 flags
14461
14462 Parse a single unadorned Perl statement.  This may be a normal imperative
14463 statement or a declaration that has compile-time effect.  It does not
14464 include any label or other affixture.  It is up to the caller to ensure
14465 that the dynamic parser state (L</PL_parser> et al) is correctly set to
14466 reflect the source of the code to be parsed and the lexical context for
14467 the statement.
14468
14469 The op tree representing the statement is returned.  This may be a
14470 null pointer if the statement is null, for example if it was actually
14471 a subroutine definition (which has compile-time side effects).  If not
14472 null, it will be ops directly implementing the statement, suitable to
14473 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
14474 equivalent op (except for those embedded in a scope contained entirely
14475 within the statement).
14476
14477 If an error occurs in parsing or compilation, in most cases a valid op
14478 tree (most likely null) is returned anyway.  The error is reflected in
14479 the parser state, normally resulting in a single exception at the top
14480 level of parsing which covers all the compilation errors that occurred.
14481 Some compilation errors, however, will throw an exception immediately.
14482
14483 The I<flags> parameter is reserved for future use, and must always
14484 be zero.
14485
14486 =cut
14487 */
14488
14489 OP *
14490 Perl_parse_barestmt(pTHX_ U32 flags)
14491 {
14492     if (flags)
14493         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
14494     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
14495 }
14496
14497 /*
14498 =for apidoc Amx|SV *|parse_label|U32 flags
14499
14500 Parse a single label, possibly optional, of the type that may prefix a
14501 Perl statement.  It is up to the caller to ensure that the dynamic parser
14502 state (L</PL_parser> et al) is correctly set to reflect the source of
14503 the code to be parsed.  If I<flags> includes C<PARSE_OPTIONAL> then the
14504 label is optional, otherwise it is mandatory.
14505
14506 The name of the label is returned in the form of a fresh scalar.  If an
14507 optional label is absent, a null pointer is returned.
14508
14509 If an error occurs in parsing, which can only occur if the label is
14510 mandatory, a valid label is returned anyway.  The error is reflected in
14511 the parser state, normally resulting in a single exception at the top
14512 level of parsing which covers all the compilation errors that occurred.
14513
14514 =cut
14515 */
14516
14517 SV *
14518 Perl_parse_label(pTHX_ U32 flags)
14519 {
14520     if (flags & ~PARSE_OPTIONAL)
14521         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
14522     if (PL_lex_state == LEX_KNOWNEXT) {
14523         PL_parser->yychar = yylex();
14524         if (PL_parser->yychar == LABEL) {
14525             char *lpv = pl_yylval.pval;
14526             STRLEN llen = strlen(lpv);
14527             SV *lsv;
14528             PL_parser->yychar = YYEMPTY;
14529             lsv = newSV_type(SVt_PV);
14530             SvPV_set(lsv, lpv);
14531             SvCUR_set(lsv, llen);
14532             SvLEN_set(lsv, llen+1);
14533             SvPOK_on(lsv);
14534             return lsv;
14535         } else {
14536             yyunlex();
14537             goto no_label;
14538         }
14539     } else {
14540         char *s, *t;
14541         U8 c;
14542         STRLEN wlen, bufptr_pos;
14543         lex_read_space(0);
14544         t = s = PL_bufptr;
14545         c = (U8)*s;
14546         if (!isIDFIRST_A(c))
14547             goto no_label;
14548         do {
14549             c = (U8)*++t;
14550         } while(isWORDCHAR_A(c));
14551         wlen = t - s;
14552         if (word_takes_any_delimeter(s, wlen))
14553             goto no_label;
14554         bufptr_pos = s - SvPVX(PL_linestr);
14555         PL_bufptr = t;
14556         lex_read_space(LEX_KEEP_PREVIOUS);
14557         t = PL_bufptr;
14558         s = SvPVX(PL_linestr) + bufptr_pos;
14559         if (t[0] == ':' && t[1] != ':') {
14560             PL_oldoldbufptr = PL_oldbufptr;
14561             PL_oldbufptr = s;
14562             PL_bufptr = t+1;
14563             return newSVpvn(s, wlen);
14564         } else {
14565             PL_bufptr = s;
14566             no_label:
14567             if (flags & PARSE_OPTIONAL) {
14568                 return NULL;
14569             } else {
14570                 qerror(Perl_mess(aTHX_ "Parse error"));
14571                 return newSVpvs("x");
14572             }
14573         }
14574     }
14575 }
14576
14577 /*
14578 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
14579
14580 Parse a single complete Perl statement.  This may be a normal imperative
14581 statement or a declaration that has compile-time effect, and may include
14582 optional labels.  It is up to the caller to ensure that the dynamic
14583 parser state (L</PL_parser> et al) is correctly set to reflect the source
14584 of the code to be parsed and the lexical context for the statement.
14585
14586 The op tree representing the statement is returned.  This may be a
14587 null pointer if the statement is null, for example if it was actually
14588 a subroutine definition (which has compile-time side effects).  If not
14589 null, it will be the result of a L</newSTATEOP> call, normally including
14590 a C<nextstate> or equivalent op.
14591
14592 If an error occurs in parsing or compilation, in most cases a valid op
14593 tree (most likely null) is returned anyway.  The error is reflected in
14594 the parser state, normally resulting in a single exception at the top
14595 level of parsing which covers all the compilation errors that occurred.
14596 Some compilation errors, however, will throw an exception immediately.
14597
14598 The I<flags> parameter is reserved for future use, and must always
14599 be zero.
14600
14601 =cut
14602 */
14603
14604 OP *
14605 Perl_parse_fullstmt(pTHX_ U32 flags)
14606 {
14607     if (flags)
14608         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
14609     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
14610 }
14611
14612 /*
14613 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
14614
14615 Parse a sequence of zero or more Perl statements.  These may be normal
14616 imperative statements, including optional labels, or declarations
14617 that have compile-time effect, or any mixture thereof.  The statement
14618 sequence ends when a closing brace or end-of-file is encountered in a
14619 place where a new statement could have validly started.  It is up to
14620 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
14621 is correctly set to reflect the source of the code to be parsed and the
14622 lexical context for the statements.
14623
14624 The op tree representing the statement sequence is returned.  This may
14625 be a null pointer if the statements were all null, for example if there
14626 were no statements or if there were only subroutine definitions (which
14627 have compile-time side effects).  If not null, it will be a C<lineseq>
14628 list, normally including C<nextstate> or equivalent ops.
14629
14630 If an error occurs in parsing or compilation, in most cases a valid op
14631 tree is returned anyway.  The error is reflected in the parser state,
14632 normally resulting in a single exception at the top level of parsing
14633 which covers all the compilation errors that occurred.  Some compilation
14634 errors, however, will throw an exception immediately.
14635
14636 The I<flags> parameter is reserved for future use, and must always
14637 be zero.
14638
14639 =cut
14640 */
14641
14642 OP *
14643 Perl_parse_stmtseq(pTHX_ U32 flags)
14644 {
14645     OP *stmtseqop;
14646     I32 c;
14647     if (flags)
14648         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
14649     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
14650     c = lex_peek_unichar(0);
14651     if (c != -1 && c != /*{*/'}')
14652         qerror(Perl_mess(aTHX_ "Parse error"));
14653     return stmtseqop;
14654 }
14655
14656 void
14657 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
14658 {
14659     PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
14660     deprecate("qw(...) as parentheses");
14661     force_next((4<<24)|')');
14662     if (qwlist->op_type == OP_STUB) {
14663         op_free(qwlist);
14664     }
14665     else {
14666         start_force(PL_curforce);
14667         NEXTVAL_NEXTTOKE.opval = qwlist;
14668         force_next(THING);
14669     }
14670     force_next((2<<24)|'(');
14671 }
14672
14673 /*
14674  * Local variables:
14675  * c-indentation-style: bsd
14676  * c-basic-offset: 4
14677  * indent-tabs-mode: t
14678  * End:
14679  *
14680  * ex: set ts=8 sts=4 sw=4 noet:
14681  */